X-Git-Url: http://git.salome-platform.org/gitweb/?a=blobdiff_plain;f=src%2FContainer%2FSALOME_ContainerManager.cxx;h=0072c24435fef010576eab033c04b47570598671;hb=fd8d518d82191e73173498a067da5c3bfddf7eb0;hp=8a33c0fec0ff6711f252c1480ebef3ead3b9976e;hpb=6ed566599d86abfa34488a6f4f65c5a44dae0172;p=modules%2Fkernel.git diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 8a33c0fec..0072c2443 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -1,4 +1,4 @@ -// Copyright (C) 2007-2014 CEA/DEN, EDF R&D, OPEN CASCADE +// Copyright (C) 2007-2015 CEA/DEN, EDF R&D, OPEN CASCADE // // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS @@ -21,7 +21,10 @@ // #include "SALOME_ContainerManager.hxx" +#include "SALOME_ResourcesManager.hxx" +#include "SALOME_LoadRateManager.hxx" #include "SALOME_NamingService.hxx" +#include "SALOME_ResourcesManager_Client.hxx" #include "SALOME_ModuleCatalog.hh" #include "Basics_Utils.hxx" #include "Basics_DirUtils.hxx" @@ -39,6 +42,10 @@ #include #include CORBA_CLIENT_HEADER(SALOME_Session) +#ifdef HAVE_MPI2 +#include +#endif + #ifdef WIN32 #include #define getpid _getpid @@ -48,9 +55,9 @@ #include "PaCOPP.hxx" #endif -#define TIME_OUT_TO_LAUNCH_CONT 60 +const int SALOME_ContainerManager::TIME_OUT_TO_LAUNCH_CONT=60; -const char *SALOME_ContainerManager::_ContainerManagerNameInNS = +const char *SALOME_ContainerManager::_ContainerManagerNameInNS = "/ContainerManager"; omni_mutex SALOME_ContainerManager::_numInstanceMutex; @@ -60,7 +67,7 @@ Utils_Mutex SALOME_ContainerManager::_getenvMutex; Utils_Mutex SALOME_ContainerManager::_systemMutex; //============================================================================= -/*! +/*! * Constructor * \param orb * Define a CORBA single thread policy for the server, which avoid to deal @@ -68,11 +75,12 @@ Utils_Mutex SALOME_ContainerManager::_systemMutex; */ //============================================================================= -SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns):_nbprocUsed(1) +SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService *ns) + : _nbprocUsed(1) { MESSAGE("constructor"); _NS = ns; - _ResManager = rm; + _resManager = new SALOME_ResourcesManager_Client(ns); PortableServer::POAManager_var pman = poa->the_POAManager(); _orb = CORBA::ORB::_duplicate(orb) ; @@ -92,11 +100,11 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer _isAppliSalomeDefined = (GetenvThreadSafe("APPLI") != 0); #ifdef HAVE_MPI2 -#ifdef WITHOPENMPI +#ifdef OPEN_MPI _pid_mpiServer = -1; // the urifile name depends on pid of the process std::stringstream urifile; - urifile << GetenvThreadSafe("HOME") << "/.urifile_" << getpid(); + urifile << GetenvThreadSafeAsString("HOME") << "/.urifile_" << getpid(); setenv("OMPI_URI_FILE",urifile.str().c_str(),1); if( GetenvThreadSafe("OMPI_URI_FILE") != NULL ){ // get the pid of all ompi-server @@ -104,7 +112,7 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer // launch a new ompi-server std::string command; command = "ompi-server -r "; - command += GetenvThreadSafe("OMPI_URI_FILE"); + command += GetenvThreadSafeAsString("OMPI_URI_FILE"); int status=SystemThreadSafe(command.c_str()); if(status!=0) throw SALOME_Exception("Error when launching ompi-server"); @@ -118,7 +126,7 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer if(_pid_mpiServer < 0) throw SALOME_Exception("Error when getting ompi-server id"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) _pid_mpiServer = -1; // get the pid of all hydra_nameserver std::set thepids1 = getpidofprogram("hydra_nameserver"); @@ -140,7 +148,7 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer } //============================================================================= -/*! +/*! * destructor */ //============================================================================= @@ -148,8 +156,9 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer SALOME_ContainerManager::~SALOME_ContainerManager() { MESSAGE("destructor"); + delete _resManager; #ifdef HAVE_MPI2 -#ifdef WITHOPENMPI +#ifdef OPEN_MPI if( GetenvThreadSafe("OMPI_URI_FILE") != NULL ){ // kill my ompi-server if( kill(_pid_mpiServer,SIGTERM) != 0 ) @@ -159,7 +168,7 @@ SALOME_ContainerManager::~SALOME_ContainerManager() if(status!=0) throw SALOME_Exception("Error when removing urifile"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) // kill my hydra_nameserver if(_pid_mpiServer > -1) if( kill(_pid_mpiServer,SIGTERM) != 0 ) @@ -239,7 +248,7 @@ void SALOME_ContainerManager::ShutdownContainers() MESSAGE("ShutdownContainers: " << (*iter)); cont->Shutdown(); } - else + else MESSAGE("ShutdownContainers: no container ref for " << (*iter)); } catch(CORBA::SystemException& e) @@ -299,21 +308,22 @@ Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::Con // Step 2: Get all possibleResources from the parameters // Consider only resources that can run containers - local_params.resource_params.can_run_containers = true; - Engines::ResourceList_var possibleResources = _ResManager->GetFittingResources(local_params.resource_params); - MESSAGE("[GiveContainer] - length of possible resources " << possibleResources->length()); + resourceParams resource_params = resourceParameters_CORBAtoCPP(local_params.resource_params); + resource_params.can_run_containers = true; + std::vector possibleResources = _resManager->GetFittingResources(resource_params); + MESSAGE("[GiveContainer] - length of possible resources " << possibleResources.size()); std::vector local_resources; - // Step 3: if mode is "get" keep only machines with existing containers + // Step 3: if mode is "get" keep only machines with existing containers if(mode == "get") { - for(unsigned int i=0; i < possibleResources->length(); i++) + for(unsigned int i=0; i < possibleResources.size(); i++) { - Engines::Container_ptr cont = FindContainer(params, possibleResources[i].in()); + Engines::Container_ptr cont = FindContainer(params, possibleResources[i]); try { if(!cont->_non_existent()) - local_resources.push_back(std::string(possibleResources[i])); + local_resources.push_back(possibleResources[i]); } catch(CORBA::Exception&) {} } @@ -326,8 +336,7 @@ Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::Con } } else - for(unsigned int i=0; i < possibleResources->length(); i++) - local_resources.push_back(std::string(possibleResources[i])); + local_resources = possibleResources; // Step 4: select the resource where to get/start the container bool resource_available = true; @@ -341,7 +350,7 @@ Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::Con { try { - resource_selected = _ResManager->GetImpl()->Find(params.resource_params.policy.in(), resources); + resource_selected = _resManager->Find(params.resource_params.policy.in(), resources); // Remove resource_selected from vector std::vector::iterator it; for (it=resources.begin() ; it < resources.end(); it++ ) @@ -359,8 +368,8 @@ Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::Con MESSAGE("[GiveContainer] Resource selected is: " << resource_selected); // Step 5: Create container name - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str()); - std::string hostname(resource_definition->hostname.in()); + ParserResourcesType resource_definition = _resManager->GetResourceDefinition(resource_selected); + std::string hostname(resource_definition.HostName); std::string containerNameInNS; if(params.isMPI){ int nbproc; @@ -508,21 +517,21 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par { // Preparing remote command std::string command = ""; - const ParserResourcesType resInfo(_ResManager->GetImpl()->GetResourcesDescr(resource_selected)); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_selected)); command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName); if (resInfo.AppliPath != "") command += resInfo.AppliPath; else { ASSERT(GetenvThreadSafe("APPLI")); - command += GetenvThreadSafe("APPLI"); + command += GetenvThreadSafeAsString("APPLI"); } command += "/runRemote.sh "; ASSERT(GetenvThreadSafe("NSHOST")); - command += GetenvThreadSafe("NSHOST"); // hostname of CORBA name server + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; ASSERT(GetenvThreadSafe("NSPORT")); - command += GetenvThreadSafe("NSPORT"); // port of CORBA name server + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server command += " \"ls /tmp >/dev/null 2>&1\""; // Launch remote command @@ -561,11 +570,13 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par //redirect stdout and stderr in a file #ifdef WIN32 - logFilename=GetenvThreadSafe("TEMP"); + logFilename=GetenvThreadSafeAsString("TEMP"); logFilename += "\\"; - user = GetenvThreadSafe( "USERNAME" ); + user = GetenvThreadSafeAsString( "USERNAME" ); #else - user = GetenvThreadSafe( "USER" ); + user = GetenvThreadSafeAsString( "USER" ); + if (user.empty()) + user = GetenvThreadSafeAsString( "LOGNAME" ); logFilename="/tmp"; char* val = GetenvThreadSafe("SALOME_TMP_DIR"); if(val) @@ -584,11 +595,7 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par logFilename += tmp.str(); logFilename += ".log" ; command += " > " + logFilename + " 2>&1"; -#ifdef WIN32 - command = "%PYTHONBIN% -c \"import win32pm ; win32pm.spawnpid(r'" + command + "', '')\""; -#else - command += " &"; -#endif + MakeTheCommandToBeLaunchedASync(command); // launch container with a system call status=SystemThreadSafe(command.c_str()); @@ -609,30 +616,14 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par else { // Step 4: Wait for the container - int count = TIME_OUT_TO_LAUNCH_CONT; - if (GetenvThreadSafe("TIMEOUT_TO_LAUNCH_CONTAINER") != 0) - { - std::string new_count_str = GetenvThreadSafe("TIMEOUT_TO_LAUNCH_CONTAINER"); - int new_count; - std::istringstream ss(new_count_str); - if (!(ss >> new_count)) - { - INFOS("[LaunchContainer] TIMEOUT_TO_LAUNCH_CONTAINER should be an int"); - } - else - count = new_count; - } + int count(GetTimeOutToLoaunchServer()); INFOS("[GiveContainer] waiting " << count << " second steps container " << containerNameInNS); while (CORBA::is_nil(ret) && count) { -#ifndef WIN32 - sleep( 1 ) ; -#else - Sleep(1000); -#endif + SleepInSecond(1); count--; MESSAGE("[GiveContainer] step " << count << " Waiting for container on " << resource_selected); - CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); + CORBA::Object_var obj(_NS->Resolve(containerNameInNS.c_str())); ret=Engines::Container::_narrow(obj); } if (CORBA::is_nil(ret)) @@ -684,8 +675,8 @@ Engines::Container_ptr SALOME_ContainerManager::FindContainer(const Engines::Con Engines::Container_ptr SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& params, const std::string& resource) { - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource.c_str()); - std::string hostname(resource_definition->hostname.in()); + ParserResourcesType resource_definition = _resManager->GetResourceDefinition(resource); + std::string hostname(resource_definition.HostName); std::string containerNameInNS(_NS->BuildContainerNameForNS(params, hostname.c_str())); MESSAGE("[FindContainer] Try to find a container " << containerNameInNS << " on resource " << resource); CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); @@ -708,7 +699,7 @@ bool isPythonContainer(const char* ContainerName); //============================================================================= /*! * This is no longer valid (C++ container are also python containers) - */ + */ //============================================================================= bool isPythonContainer(const char* ContainerName) { @@ -743,7 +734,7 @@ bool isPythonContainer(const char* ContainerName) * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) * - where workingdir is the requested working directory for the container. * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME - */ + */ //============================================================================= std::string @@ -755,9 +746,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& else { int nbproc; - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_name.c_str()); - std::string hostname(resource_definition->hostname.in()); - const ParserResourcesType resInfo(_ResManager->GetImpl()->GetResourcesDescr(resource_name)); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_name)); if (params.isMPI) { @@ -776,17 +765,17 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& else { ASSERT(GetenvThreadSafe("APPLI")); - command += GetenvThreadSafe("APPLI"); // path relative to user@machine $HOME + command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME } command += "/runRemote.sh "; ASSERT(GetenvThreadSafe("NSHOST")); - command += GetenvThreadSafe("NSHOST"); // hostname of CORBA name server + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; ASSERT(GetenvThreadSafe("NSPORT")); - command += GetenvThreadSafe("NSPORT"); // port of CORBA name server + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server std::string wdir = params.workingdir.in(); if(wdir != "") @@ -796,7 +785,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& if(wdir == "$TEMPDIR") wdir="\\$TEMPDIR"; command += wdir; // requested working directory - command += "'"; + command += "'"; } if(params.isMPI) @@ -805,18 +794,18 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& std::ostringstream o; o << nbproc << " "; command += o.str(); -#ifdef WITHLAM +#ifdef LAM_MPI command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) +#elif defined(OPEN_MPI) if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else{ command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - command += GetenvThreadSafe("OMPI_URI_FILE"); + command += GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) command += "-nameserver " + Kernel_Utils::GetHostname(); -#endif +#endif command += " SALOME_MPIContainer "; } else @@ -835,7 +824,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& //============================================================================= /*! * builds the command to be launched. - */ + */ //============================================================================= std::string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer(const Engines::ContainerParameters& params, const std::string& machinesFile, const std::string& container_exe, std::string& tmpFileName) const { @@ -859,17 +848,17 @@ std::string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer(const En if( GetenvThreadSafe("LIBBATCH_NODEFILE") != NULL ) o << "-machinefile " << machinesFile << " "; -#ifdef WITHLAM +#ifdef LAM_MPI o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) +#elif defined(OPEN_MPI) if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else { o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - o << GetenvThreadSafe("OMPI_URI_FILE"); + o << GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) o << "-nameserver " + Kernel_Utils::GetHostname(); #endif @@ -938,7 +927,7 @@ std::string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer(const En /*! * removes the generated temporary file in case of a remote launch. * This method is thread safe - */ + */ //============================================================================= void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) @@ -949,7 +938,7 @@ void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) #ifdef WIN32 std::string command = "del /F "; #else - std::string command = "rm "; + std::string command = "rm "; #endif if ( lenght > 4 ) command += tmpFileName.substr(0, lenght - 3 ); @@ -974,46 +963,81 @@ void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= void SALOME_ContainerManager::AddOmninamesParams(std::string& command) const { - CORBA::String_var iorstr = _NS->getIORaddr(); - command += "ORBInitRef NameService="; - command += iorstr; + std::ostringstream oss; + AddOmninamesParams(oss); + command+=oss.str(); } //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= -void SALOME_ContainerManager::AddOmninamesParams(std::ofstream& fileStream) const +void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream) const { - CORBA::String_var iorstr = _NS->getIORaddr(); - fileStream << "ORBInitRef NameService="; - fileStream << iorstr; + AddOmninamesParams(fileStream,_NS); } //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= -void SALOME_ContainerManager::AddOmninamesParams(std::ostringstream& oss) const +void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream, SALOME_NamingService *ns) +{ + CORBA::String_var iorstr(ns->getIORaddr()); + fileStream << "ORBInitRef NameService="; + fileStream << iorstr; +} + +void SALOME_ContainerManager::MakeTheCommandToBeLaunchedASync(std::string& command) +{ +#ifdef WIN32 + command = "%PYTHONBIN% -c \"import subprocess ; subprocess.Popen(command).pid\""; +#else + command += " &"; +#endif +} + +int SALOME_ContainerManager::GetTimeOutToLoaunchServer() +{ + int count(TIME_OUT_TO_LAUNCH_CONT); + if (GetenvThreadSafe("TIMEOUT_TO_LAUNCH_CONTAINER") != 0) + { + std::string new_count_str(GetenvThreadSafeAsString("TIMEOUT_TO_LAUNCH_CONTAINER")); + int new_count; + std::istringstream ss(new_count_str); + if (!(ss >> new_count)) + { + INFOS("[LaunchContainer] TIMEOUT_TO_LAUNCH_CONTAINER should be an int"); + } + else + count = new_count; + } + return count; +} + +void SALOME_ContainerManager::SleepInSecond(int ellapseTimeInSecond) { - CORBA::String_var iorstr = _NS->getIORaddr(); - oss << "ORBInitRef NameService="; - oss << iorstr; +#ifndef WIN32 + sleep( ellapseTimeInSecond ) ; +#else + int timeInMS(1000*ellapseTimeInSecond); + Sleep(timeInMS); +#endif } //============================================================================= /*! * generate a file name in /tmp directory - */ + */ //============================================================================= std::string SALOME_ContainerManager::BuildTemporaryFileName() @@ -1031,21 +1055,21 @@ std::string SALOME_ContainerManager::BuildTemporaryFileName() //============================================================================= /*! * Builds in a temporary file the script to be launched. - * + * * Used if SALOME Application ($APPLI) is not defined. * The command is build with data from CatalogResources, in which every path * used on remote computer must be defined. - */ + */ //============================================================================= -std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const std::string& resource_name, const Engines::ContainerParameters& params, std::string& tmpFileName) const throw(SALOME_Exception) +std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const std::string& resource_name, const Engines::ContainerParameters& params, std::string& tmpFileName) const { int status; tmpFileName = BuildTemporaryFileName(); std::ofstream tempOutputFile; tempOutputFile.open(tmpFileName.c_str(), std::ofstream::out ); - const ParserResourcesType resInfo(_ResManager->GetImpl()->GetResourcesDescr(resource_name)); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_name)); tempOutputFile << "#! /bin/sh" << std::endl; // --- set env vars @@ -1068,21 +1092,21 @@ std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const std::ostringstream o; tempOutputFile << nbproc << " "; -#ifdef WITHLAM +#ifdef LAM_MPI tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) +#elif defined(OPEN_MPI) if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else{ tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - tempOutputFile << GetenvThreadSafe("OMPI_URI_FILE"); + tempOutputFile << GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) tempOutputFile << "-nameserver " + Kernel_Utils::GetHostname(); #endif } - tempOutputFile << GetenvThreadSafe("KERNEL_ROOT_DIR") << "/bin/salome/"; + tempOutputFile << GetenvThreadSafeAsString("KERNEL_ROOT_DIR") << "/bin/salome/"; if (params.isMPI) { @@ -1152,7 +1176,7 @@ std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const throw SALOME_Exception("Unknown protocol"); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw SALOME_Exception("Error of connection on remote host"); command += resInfo.HostName; command += " "; @@ -1175,7 +1199,7 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c { if (_isAppliSalomeDefined) { - const ParserResourcesType resInfo(_ResManager->GetImpl()->GetResourcesDescr(machine)); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(machine)); if (resInfo.Protocol == rsh) command = "rsh "; @@ -1201,17 +1225,17 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c else { ASSERT(GetenvThreadSafe("APPLI")); - command += GetenvThreadSafe("APPLI"); // path relative to user@machine $HOME + command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME } command += "/runRemote.sh "; ASSERT(GetenvThreadSafe("NSHOST")); - command += GetenvThreadSafe("NSHOST"); // hostname of CORBA name server + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; ASSERT(GetenvThreadSafe("NSPORT")); - command += GetenvThreadSafe("NSPORT"); // port of CORBA name server + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server command += " mpirun -np 1 hostname -s > " + tmpFile; } @@ -1235,7 +1259,7 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c std::string SALOME_ContainerManager::machinesFile(const int nbproc) { std::string tmp; - std::string nodesFile = GetenvThreadSafe("LIBBATCH_NODEFILE"); + std::string nodesFile = GetenvThreadSafeAsString("LIBBATCH_NODEFILE"); std::string machinesFile = Kernel_Utils::GetTmpFileName(); std::ifstream fpi(nodesFile.c_str(),std::ios::in); std::ofstream fpo(machinesFile.c_str(),std::ios::out); @@ -1317,11 +1341,11 @@ std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocol return command.str(); } -bool +bool SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & params, std::string resource_selected) { bool result = true; - + // Step 1 : check ContainerParameters // Check container_name, has to be defined if (std::string(params.container_name.in()) == "") @@ -1346,17 +1370,18 @@ SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & para } // Step 2 : check resource_selected - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str()); - std::string protocol = resource_definition->protocol.in(); - std::string username = resource_definition->username.in(); - std::string applipath = resource_definition->applipath.in(); + const ParserResourcesType resource_definition = _resManager->GetResourceDefinition(resource_selected); + //std::string protocol = resource_definition->protocol.in(); + std::string username = resource_definition.UserName; + std::string applipath = resource_definition.AppliPath; - if (protocol == "" || username == "" || applipath == "") + //if (protocol == "" || username == "" || applipath == "") + if (username == "" || applipath == "") { INFOS("[checkPaCOParameters] resource selected is not well defined"); - INFOS("[checkPaCOParameters] resource name: " << resource_definition->name.in()); - INFOS("[checkPaCOParameters] resource hostname: " << resource_definition->hostname.in()); - INFOS("[checkPaCOParameters] resource protocol: " << protocol); + INFOS("[checkPaCOParameters] resource name: " << resource_definition.Name); + INFOS("[checkPaCOParameters] resource hostname: " << resource_definition.HostName); + INFOS("[checkPaCOParameters] resource protocol: " << resource_definition.getAccessProtocolTypeStr()); INFOS("[checkPaCOParameters] resource username: " << username); INFOS("[checkPaCOParameters] resource applipath: " << applipath); result = false; @@ -1365,12 +1390,28 @@ SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & para return result; } +/* + * :WARNING: Do not directly convert returned value to std::string + * This function may return NULL if env variable is not defined. + * And std::string(NULL) causes undefined behavior. + * Use GetenvThreadSafeAsString to properly get a std::string. +*/ char *SALOME_ContainerManager::GetenvThreadSafe(const char *name) {// getenv is not thread safe. See man 7 pthread. Utils_Locker lock (&_getenvMutex); return getenv(name); } +/* + * Return env variable as a std::string. + * Return empty string if env variable is not set. + */ +std::string SALOME_ContainerManager::GetenvThreadSafeAsString(const char *name) +{ + char* var = GetenvThreadSafe(name); + return var ? std::string(var) : std::string(); +} + int SALOME_ContainerManager::SystemThreadSafe(const char *command) { Utils_Locker lock (&_systemMutex); @@ -1408,7 +1449,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters INFOS("[StartPaCOPPContainer] on resource : " << resource_selected); // Step 2 : Get a MachineFile for the parallel container - std::string machine_file_name = _ResManager->getMachineFile(resource_selected, + std::string machine_file_name = _resManager->getMachineFile(resource_selected, params.nb_proc, params.parallelLib.in()); @@ -1421,7 +1462,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters // Step 3 : starting parallel container proxy std::string command_proxy(""); std::string proxy_machine; - try + try { command_proxy = BuildCommandToLaunchPaCOProxyContainer(params, machine_file_name, proxy_machine); } @@ -1443,7 +1484,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters // Step 4 : starting parallel container nodes std::string command_nodes(""); SALOME_ContainerManager::actual_launch_machine_t nodes_machines; - try + try { command_nodes = BuildCommandToLaunchPaCONodeContainer(params, machine_file_name, nodes_machines, proxy_machine); } @@ -1460,7 +1501,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters { INFOS("[StarPaCOPPContainer] LaunchPaCONodeContainer failed !"); // Il faut tuer le proxy - try + try { Engines::Container_var proxy = Engines::Container::_narrow(container_proxy); proxy->Shutdown(); @@ -1473,7 +1514,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } // Step 4 : connecting nodes and the proxy to actually create a parallel container - for (int i = 0; i < params.nb_proc; i++) + for (int i = 0; i < params.nb_proc; i++) { std::ostringstream tmp; tmp << i; @@ -1483,7 +1524,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters std::string theNodeMachine(nodes_machines[i]); std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str()); obj = _NS->Resolve(containerNameInNS.c_str()); - if (CORBA::is_nil(obj)) + if (CORBA::is_nil(obj)) { INFOS("[StarPaCOPPContainer] CONNECTION FAILED From Naming Service !"); INFOS("[StarPaCOPPContainer] Container name is " << containerNameInNS); @@ -1517,7 +1558,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } // Step 5 : starting parallel container - try + try { MESSAGE ("[StarPaCOPPContainer] Starting parallel object"); container_proxy->start(); @@ -1538,7 +1579,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } catch(std::exception& exc) { - INFOS("Caught std::exception - "<GetResourceDefinition(params.resource_params.name); + ParserResourcesType resource_definition = + _resManager->GetResourceDefinition(params.resource_params.name.in()); // Choose hostname std::string hostname; @@ -1582,7 +1623,7 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C MESSAGE("[BuildCommandToLaunchPaCOProxyContainer] remote machine case detected !"); remote_execution = true; } - + // Log environnement std::string log_type(""); char * get_val = GetenvThreadSafe("PARALLEL_LOG"); @@ -1605,14 +1646,14 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C ASSERT(GetenvThreadSafe("NSHOST")); ASSERT(GetenvThreadSafe("NSPORT")); - command << resource_definition->protocol.in(); + command << resource_definition.getAccessProtocolTypeStr(); command << " -l "; - command << resource_definition->username.in(); + command << resource_definition.UserName; command << " " << hostname; - command << " " << resource_definition->applipath.in(); + command << " " << resource_definition.AppliPath; command << "/runRemote.sh "; - command << GetenvThreadSafe("NSHOST") << " "; // hostname of CORBA name server - command << GetenvThreadSafe("NSPORT") << " "; // port of CORBA name server + command << GetenvThreadSafeAsString("NSHOST") << " "; // hostname of CORBA name server + command << GetenvThreadSafeAsString("NSPORT") << " "; // port of CORBA name server } command << exe_name; @@ -1630,7 +1671,7 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C return command.str(); } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params, const std::string & machine_file_name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine, @@ -1646,9 +1687,9 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co nb_proc_stream << params.nb_proc; // Get resource definition - Engines::ResourceDefinition_var resource_definition = - _ResManager->GetResourceDefinition(params.resource_params.name); - + ParserResourcesType resource_definition = + _resManager->GetResourceDefinition(params.resource_params.name.in()); + // Log environnement std::string log_type(""); char * get_val = GetenvThreadSafe("PARALLEL_LOG"); @@ -1693,14 +1734,14 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co ASSERT(GetenvThreadSafe("NSHOST")); ASSERT(GetenvThreadSafe("NSPORT")); - command_node_stream << resource_definition->protocol.in(); + command_node_stream << resource_definition.getAccessProtocolTypeStr(); command_node_stream << " -l "; - command_node_stream << resource_definition->username.in(); + command_node_stream << resource_definition.UserName; command_node_stream << " " << hostname; - command_node_stream << " " << resource_definition->applipath.in(); + command_node_stream << " " << resource_definition.AppliPath; command_node_stream << "/runRemote.sh "; - command_node_stream << GetenvThreadSafe("NSHOST") << " "; // hostname of CORBA name server - command_node_stream << GetenvThreadSafe("NSPORT") << " "; // port of CORBA name server + command_node_stream << GetenvThreadSafeAsString("NSHOST") << " "; // hostname of CORBA name server + command_node_stream << GetenvThreadSafeAsString("NSPORT") << " "; // port of CORBA name server } command_node_stream << exe_name; @@ -1738,14 +1779,13 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co if (last == std::string::npos) last = -1; - std::string protocol = resource_definition->protocol.in(); - if (protocol == "rsh") + if (resource_definition.Protocol == rsh) command_remote_stream << "rcp "; - else + else command_remote_stream << "scp "; command_remote_stream << machine_file_name << " "; - command_remote_stream << resource_definition->username.in() << "@"; - command_remote_stream << hostname << ":" << resource_definition->applipath.in(); + command_remote_stream << resource_definition.UserName << "@"; + command_remote_stream << hostname << ":" << resource_definition.AppliPath; command_remote_stream << "/" << machine_file_name.substr(last+1); int status = SystemThreadSafe(command_remote_stream.str().c_str()); @@ -1770,20 +1810,20 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co ASSERT(GetenvThreadSafe("NSHOST")); ASSERT(GetenvThreadSafe("NSPORT")); - command_nodes << resource_definition->protocol.in(); + command_nodes << resource_definition.getAccessProtocolTypeStr(); command_nodes << " -l "; - command_nodes << resource_definition->username.in(); + command_nodes << resource_definition.UserName; command_nodes << " " << hostname; - command_nodes << " " << resource_definition->applipath.in(); + command_nodes << " " << resource_definition.AppliPath; command_nodes << "/runRemote.sh "; - command_nodes << GetenvThreadSafe("NSHOST") << " "; // hostname of CORBA name server - command_nodes << GetenvThreadSafe("NSPORT") << " "; // port of CORBA name server + command_nodes << GetenvThreadSafeAsString("NSHOST") << " "; // hostname of CORBA name server + command_nodes << GetenvThreadSafeAsString("NSPORT") << " "; // port of CORBA name server } - if (std::string(resource_definition->mpiImpl.in()) == "lam") + if (resource_definition.mpi == lam) { command_nodes << "mpiexec -ssi boot "; - command_nodes << "-machinefile " << machine_file_name << " "; + command_nodes << "-machinefile " << machine_file_name << " "; command_nodes << "-n " << params.nb_proc; } else @@ -1799,7 +1839,7 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co // We don't put hostname, because nodes are registered in the resource of the proxy for (int i= 0; i < params.nb_proc; i++) - vect_machine.push_back(proxy_hostname); + vect_machine.push_back(proxy_hostname); command_nodes << command_end; } @@ -1811,7 +1851,7 @@ SALOME_ContainerManager::LogConfiguration(const std::string & log_type, const std::string & exe_type, const std::string & container_name, const std::string & hostname, - std::string & begin, + std::string & begin, std::string & end) { if(log_type == "xterm") @@ -1828,13 +1868,16 @@ SALOME_ContainerManager::LogConfiguration(const std::string & log_type, { // default into a file... std::string logFilename = "/tmp/" + container_name + "_" + hostname + "_" + exe_type + "_"; - logFilename += std::string(GetenvThreadSafe("USER")) + ".log"; + std::string user = GetenvThreadSafeAsString("USER"); + if (user.empty()) + user = GetenvThreadSafeAsString("LOGNAME"); + logFilename += user + ".log"; end = " > " + logFilename + " 2>&1 & "; } } -CORBA::Object_ptr -SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, +CORBA::Object_ptr +SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string & hostname) { @@ -1851,20 +1894,20 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, return container_proxy; } - int count = TIME_OUT_TO_LAUNCH_CONT; + int count(GetTimeOutToLoaunchServer()); CORBA::Object_var obj = CORBA::Object::_nil(); - std::string containerNameInNS = _NS->BuildContainerNameForNS(params.container_name.in(), + std::string containerNameInNS = _NS->BuildContainerNameForNS(params.container_name.in(), hostname.c_str()); MESSAGE("[LaunchParallelContainer] Waiting for Parallel Container proxy : " << containerNameInNS); - while (CORBA::is_nil(obj) && count) + while (CORBA::is_nil(obj) && count) { sleep(1); count--; obj = _NS->Resolve(containerNameInNS.c_str()); } - try + try { container_proxy = PaCO::InterfaceManager::_narrow(obj); } @@ -1906,7 +1949,7 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, */ //============================================================================= bool -SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, +SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine) @@ -1924,7 +1967,7 @@ SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, INFOS("[LaunchPaCONodeContainer] Waiting for the nodes of the parallel container"); // We are waiting all the nodes - for (int i = 0; i < params.nb_proc; i++) + for (int i = 0; i < params.nb_proc; i++) { CORBA::Object_var obj = CORBA::Object::_nil(); std::string theMachine(vect_machine[i]); @@ -1935,9 +1978,9 @@ SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, std::string container_node_name = name + proc_number; std::string containerNameInNS = _NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str()); INFOS("[LaunchPaCONodeContainer] Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine); - int count = TIME_OUT_TO_LAUNCH_CONT; + int count(GetTimeOutToLoaunchServer()); while (CORBA::is_nil(obj) && count) { - sleep(1) ; + SleepInSecond(1); count-- ; obj = _NS->Resolve(containerNameInNS.c_str()); } @@ -1962,7 +2005,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters return ret; } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::ContainerParameters& params, std::string machine_file_name, std::string & proxy_hostname) @@ -1970,26 +2013,26 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C return ""; } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params, const std::string & machine_file_name, - SALOME_ContainerManager::actual_launch_machine_t & vect_machine, - const std::string & proxy_hostname) + SALOME_ContainerManager::actual_launch_machine_t & vect_machine, + const std::string & proxy_hostname) { return ""; } -void +void SALOME_ContainerManager::LogConfiguration(const std::string & log_type, const std::string & exe_type, const std::string & container_name, const std::string & hostname, - std::string & begin, + std::string & begin, std::string & end) { } -CORBA::Object_ptr -SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, +CORBA::Object_ptr +SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& hostname) { @@ -1997,8 +2040,8 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, return ret; } -bool -SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, +bool +SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine)