X-Git-Url: http://git.salome-platform.org/gitweb/?a=blobdiff_plain;f=src%2FContainer%2FSALOME_ContainerManager.cxx;h=81ff3e15f33070c366c18b74663340bb3b43a3f2;hb=b31c841167feb262ef99fd56ba74a6f75db382c1;hp=29a4fa0e2e3d40b27d58b77f95f7ea9c858464b2;hpb=768b3a83b1b1fd8c87d6c624b7f21043738c43c2;p=modules%2Fkernel.git diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 29a4fa0e2..81ff3e15f 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -1,4 +1,4 @@ -// Copyright (C) 2007-2014 CEA/DEN, EDF R&D, OPEN CASCADE +// Copyright (C) 2007-2017 CEA/DEN, EDF R&D, OPEN CASCADE // // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS @@ -21,6 +21,8 @@ // #include "SALOME_ContainerManager.hxx" +#include "SALOME_ResourcesManager.hxx" +#include "SALOME_LoadRateManager.hxx" #include "SALOME_NamingService.hxx" #include "SALOME_ResourcesManager_Client.hxx" #include "SALOME_ModuleCatalog.hh" @@ -40,6 +42,11 @@ #include #include CORBA_CLIENT_HEADER(SALOME_Session) +#ifdef HAVE_MPI2 +#include +#include +#endif + #ifdef WIN32 #include #define getpid _getpid @@ -49,9 +56,9 @@ #include "PaCOPP.hxx" #endif -#define TIME_OUT_TO_LAUNCH_CONT 60 +const int SALOME_ContainerManager::TIME_OUT_TO_LAUNCH_CONT=60; -const char *SALOME_ContainerManager::_ContainerManagerNameInNS = +const char *SALOME_ContainerManager::_ContainerManagerNameInNS = "/ContainerManager"; omni_mutex SALOME_ContainerManager::_numInstanceMutex; @@ -61,7 +68,7 @@ Utils_Mutex SALOME_ContainerManager::_getenvMutex; Utils_Mutex SALOME_ContainerManager::_systemMutex; //============================================================================= -/*! +/*! * Constructor * \param orb * Define a CORBA single thread policy for the server, which avoid to deal @@ -94,47 +101,49 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer _isAppliSalomeDefined = (GetenvThreadSafe("APPLI") != 0); #ifdef HAVE_MPI2 -#ifdef WITHOPENMPI +#ifdef OPEN_MPI _pid_mpiServer = -1; // the urifile name depends on pid of the process std::stringstream urifile; - urifile << GetenvThreadSafe("HOME") << "/.urifile_" << getpid(); + urifile << GetenvThreadSafeAsString("HOME") << "/.urifile_" << getpid(); setenv("OMPI_URI_FILE",urifile.str().c_str(),1); if( GetenvThreadSafe("OMPI_URI_FILE") != NULL ){ - // get the pid of all ompi-server - std::set thepids1 = getpidofprogram("ompi-server"); - // launch a new ompi-server - std::string command; - command = "ompi-server -r "; - command += GetenvThreadSafe("OMPI_URI_FILE"); - int status=SystemThreadSafe(command.c_str()); - if(status!=0) - throw SALOME_Exception("Error when launching ompi-server"); - // get the pid of all ompi-server - std::set thepids2 = getpidofprogram("ompi-server"); - // my ompi-server is the new one - std::set::const_iterator it; - for(it=thepids2.begin();it!=thepids2.end();it++) - if(thepids1.find(*it) == thepids1.end()) - _pid_mpiServer = *it; - if(_pid_mpiServer < 0) - throw SALOME_Exception("Error when getting ompi-server id"); + // Linux specific code + pid_t pid = fork(); // spawn a child process, following code is executed in both processes + if ( pid == 0 ) // I'm a child, replace myself with a new ompi-server + { + std::string uriarg = GetenvThreadSafeAsString("OMPI_URI_FILE"); + execlp( "ompi-server", "ompi-server", "-r", uriarg.c_str(), NULL ); + throw SALOME_Exception("Error when launching ompi-server"); // execlp failed + } + else if ( pid < 0 ) + { + throw SALOME_Exception("fork() failed"); + } + else // I'm a parent + { + //wait(NULL); // wait(?) for a child end + _pid_mpiServer = pid; + } } -#elif defined(WITHMPICH) +#elif defined(MPICH) _pid_mpiServer = -1; - // get the pid of all hydra_nameserver - std::set thepids1 = getpidofprogram("hydra_nameserver"); - // launch a new hydra_nameserver - std::string command; - command = "hydra_nameserver &"; - SystemThreadSafe(command.c_str()); - // get the pid of all hydra_nameserver - std::set thepids2 = getpidofprogram("hydra_nameserver"); - // my hydra_nameserver is the new one - std::set::const_iterator it; - for(it=thepids2.begin();it!=thepids2.end();it++) - if(thepids1.find(*it) == thepids1.end()) - _pid_mpiServer = *it; + // Linux specific code + pid_t pid = fork(); // spawn a child process, following code is executed in both processes + if ( pid == 0 ) // I'm a child, replace myself with a new hydra_nameserver + { + execlp( "hydra_nameserver", "hydra_nameserver", NULL ); + throw SALOME_Exception("Error when launching hydra_nameserver"); // execlp failed + } + else if ( pid < 0 ) + { + throw SALOME_Exception("fork() failed"); + } + else // I'm a parent + { + //wait(NULL); + _pid_mpiServer = pid; + } #endif #endif @@ -142,7 +151,7 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer } //============================================================================= -/*! +/*! * destructor */ //============================================================================= @@ -152,7 +161,7 @@ SALOME_ContainerManager::~SALOME_ContainerManager() MESSAGE("destructor"); delete _resManager; #ifdef HAVE_MPI2 -#ifdef WITHOPENMPI +#ifdef OPEN_MPI if( GetenvThreadSafe("OMPI_URI_FILE") != NULL ){ // kill my ompi-server if( kill(_pid_mpiServer,SIGTERM) != 0 ) @@ -162,7 +171,7 @@ SALOME_ContainerManager::~SALOME_ContainerManager() if(status!=0) throw SALOME_Exception("Error when removing urifile"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) // kill my hydra_nameserver if(_pid_mpiServer > -1) if( kill(_pid_mpiServer,SIGTERM) != 0 ) @@ -242,7 +251,7 @@ void SALOME_ContainerManager::ShutdownContainers() MESSAGE("ShutdownContainers: " << (*iter)); cont->Shutdown(); } - else + else MESSAGE("ShutdownContainers: no container ref for " << (*iter)); } catch(CORBA::SystemException& e) @@ -308,7 +317,7 @@ Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::Con MESSAGE("[GiveContainer] - length of possible resources " << possibleResources.size()); std::vector local_resources; - // Step 3: if mode is "get" keep only machines with existing containers + // Step 3: if mode is "get" keep only machines with existing containers if(mode == "get") { for(unsigned int i=0; i < possibleResources.size(); i++) @@ -384,7 +393,8 @@ Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::Con return ret; } // A mpi parallel container register on zero node in NS - containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname,machFile).c_str()); + std::string mpiZeroNode = GetMPIZeroNode(resource_selected,machFile).c_str(); + containerNameInNS = _NS->BuildContainerNameForNS(params, mpiZeroNode.c_str()); } else containerNameInNS = _NS->BuildContainerNameForNS(params, hostname.c_str()); @@ -518,14 +528,14 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par else { ASSERT(GetenvThreadSafe("APPLI")); - command += GetenvThreadSafe("APPLI"); + command += GetenvThreadSafeAsString("APPLI"); } command += "/runRemote.sh "; ASSERT(GetenvThreadSafe("NSHOST")); - command += GetenvThreadSafe("NSHOST"); // hostname of CORBA name server + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; ASSERT(GetenvThreadSafe("NSPORT")); - command += GetenvThreadSafe("NSPORT"); // port of CORBA name server + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server command += " \"ls /tmp >/dev/null 2>&1\""; // Launch remote command @@ -564,11 +574,13 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par //redirect stdout and stderr in a file #ifdef WIN32 - logFilename=GetenvThreadSafe("TEMP"); + logFilename=GetenvThreadSafeAsString("TEMP"); logFilename += "\\"; - user = GetenvThreadSafe( "USERNAME" ); + user = GetenvThreadSafeAsString( "USERNAME" ); #else - user = GetenvThreadSafe( "USER" ); + user = GetenvThreadSafeAsString( "USER" ); + if (user.empty()) + user = GetenvThreadSafeAsString( "LOGNAME" ); logFilename="/tmp"; char* val = GetenvThreadSafe("SALOME_TMP_DIR"); if(val) @@ -587,11 +599,7 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par logFilename += tmp.str(); logFilename += ".log" ; command += " > " + logFilename + " 2>&1"; -#ifdef WIN32 - command = "%PYTHONBIN% -c \"import win32pm ; win32pm.spawnpid(r'" + command + "', '')\""; -#else - command += " &"; -#endif + MakeTheCommandToBeLaunchedASync(command); // launch container with a system call status=SystemThreadSafe(command.c_str()); @@ -612,30 +620,14 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par else { // Step 4: Wait for the container - int count = TIME_OUT_TO_LAUNCH_CONT; - if (GetenvThreadSafe("TIMEOUT_TO_LAUNCH_CONTAINER") != 0) - { - std::string new_count_str = GetenvThreadSafe("TIMEOUT_TO_LAUNCH_CONTAINER"); - int new_count; - std::istringstream ss(new_count_str); - if (!(ss >> new_count)) - { - INFOS("[LaunchContainer] TIMEOUT_TO_LAUNCH_CONTAINER should be an int"); - } - else - count = new_count; - } + int count(GetTimeOutToLoaunchServer()); INFOS("[GiveContainer] waiting " << count << " second steps container " << containerNameInNS); while (CORBA::is_nil(ret) && count) { -#ifndef WIN32 - sleep( 1 ) ; -#else - Sleep(1000); -#endif + SleepInSecond(1); count--; MESSAGE("[GiveContainer] step " << count << " Waiting for container on " << resource_selected); - CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); + CORBA::Object_var obj(_NS->Resolve(containerNameInNS.c_str())); ret=Engines::Container::_narrow(obj); } if (CORBA::is_nil(ret)) @@ -711,7 +703,7 @@ bool isPythonContainer(const char* ContainerName); //============================================================================= /*! * This is no longer valid (C++ container are also python containers) - */ + */ //============================================================================= bool isPythonContainer(const char* ContainerName) { @@ -737,7 +729,7 @@ bool isPythonContainer(const char* ContainerName) * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ * SALOME_Container containerName &" - * - where user is ommited if not specified in CatalogResources, + * - where user is omitted if not specified in CatalogResources, * - where distant path is always relative to user@machine $HOME, and * equal to $APPLI if not specified in CatalogResources, * - where hostNS is the hostname of CORBA naming server (set by scripts to @@ -746,7 +738,7 @@ bool isPythonContainer(const char* ContainerName) * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) * - where workingdir is the requested working directory for the container. * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME - */ + */ //============================================================================= std::string @@ -768,7 +760,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& nbproc = params.nb_proc; } - // "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ + // "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir // SALOME_Container containerName &" command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName); @@ -777,17 +769,17 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& else { ASSERT(GetenvThreadSafe("APPLI")); - command += GetenvThreadSafe("APPLI"); // path relative to user@machine $HOME + command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME } command += "/runRemote.sh "; ASSERT(GetenvThreadSafe("NSHOST")); - command += GetenvThreadSafe("NSHOST"); // hostname of CORBA name server + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; ASSERT(GetenvThreadSafe("NSPORT")); - command += GetenvThreadSafe("NSPORT"); // port of CORBA name server + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server std::string wdir = params.workingdir.in(); if(wdir != "") @@ -797,7 +789,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& if(wdir == "$TEMPDIR") wdir="\\$TEMPDIR"; command += wdir; // requested working directory - command += "'"; + command += "'"; } if(params.isMPI) @@ -806,18 +798,18 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& std::ostringstream o; o << nbproc << " "; command += o.str(); -#ifdef WITHLAM +#ifdef LAM_MPI command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) +#elif defined(OPEN_MPI) if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else{ command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - command += GetenvThreadSafe("OMPI_URI_FILE"); + command += GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) command += "-nameserver " + Kernel_Utils::GetHostname(); -#endif +#endif command += " SALOME_MPIContainer "; } else @@ -836,7 +828,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& //============================================================================= /*! * builds the command to be launched. - */ + */ //============================================================================= std::string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer(const Engines::ContainerParameters& params, const std::string& machinesFile, const std::string& container_exe, std::string& tmpFileName) const { @@ -860,17 +852,17 @@ std::string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer(const En if( GetenvThreadSafe("LIBBATCH_NODEFILE") != NULL ) o << "-machinefile " << machinesFile << " "; -#ifdef WITHLAM +#ifdef LAM_MPI o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) +#elif defined(OPEN_MPI) if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else { o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - o << GetenvThreadSafe("OMPI_URI_FILE"); + o << GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) o << "-nameserver " + Kernel_Utils::GetHostname(); #endif @@ -939,21 +931,21 @@ std::string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer(const En /*! * removes the generated temporary file in case of a remote launch. * This method is thread safe - */ + */ //============================================================================= void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) { - int lenght = tmpFileName.size(); - if ( lenght > 0) + int length = tmpFileName.size(); + if ( length > 0) { #ifdef WIN32 std::string command = "del /F "; #else - std::string command = "rm "; + std::string command = "rm "; #endif - if ( lenght > 4 ) - command += tmpFileName.substr(0, lenght - 3 ); + if ( length > 4 ) + command += tmpFileName.substr(0, length - 3 ); else command += tmpFileName; command += '*'; @@ -975,52 +967,90 @@ void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= void SALOME_ContainerManager::AddOmninamesParams(std::string& command) const { - CORBA::String_var iorstr = _NS->getIORaddr(); - command += "ORBInitRef NameService="; - command += iorstr; + std::ostringstream oss; + AddOmninamesParams(oss); + command+=oss.str(); } //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= -void SALOME_ContainerManager::AddOmninamesParams(std::ofstream& fileStream) const +void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream) const { - CORBA::String_var iorstr = _NS->getIORaddr(); - fileStream << "ORBInitRef NameService="; - fileStream << iorstr; + AddOmninamesParams(fileStream,_NS); } //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= -void SALOME_ContainerManager::AddOmninamesParams(std::ostringstream& oss) const +void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream, SALOME_NamingService *ns) +{ + CORBA::String_var iorstr(ns->getIORaddr()); + fileStream << "ORBInitRef NameService="; + fileStream << iorstr; +} + +void SALOME_ContainerManager::MakeTheCommandToBeLaunchedASync(std::string& command) +{ +#ifdef WIN32 + command = "%PYTHONBIN% -c \"import subprocess ; subprocess.Popen(r'" + command + "').pid\""; +#else + command += " &"; +#endif +} + +int SALOME_ContainerManager::GetTimeOutToLoaunchServer() { - CORBA::String_var iorstr = _NS->getIORaddr(); - oss << "ORBInitRef NameService="; - oss << iorstr; + int count(TIME_OUT_TO_LAUNCH_CONT); + if (GetenvThreadSafe("TIMEOUT_TO_LAUNCH_CONTAINER") != 0) + { + std::string new_count_str(GetenvThreadSafeAsString("TIMEOUT_TO_LAUNCH_CONTAINER")); + int new_count; + std::istringstream ss(new_count_str); + if (!(ss >> new_count)) + { + INFOS("[LaunchContainer] TIMEOUT_TO_LAUNCH_CONTAINER should be an int"); + } + else + count = new_count; + } + return count; +} + +void SALOME_ContainerManager::SleepInSecond(int ellapseTimeInSecond) +{ +#ifndef WIN32 + sleep( ellapseTimeInSecond ) ; +#else + int timeInMS(1000*ellapseTimeInSecond); + Sleep(timeInMS); +#endif } //============================================================================= /*! * generate a file name in /tmp directory - */ + */ //============================================================================= std::string SALOME_ContainerManager::BuildTemporaryFileName() { //build more complex file name to support multiple salome session std::string aFileName = Kernel_Utils::GetTmpFileName(); + std::ostringstream str_pid; + str_pid << ::getpid(); + aFileName = aFileName + "-" + str_pid.str(); #ifndef WIN32 aFileName += ".sh"; #else @@ -1032,11 +1062,11 @@ std::string SALOME_ContainerManager::BuildTemporaryFileName() //============================================================================= /*! * Builds in a temporary file the script to be launched. - * + * * Used if SALOME Application ($APPLI) is not defined. * The command is build with data from CatalogResources, in which every path * used on remote computer must be defined. - */ + */ //============================================================================= std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const std::string& resource_name, const Engines::ContainerParameters& params, std::string& tmpFileName) const @@ -1069,21 +1099,21 @@ std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const std::ostringstream o; tempOutputFile << nbproc << " "; -#ifdef WITHLAM +#ifdef LAM_MPI tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) +#elif defined(OPEN_MPI) if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else{ tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - tempOutputFile << GetenvThreadSafe("OMPI_URI_FILE"); + tempOutputFile << GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) tempOutputFile << "-nameserver " + Kernel_Utils::GetHostname(); #endif } - tempOutputFile << GetenvThreadSafe("KERNEL_ROOT_DIR") << "/bin/salome/"; + tempOutputFile << GetenvThreadSafeAsString("KERNEL_ROOT_DIR") << "/bin/salome/"; if (params.isMPI) { @@ -1153,7 +1183,7 @@ std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const throw SALOME_Exception("Unknown protocol"); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw SALOME_Exception("Error of connection on remote host"); command += resInfo.HostName; command += " "; @@ -1171,12 +1201,17 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c std::string zeronode; std::string command; std::string tmpFile = BuildTemporaryFileName(); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(machine)); + + if(resInfo.Protocol == sh) + { + return resInfo.HostName; + } if( GetenvThreadSafe("LIBBATCH_NODEFILE") == NULL ) { if (_isAppliSalomeDefined) { - const ParserResourcesType resInfo(_resManager->GetResourceDefinition(machine)); if (resInfo.Protocol == rsh) command = "rsh "; @@ -1202,17 +1237,17 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c else { ASSERT(GetenvThreadSafe("APPLI")); - command += GetenvThreadSafe("APPLI"); // path relative to user@machine $HOME + command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME } command += "/runRemote.sh "; ASSERT(GetenvThreadSafe("NSHOST")); - command += GetenvThreadSafe("NSHOST"); // hostname of CORBA name server + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; ASSERT(GetenvThreadSafe("NSPORT")); - command += GetenvThreadSafe("NSPORT"); // port of CORBA name server + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server command += " mpirun -np 1 hostname -s > " + tmpFile; } @@ -1236,7 +1271,7 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c std::string SALOME_ContainerManager::machinesFile(const int nbproc) { std::string tmp; - std::string nodesFile = GetenvThreadSafe("LIBBATCH_NODEFILE"); + std::string nodesFile = GetenvThreadSafeAsString("LIBBATCH_NODEFILE"); std::string machinesFile = Kernel_Utils::GetTmpFileName(); std::ifstream fpi(nodesFile.c_str(),std::ios::in); std::ofstream fpo(machinesFile.c_str(),std::ios::out); @@ -1262,21 +1297,6 @@ std::string SALOME_ContainerManager::machinesFile(const int nbproc) } -std::set SALOME_ContainerManager::getpidofprogram(const std::string program) -{ - std::set thepids; - std::string tmpFile = Kernel_Utils::GetTmpFileName(); - std::string cmd; - std::string thepid; - cmd = "pidof " + program + " > " + tmpFile; - SystemThreadSafe(cmd.c_str()); - std::ifstream fpi(tmpFile.c_str(),std::ios::in); - while(fpi >> thepid){ - thepids.insert(atoi(thepid.c_str())); - } - return thepids; -} - std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocolType protocol, const std::string & hostname, const std::string & username) @@ -1318,11 +1338,11 @@ std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocol return command.str(); } -bool +bool SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & params, std::string resource_selected) { bool result = true; - + // Step 1 : check ContainerParameters // Check container_name, has to be defined if (std::string(params.container_name.in()) == "") @@ -1367,12 +1387,28 @@ SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & para return result; } +/* + * :WARNING: Do not directly convert returned value to std::string + * This function may return NULL if env variable is not defined. + * And std::string(NULL) causes undefined behavior. + * Use GetenvThreadSafeAsString to properly get a std::string. +*/ char *SALOME_ContainerManager::GetenvThreadSafe(const char *name) {// getenv is not thread safe. See man 7 pthread. Utils_Locker lock (&_getenvMutex); return getenv(name); } +/* + * Return env variable as a std::string. + * Return empty string if env variable is not set. + */ +std::string SALOME_ContainerManager::GetenvThreadSafeAsString(const char *name) +{ + char* var = GetenvThreadSafe(name); + return var ? std::string(var) : std::string(); +} + int SALOME_ContainerManager::SystemThreadSafe(const char *command) { Utils_Locker lock (&_systemMutex); @@ -1423,7 +1459,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters // Step 3 : starting parallel container proxy std::string command_proxy(""); std::string proxy_machine; - try + try { command_proxy = BuildCommandToLaunchPaCOProxyContainer(params, machine_file_name, proxy_machine); } @@ -1445,7 +1481,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters // Step 4 : starting parallel container nodes std::string command_nodes(""); SALOME_ContainerManager::actual_launch_machine_t nodes_machines; - try + try { command_nodes = BuildCommandToLaunchPaCONodeContainer(params, machine_file_name, nodes_machines, proxy_machine); } @@ -1462,20 +1498,20 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters { INFOS("[StarPaCOPPContainer] LaunchPaCONodeContainer failed !"); // Il faut tuer le proxy - try + try { Engines::Container_var proxy = Engines::Container::_narrow(container_proxy); proxy->Shutdown(); } catch (...) { - INFOS("[StarPaCOPPContainer] Exception catched from proxy Shutdown..."); + INFOS("[StarPaCOPPContainer] Exception caught from proxy Shutdown..."); } return ret; } // Step 4 : connecting nodes and the proxy to actually create a parallel container - for (int i = 0; i < params.nb_proc; i++) + for (int i = 0; i < params.nb_proc; i++) { std::ostringstream tmp; tmp << i; @@ -1485,7 +1521,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters std::string theNodeMachine(nodes_machines[i]); std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str()); obj = _NS->Resolve(containerNameInNS.c_str()); - if (CORBA::is_nil(obj)) + if (CORBA::is_nil(obj)) { INFOS("[StarPaCOPPContainer] CONNECTION FAILED From Naming Service !"); INFOS("[StarPaCOPPContainer] Container name is " << containerNameInNS); @@ -1519,7 +1555,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } // Step 5 : starting parallel container - try + try { MESSAGE ("[StarPaCOPPContainer] Starting parallel object"); container_proxy->start(); @@ -1540,7 +1576,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } catch(std::exception& exc) { - INFOS("Caught std::exception - "<&1 & "; } } -CORBA::Object_ptr -SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, +CORBA::Object_ptr +SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string & hostname) { @@ -1852,20 +1891,20 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, return container_proxy; } - int count = TIME_OUT_TO_LAUNCH_CONT; + int count(GetTimeOutToLoaunchServer()); CORBA::Object_var obj = CORBA::Object::_nil(); - std::string containerNameInNS = _NS->BuildContainerNameForNS(params.container_name.in(), + std::string containerNameInNS = _NS->BuildContainerNameForNS(params.container_name.in(), hostname.c_str()); MESSAGE("[LaunchParallelContainer] Waiting for Parallel Container proxy : " << containerNameInNS); - while (CORBA::is_nil(obj) && count) + while (CORBA::is_nil(obj) && count) { sleep(1); count--; obj = _NS->Resolve(containerNameInNS.c_str()); } - try + try { container_proxy = PaCO::InterfaceManager::_narrow(obj); } @@ -1897,7 +1936,7 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, //============================================================================= /*! This method launches the parallel container. - * It will may be placed on the ressources manager. + * It will may be placed on the resources manager. * * \param command to launch * \param container's parameters @@ -1907,7 +1946,7 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, */ //============================================================================= bool -SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, +SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine) @@ -1925,7 +1964,7 @@ SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, INFOS("[LaunchPaCONodeContainer] Waiting for the nodes of the parallel container"); // We are waiting all the nodes - for (int i = 0; i < params.nb_proc; i++) + for (int i = 0; i < params.nb_proc; i++) { CORBA::Object_var obj = CORBA::Object::_nil(); std::string theMachine(vect_machine[i]); @@ -1936,9 +1975,9 @@ SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, std::string container_node_name = name + proc_number; std::string containerNameInNS = _NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str()); INFOS("[LaunchPaCONodeContainer] Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine); - int count = TIME_OUT_TO_LAUNCH_CONT; + int count(GetTimeOutToLoaunchServer()); while (CORBA::is_nil(obj) && count) { - sleep(1) ; + SleepInSecond(1); count-- ; obj = _NS->Resolve(containerNameInNS.c_str()); } @@ -1963,7 +2002,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters return ret; } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::ContainerParameters& params, std::string machine_file_name, std::string & proxy_hostname) @@ -1971,26 +2010,26 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C return ""; } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params, const std::string & machine_file_name, - SALOME_ContainerManager::actual_launch_machine_t & vect_machine, - const std::string & proxy_hostname) + SALOME_ContainerManager::actual_launch_machine_t & vect_machine, + const std::string & proxy_hostname) { return ""; } -void +void SALOME_ContainerManager::LogConfiguration(const std::string & log_type, const std::string & exe_type, const std::string & container_name, const std::string & hostname, - std::string & begin, + std::string & begin, std::string & end) { } -CORBA::Object_ptr -SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, +CORBA::Object_ptr +SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& hostname) { @@ -1998,8 +2037,8 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, return ret; } -bool -SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, +bool +SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine) @@ -2007,4 +2046,3 @@ SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, return false; } #endif -