From: secher Date: Fri, 4 Mar 2011 10:49:42 +0000 (+0000) Subject: allow two Salome sessions with mpi, to run in the same time X-Git-Tag: Start_BR_19998_21191~28 X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=82bddf85bfffd1ffb8d4763c34b9870c264d1f4f;p=modules%2Fkernel.git allow two Salome sessions with mpi, to run in the same time --- diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 66577e1c1..f960e152c 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -27,6 +27,7 @@ #include "Basics_DirUtils.hxx" #include #include +#include #ifndef WIN32 #include #endif @@ -86,17 +87,30 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer #ifdef HAVE_MPI2 #ifdef WITHOPENMPI + _pid_ompiServer = -1; + // the urifile name depends on pid of the process std::stringstream urifile; urifile << getenv("HOME") << "/.urifile_" << getpid(); setenv("OMPI_URI_FILE",urifile.str().c_str(),1); if( getenv("OMPI_URI_FILE") != NULL ){ - system("killall -q ompi-server"); + // get the pid of all ompi-server + std::set thepids1 = getpidofprogram("ompi-server"); + // launch a new ompi-server std::string command; command = "ompi-server -r "; command += getenv("OMPI_URI_FILE"); int status=system(command.c_str()); if(status!=0) throw SALOME_Exception("Error when launching ompi-server"); + // get the pid of all ompi-server + std::set thepids2 = getpidofprogram("ompi-server"); + // my ompi-server is the new one + std::set::const_iterator it; + for(it=thepids2.begin();it!=thepids2.end();it++) + if(thepids1.find(*it) == thepids1.end()) + _pid_ompiServer = *it; + if(_pid_ompiServer < 0) + throw SALOME_Exception("Error when getting ompi-server id"); } #endif #endif @@ -116,10 +130,11 @@ SALOME_ContainerManager::~SALOME_ContainerManager() #ifdef HAVE_MPI2 #ifdef WITHOPENMPI if( getenv("OMPI_URI_FILE") != NULL ){ - int status=system("killall -q ompi-server"); - if(status!=0) + // kill my ompi-server + if( kill(_pid_ompiServer,SIGTERM) != 0 ) throw SALOME_Exception("Error when killing ompi-server"); - status=system("rm -f ${OMPI_URI_FILE}"); + // delete my urifile + int status=system("rm -f ${OMPI_URI_FILE}"); if(status!=0) throw SALOME_Exception("Error when removing urifile"); } @@ -1098,6 +1113,21 @@ std::string SALOME_ContainerManager::machinesFile(const int nbproc) } +std::set SALOME_ContainerManager::getpidofprogram(const std::string program) +{ + std::set thepids; + std::string tmpFile = Kernel_Utils::GetTmpFileName(); + std::string cmd; + std::string thepid; + cmd = "pidof " + program + " > " + tmpFile; + system(cmd.c_str()); + std::ifstream fpi(tmpFile.c_str(),std::ios::in); + while(fpi >> thepid){ + thepids.insert(atoi(thepid.c_str())); + } + return thepids; +} + bool SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & params, std::string resource_selected) { diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index 59ceee4e8..d13d8bd18 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -32,6 +32,7 @@ #include "SALOME_LoadRateManager.hxx" #include +#include class SALOME_NamingService; @@ -89,6 +90,8 @@ protected: std::string machinesFile(const int nbproc); + std::set getpidofprogram(const std::string program); + CORBA::ORB_var _orb; PortableServer::POA_var _poa; @@ -110,6 +113,8 @@ protected: static omni_mutex _numInstanceMutex ; // lib and instance protection + pid_t _pid_ompiServer; + // Begin of PacO++ Parallel extension typedef std::vector actual_launch_machine_t;