-// Copyright (C) 2007-2017 CEA/DEN, EDF R&D, OPEN CASCADE
+// Copyright (C) 2007-2021 CEA/DEN, EDF R&D, OPEN CASCADE
//
// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
#include "SALOME_LoadRateManager.hxx"
#include "SALOME_NamingService.hxx"
#include "SALOME_ResourcesManager_Client.hxx"
+#include "SALOME_Embedded_NamingService.hxx"
#include "SALOME_ModuleCatalog.hh"
#include "Basics_Utils.hxx"
#include "Basics_DirUtils.hxx"
#ifdef HAVE_MPI2
#include <mpi.h>
+#include <sys/wait.h>
#endif
#ifdef WIN32
#include <process.h>
#define getpid _getpid
+
+#ifndef S_ISREG
+#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
+#endif
+
#endif
#ifdef WITH_PACO_PARALLEL
*/
//=============================================================================
-SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService *ns)
+SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService_Abstract *ns)
: _nbprocUsed(1)
{
MESSAGE("constructor");
threadPol->destroy();
PortableServer::ObjectId_var id = _poa->activate_object(this);
CORBA::Object_var obj = _poa->id_to_reference(id);
- Engines::ContainerManager_var refContMan =
- Engines::ContainerManager::_narrow(obj);
-
- _NS->Register(refContMan,_ContainerManagerNameInNS);
+ Engines::ContainerManager_var refContMan = Engines::ContainerManager::_narrow(obj);
+ if(_NS)
+ _NS->Register(refContMan,_ContainerManagerNameInNS);
_isAppliSalomeDefined = (GetenvThreadSafe("APPLI") != 0);
#ifdef HAVE_MPI2
urifile << GetenvThreadSafeAsString("HOME") << "/.urifile_" << getpid();
setenv("OMPI_URI_FILE",urifile.str().c_str(),1);
if( GetenvThreadSafe("OMPI_URI_FILE") != NULL ){
- // get the pid of all ompi-server
- std::set<pid_t> thepids1 = getpidofprogram("ompi-server");
- // launch a new ompi-server
- std::string command;
- command = "ompi-server -r ";
- command += GetenvThreadSafeAsString("OMPI_URI_FILE");
- int status=SystemThreadSafe(command.c_str());
- if(status!=0)
- throw SALOME_Exception("Error when launching ompi-server");
- // get the pid of all ompi-server
- std::set<pid_t> thepids2 = getpidofprogram("ompi-server");
- // my ompi-server is the new one
- std::set<pid_t>::const_iterator it;
- for(it=thepids2.begin();it!=thepids2.end();it++)
- if(thepids1.find(*it) == thepids1.end())
- _pid_mpiServer = *it;
- if(_pid_mpiServer < 0)
- throw SALOME_Exception("Error when getting ompi-server id");
+ // Linux specific code
+ pid_t pid = fork(); // spawn a child process, following code is executed in both processes
+ if ( pid == 0 ) // I'm a child, replace myself with a new ompi-server
+ {
+ std::string uriarg = GetenvThreadSafeAsString("OMPI_URI_FILE");
+ execlp( "ompi-server", "ompi-server", "-r", uriarg.c_str(), NULL );
+ throw SALOME_Exception("Error when launching ompi-server"); // execlp failed
+ }
+ else if ( pid < 0 )
+ {
+ throw SALOME_Exception("fork() failed");
+ }
+ else // I'm a parent
+ {
+ //wait(NULL); // wait(?) for a child end
+ _pid_mpiServer = pid;
+ }
}
#elif defined(MPICH)
_pid_mpiServer = -1;
- // get the pid of all hydra_nameserver
- std::set<pid_t> thepids1 = getpidofprogram("hydra_nameserver");
- // launch a new hydra_nameserver
- std::string command;
- command = "hydra_nameserver &";
- SystemThreadSafe(command.c_str());
- // get the pid of all hydra_nameserver
- std::set<pid_t> thepids2 = getpidofprogram("hydra_nameserver");
- // my hydra_nameserver is the new one
- std::set<pid_t>::const_iterator it;
- for(it=thepids2.begin();it!=thepids2.end();it++)
- if(thepids1.find(*it) == thepids1.end())
- _pid_mpiServer = *it;
+ // Linux specific code
+ pid_t pid = fork(); // spawn a child process, following code is executed in both processes
+ if ( pid == 0 ) // I'm a child, replace myself with a new hydra_nameserver
+ {
+ execlp( "hydra_nameserver", "hydra_nameserver", NULL );
+ throw SALOME_Exception("Error when launching hydra_nameserver"); // execlp failed
+ }
+ else if ( pid < 0 )
+ {
+ throw SALOME_Exception("fork() failed");
+ }
+ else // I'm a parent
+ {
+ //wait(NULL);
+ _pid_mpiServer = pid;
+ }
#endif
#endif
{
MESSAGE("Shutdown");
ShutdownContainers();
- _NS->Destroy_Name(_ContainerManagerNameInNS);
+ if(_NS)
+ _NS->Destroy_Name(_ContainerManagerNameInNS);
PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
_poa->deactivate_object(oid);
}
void SALOME_ContainerManager::ShutdownContainers()
{
MESSAGE("ShutdownContainers");
-
+ if(!_NS)
+ return ;
SALOME::Session_var session = SALOME::Session::_nil();
CORBA::Long pid = 0;
CORBA::Object_var objS = _NS->Resolve("/Kernel/Session");
if(!CORBA::is_nil(cont) && pid != cont->getPID())
lstCont.push_back((*iter));
}
- catch(const CORBA::Exception& e)
+ catch(const CORBA::Exception&)
{
// ignore this entry and continue
}
break;
}
}
- catch(const SALOME_Exception &ex)
+ catch(const SALOME_Exception &ex) //!< TODO: unused variable
{
MESSAGE("[GiveContainer] Exception in ResourceManager find !: " << ex.what());
return ret;
std::string hostname(resource_definition.HostName);
std::string containerNameInNS;
if(params.isMPI){
- int nbproc;
- if ( params.nb_proc <= 0 )
- nbproc = 1;
- else
- nbproc = params.nb_proc;
+ int nbproc = params.nb_proc <= 0 ? 1 : params.nb_proc;
try
{
if( GetenvThreadSafe("LIBBATCH_NODEFILE") != NULL )
// Mpi already tested in step 5, specific code on BuildCommandToLaunch Local/Remote Container methods
// TODO -> separates Mpi from Classic/Exe
// Classic or Exe ?
- std::string container_exe = "SALOME_Container"; // Classic container
+ std::string container_exe = this->_isSSL ? "SALOME_Container_No_NS_Serv" : "SALOME_Container"; // Classic container
Engines::ContainerParameters local_params(params);
int found=0;
try
// Only if an application directory is set
if(hostname != Kernel_Utils::GetHostname() && _isAppliSalomeDefined)
{
- // Preparing remote command
- std::string command = "";
+
const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_selected));
- command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName);
- if (resInfo.AppliPath != "")
- command += resInfo.AppliPath;
- else
- {
- ASSERT(GetenvThreadSafe("APPLI"));
- command += GetenvThreadSafeAsString("APPLI");
- }
- command += "/runRemote.sh ";
- ASSERT(GetenvThreadSafe("NSHOST"));
- command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server
- command += " ";
- ASSERT(GetenvThreadSafe("NSPORT"));
- command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server
- command += " \"ls /tmp >/dev/null 2>&1\"";
+ std::string command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName,
+ resInfo.UserName, resInfo.AppliPath);
// Launch remote command
- int status = SystemThreadSafe(command.c_str());
+ command += " \"ls /tmp >/dev/null 2>&1\"";
+ // Anthony : command is NO MORE launched to improve dramatically time to launch containers
+ int status = 0;
if (status != 0)
{
// Error on resource - cannot launch commands
else
return Engines::Container::_narrow(obj);
}
- catch(const CORBA::Exception& e)
+ catch(const CORBA::Exception&)
{
return Engines::Container::_nil();
}
{
return false; // VSR 02/08/2013: Python containers are no more supported
bool ret = false;
- int len = strlen(ContainerName);
+ size_t len = strlen(ContainerName);
if (len >= 2)
if (strcmp(ContainerName + len - 2, "Py") == 0)
command = BuildTempFileToLaunchRemoteContainer(resource_name, params, tmpFileName);
else
{
- int nbproc;
const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_name));
- if (params.isMPI)
- {
- if ( params.nb_proc <= 0 )
- nbproc = 1;
- else
- nbproc = params.nb_proc;
- }
+ std::string wdir = params.workingdir.in();
// "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir
- // SALOME_Container containerName &"
- command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName);
-
- if (resInfo.AppliPath != "")
- command += resInfo.AppliPath; // path relative to user@machine $HOME
- else
- {
- ASSERT(GetenvThreadSafe("APPLI"));
- command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME
- }
-
- command += "/runRemote.sh ";
-
- ASSERT(GetenvThreadSafe("NSHOST"));
- command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server
-
- command += " ";
- ASSERT(GetenvThreadSafe("NSPORT"));
- command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server
-
- std::string wdir = params.workingdir.in();
- if(wdir != "")
- {
- command += " WORKINGDIR ";
- command += " '";
- if(wdir == "$TEMPDIR")
- wdir="\\$TEMPDIR";
- command += wdir; // requested working directory
- command += "'";
- }
+ // SALOME_Container containerName -ORBInitRef NameService=IOR:01000..."
+ // or
+ // "ssh -l user machine distantLauncher remote -m hostNS -p portNS -d dir
+ // -- SALOME_Container contName -ORBInitRef NameService=IOR:01000..."
+ command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName,
+ resInfo.UserName, resInfo.AppliPath,
+ wdir);
if(params.isMPI)
{
+ int nbproc = params.nb_proc <= 0 ? 1 : params.nb_proc;
command += " mpirun -np ";
std::ostringstream o;
o << nbproc << " ";
{
tmpFileName = BuildTemporaryFileName();
std::string command;
- int nbproc = 0;
std::ostringstream o;
if (params.isMPI)
{
- o << "mpirun -np ";
+ int nbproc = params.nb_proc <= 0 ? 1 : params.nb_proc;
- if ( params.nb_proc <= 0 )
- nbproc = 1;
- else
- nbproc = params.nb_proc;
+ o << "mpirun -np ";
o << nbproc << " ";
o << container_exe + " ";
}
+
+ o << _NS->ContainerName(params) << " ";
- o << _NS->ContainerName(params);
- o << " -";
- AddOmninamesParams(o);
-
+ if( this->_isSSL )
+ {
+ Engines::EmbeddedNamingService_var ns = GetEmbeddedNamingService();
+ CORBA::String_var iorNS = _orb->object_to_string(ns);
+ o << iorNS;
+ }
+ else
+ {
+ o << "-";
+ AddOmninamesParams(o);
+ }
+
std::ofstream command_file( tmpFileName.c_str() );
command_file << o.str();
command_file.close();
void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName)
{
- int lenght = tmpFileName.size();
- if ( lenght > 0)
+ size_t length = tmpFileName.size();
+ if ( length > 0)
{
#ifdef WIN32
std::string command = "del /F ";
#else
std::string command = "rm ";
#endif
- if ( lenght > 4 )
- command += tmpFileName.substr(0, lenght - 3 );
+ if ( length > 4 )
+ command += tmpFileName.substr(0, length - 3 );
else
command += tmpFileName;
command += '*';
*/
//=============================================================================
-void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream, SALOME_NamingService *ns)
+void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream, SALOME_NamingService_Abstract *ns)
{
- CORBA::String_var iorstr(ns->getIORaddr());
- fileStream << "ORBInitRef NameService=";
- fileStream << iorstr;
+ SALOME_NamingService *nsTrad(dynamic_cast<SALOME_NamingService *>(ns));
+ if(nsTrad)
+ {
+ CORBA::String_var iorstr(nsTrad->getIORaddr());
+ fileStream << "ORBInitRef NameService=";
+ fileStream << iorstr;
+ }
}
void SALOME_ContainerManager::MakeTheCommandToBeLaunchedASync(std::string& command)
if (params.isMPI)
{
- tempOutputFile << "mpirun -np ";
- int nbproc;
+ int nbproc = params.nb_proc <= 0 ? 1 : params.nb_proc;
- if ( params.nb_proc <= 0 )
- nbproc = 1;
- else
- nbproc = params.nb_proc;
-
- std::ostringstream o;
+ tempOutputFile << "mpirun -np ";
tempOutputFile << nbproc << " ";
#ifdef LAM_MPI
else if (resInfo.Protocol == srun)
{
- command = "srun -n 1 -N 1 --share --nodelist=";
+ command = "srun -n 1 -N 1 -s --mem-per-cpu=0 --cpu-bind=none --nodelist=";
std::string commandRcp = "rcp ";
commandRcp += tmpFileName;
commandRcp += " ";
{
if (_isAppliSalomeDefined)
{
-
- if (resInfo.Protocol == rsh)
- command = "rsh ";
- else if (resInfo.Protocol == ssh)
- command = "ssh ";
- else if (resInfo.Protocol == srun)
- command = "srun -n 1 -N 1 --share --nodelist=";
- else
- throw SALOME_Exception("Unknown protocol");
-
- if (resInfo.UserName != "")
- {
- command += "-l ";
- command += resInfo.UserName;
- command += " ";
- }
-
- command += resInfo.HostName;
- command += " ";
-
- if (resInfo.AppliPath != "")
- command += resInfo.AppliPath; // path relative to user@machine $HOME
- else
- {
- ASSERT(GetenvThreadSafe("APPLI"));
- command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME
- }
-
- command += "/runRemote.sh ";
-
- ASSERT(GetenvThreadSafe("NSHOST"));
- command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server
-
- command += " ";
- ASSERT(GetenvThreadSafe("NSPORT"));
- command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server
-
+ command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName,
+ resInfo.UserName, resInfo.AppliPath);
command += " mpirun -np 1 hostname -s > " + tmpFile;
}
else
}
-std::set<pid_t> SALOME_ContainerManager::getpidofprogram(const std::string program)
-{
- std::set<pid_t> thepids;
- std::string tmpFile = Kernel_Utils::GetTmpFileName();
- std::string cmd;
- std::string thepid;
- cmd = "pidof " + program + " > " + tmpFile;
- SystemThreadSafe(cmd.c_str());
- std::ifstream fpi(tmpFile.c_str(),std::ios::in);
- while(fpi >> thepid){
- thepids.insert(atoi(thepid.c_str()));
- }
- return thepids;
-}
-
+/*!
+ * Build the beginning of a shell command that runs a process on a remote
+ * resource: the protocol specific prefix followed by the SALOME remote
+ * environment wrapper (salome launcher or runRemote.sh).
+ *
+ * \param protocol  access protocol used to reach the resource
+ * \param hostname  name of the remote host
+ * \param username  user name on the remote host
+ * \param applipath remote application path; when empty, $APPLI is used
+ * \param workdir   requested working directory; when empty, none is passed
+ * \return the command prefix; the caller appends the program to execute
+ * \throw SALOME_Exception when the protocol is unknown
+ */
std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocolType protocol,
const std::string & hostname,
- const std::string & username)
+ const std::string & username,
+ const std::string & applipath,
+ const std::string & workdir)
{
std::ostringstream command;
switch (protocol)
case srun:
// no need to redefine the user with srun, the job user is taken by default
// (note: for srun, user id can be specified with " --uid=<user>")
- command << "srun -n 1 -N 1 --share --nodelist=" << hostname << " ";
+ command << "srun -n 1 -N 1 -s --mem-per-cpu=0 --cpu-bind=none --nodelist=" << hostname << " ";
break;
case pbsdsh:
command << "pbsdsh -o -h " << hostname << " ";
throw SALOME_Exception("Unknown protocol");
}
+  // An empty applipath means "same path as the local $APPLI".
+  std::string remoteapplipath;
+  if (applipath.empty())
+    remoteapplipath = GetenvThreadSafeAsString("APPLI");
+  else
+    remoteapplipath = applipath;
+
+  ASSERT(GetenvThreadSafe("NSHOST"));
+  ASSERT(GetenvThreadSafe("NSPORT"));
+
+  // $APPLI points either to an application directory, or to a salome launcher file
+  // we prepare the remote command according to the case
+  // $APPLI may be unset: never hand a null pointer to stat()
+  const char *localAppli = GetenvThreadSafe("APPLI");
+  struct stat statbuf;
+  if (localAppli && stat(localAppli, &statbuf) == 0 && S_ISREG(statbuf.st_mode))
+  {
+    // if $APPLI is a regular file, we assume it's a salome Launcher
+    // generate a command with a salome launcher
+    command << remoteapplipath
+            << " remote"
+            << " -m "
+            << GetenvThreadSafeAsString("NSHOST") // hostname of CORBA name server
+            << " -p "
+            << GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server
+    if (!workdir.empty())
+      command << " -d " << workdir; // leading blank keeps "-d" apart from the port value
+    command << " -- " ;
+  }
+  else // we assume it's a salome application directory
+  {
+    // generate a command with runRemote.sh
+    command << remoteapplipath;
+    command << "/runRemote.sh ";
+    command << GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server
+    command << " ";
+    command << GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server
+    if (!workdir.empty())
+    {
+      command << " WORKINGDIR ";
+      command << " '";
+      if (workdir == "$TEMPDIR")
+        command << "\\$TEMPDIR"; // escaped so that it is expanded on the remote side only
+      else
+        command << workdir; // requested working directory
+      command << "'";
+    }
+  }
+
return command.str();
}
return system(command);
}
+/*!
+ * Start \a command as a child process, without going through a shell, and
+ * return the pid of that child without waiting for its termination.
+ *
+ * \param command program to run (looked up in PATH) followed by its arguments;
+ *                must contain at least one element
+ * \return pid of the spawned child process
+ * \throw SALOME_Exception if \a command is empty, if fork() fails, or on
+ *        Windows where fork() is not available
+ */
+long SALOME_ContainerManager::SystemWithPIDThreadSafe(const std::vector<std::string>& command)
+{
+  Utils_Locker lock(&_systemMutex);
+  if(command.empty())
+    throw SALOME_Exception("SystemWithPIDThreadSafe : command is expected to have a length of size 1 at least !");
+#ifndef WIN32
+  // Everything that allocates memory is prepared before fork(): the child of
+  // a multithreaded process should restrict itself to async-signal-safe calls.
+  std::vector<char *> args;
+  args.reserve(command.size()+1);
+  for(const std::string& arg : command)
+    args.push_back(const_cast<char *>(arg.c_str())); // execvp does not modify the strings
+  args.push_back(nullptr);
+  const std::string errorMsg = "Error when launching " + command[0] + "\n";
+
+  pid_t pid ( fork() ) ; // spawn a child process, following code is executed in both processes
+  if ( pid == 0 ) // I'm a child, replace myself with the requested program
+  {
+    execvp( args[0] , args.data() );
+    // execvp returns only on failure. Throwing here would unwind a copy of
+    // the parent call stack and leave a duplicate of the caller running, so
+    // report the error and terminate the child immediately.
+    if ( ::write(STDERR_FILENO, errorMsg.data(), errorMsg.size()) < 0 )
+    {
+      // nothing more can be reported from here
+    }
+    _exit(127); // conventional "command could not be executed" status
+  }
+  if ( pid < 0 )
+    throw SALOME_Exception("fork() failed");
+  return pid; // I'm a parent
+#else
+  throw SALOME_Exception("fork() failed"); // no fork() on Windows
+#endif
+}
+
+
#ifdef WITH_PACO_PARALLEL
//=============================================================================
remote_execution = true;
}
- // Log environnement
+ // Log environment
std::string log_type("");
char * get_val = GetenvThreadSafe("PARALLEL_LOG");
if (get_val)
ParserResourcesType resource_definition =
_resManager->GetResourceDefinition(params.resource_params.name.in());
- // Log environnement
+ // Log environment
std::string log_type("");
char * get_val = GetenvThreadSafe("PARALLEL_LOG");
if (get_val)
//=============================================================================
/*! This method launches the parallel container.
- * It will may be placed on the ressources manager.
+ * It may be placed on the resources manager.
*
* \param command to launch
* \param container's parameters
#else
Engines::Container_ptr
-SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters& params,
- std::string resource_selected)
+SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters& /*params*/,
+ std::string /*resource_selected*/)
{
Engines::Container_ptr ret = Engines::Container::_nil();
INFOS("[StarPaCOPPContainer] is disabled !");
}
std::string
-SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::ContainerParameters& params,
- std::string machine_file_name,
- std::string & proxy_hostname)
+SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::ContainerParameters& /*params*/,
+ std::string /*machine_file_name*/,
+ std::string & /*proxy_hostname*/)
{
return "";
}
std::string
-SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params,
- const std::string & machine_file_name,
- SALOME_ContainerManager::actual_launch_machine_t & vect_machine,
- const std::string & proxy_hostname)
+SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& /*params*/,
+ const std::string & /*machine_file_name*/,
+ SALOME_ContainerManager::actual_launch_machine_t & /*vect_machine*/,
+ const std::string & /*proxy_hostname*/)
{
return "";
}
void
-SALOME_ContainerManager::LogConfiguration(const std::string & log_type,
- const std::string & exe_type,
- const std::string & container_name,
- const std::string & hostname,
- std::string & begin,
- std::string & end)
+SALOME_ContainerManager::LogConfiguration(const std::string & /*log_type*/,
+ const std::string & /*exe_type*/,
+ const std::string & /*container_name*/,
+ const std::string & /*hostname*/,
+ std::string & /*begin*/,
+ std::string & /*end*/)
{
}
CORBA::Object_ptr
-SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command,
- const Engines::ContainerParameters& params,
- const std::string& hostname)
+SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& /*command*/,
+ const Engines::ContainerParameters& /*params*/,
+ const std::string& /*hostname*/)
{
CORBA::Object_ptr ret = CORBA::Object::_nil();
return ret;
}
bool
-SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command,
- const Engines::ContainerParameters& params,
- const std::string& name,
- SALOME_ContainerManager::actual_launch_machine_t & vect_machine)
+SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& /*command*/,
+ const Engines::ContainerParameters& /*params*/,
+ const std::string& /*name*/,
+ SALOME_ContainerManager::actual_launch_machine_t & /*vect_machine*/)
{
return false;
}