Engines::Container_ptr
SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& params)
{
+ string machFile;
Engines::Container_ptr ret = Engines::Container::_nil();
// Step 0: Default mode is start
}
MESSAGE("[GiveContainer] Resource selected is: " << resource_selected);
- _numInstanceMutex.lock();
-
// Step 5: get container in the naming service
Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str());
std::string hostname(resource_definition->name.in());
else
nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node;
if( getenv("LIBBATCH_NODEFILE") != NULL )
- machinesFile(nbproc);
+ machFile = machinesFile(nbproc);
// A mpi parallel container register on zero node in NS
- containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname).c_str());
+ containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname,machFile).c_str());
}
else
containerNameInNS = _NS->BuildContainerNameForNS(params, hostname.c_str());
if(!cont->_non_existent())
{
if(std::string(params.mode.in())=="getorstart" or std::string(params.mode.in())=="get"){
- _numInstanceMutex.unlock();
return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/
}
else
if (std::string(local_params.parallelLib.in()) != "")
{
INFOS("[GiveContainer] PaCO++ container are not currently available");
- _numInstanceMutex.unlock();
return ret;
}
// Classic or Exe ?
if (CORBA::is_nil (Catalog))
{
INFOS("[GiveContainer] Module Catalog is not found -> cannot launch a container");
- _numInstanceMutex.unlock();
return ret;
}
// Loop through component list
if(found)
{
INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" );
- _numInstanceMutex.unlock();
return Engines::Container::_nil();
}
MESSAGE("[GiveContainer] Exe container found !: " << container_exe_tmp);
catch (ServiceUnreachable&)
{
INFOS("Caught exception: Naming Service Unreachable");
- _numInstanceMutex.unlock();
return ret;
}
catch (...)
{
INFOS("Caught unknown exception.");
- _numInstanceMutex.unlock();
return ret;
}
std::string command;
// if a parallel container is launched in batch job, command is: "mpirun -np nbproc -machinefile nodesfile SALOME_MPIContainer"
if( getenv("LIBBATCH_NODEFILE") != NULL && params.isMPI )
- command = BuildCommandToLaunchLocalContainer(params,container_exe);
+ command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe);
// if a container is launched on localhost, command is "SALOME_Container" or "mpirun -np nbproc SALOME_MPIContainer"
else if(hostname == Kernel_Utils::GetHostname())
- command = BuildCommandToLaunchLocalContainer(params, container_exe);
+ command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe);
// if a container is launched in remote mode, command is "ssh resource_selected SALOME_Container" or "ssh resource_selected mpirun -np nbproc SALOME_MPIContainer"
else
command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe);
// launch container with a system call
int status=system(command.c_str());
- _numInstanceMutex.unlock();
-
if (status == -1){
MESSAGE("SALOME_ContainerManager::StartContainer rsh failed (system command status -1)");
RmTmpFile(_TmpFileName); // command file can be removed here
//=============================================================================
string
SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
-(const Engines::ContainerParameters& params, const std::string& container_exe)
+(const Engines::ContainerParameters& params, const std::string& machinesFile, const std::string& container_exe)
{
_TmpFileName = BuildTemporaryFileName();
string command;
o << nbproc << " ";
if( getenv("LIBBATCH_NODEFILE") != NULL )
- o << "-machinefile " << _machinesFile << " ";
+ o << "-machinefile " << machinesFile << " ";
#ifdef WITHLAM
o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
}
#endif
-string SALOME_ContainerManager::GetMPIZeroNode(string machine)
+string SALOME_ContainerManager::GetMPIZeroNode(const string machine, const string machinesFile)
{
int status;
string zeronode;
if( getenv("LIBBATCH_NODEFILE") == NULL )
cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile;
else
- cmd = "mpirun -np 1 -machinefile " + _machinesFile + " hostname > " + tmpFile;
+ cmd = "mpirun -np 1 -machinefile " + machinesFile + " hostname > " + tmpFile;
status = system(cmd.c_str());
if( status == 0 ){
return zeronode;
}
-void SALOME_ContainerManager::machinesFile(const int nbproc)
+string SALOME_ContainerManager::machinesFile(const int nbproc)
{
string tmp;
string nodesFile = getenv("LIBBATCH_NODEFILE");
- _machinesFile = Kernel_Utils::GetTmpFileName();
+ string machinesFile = Kernel_Utils::GetTmpFileName();
ifstream fpi(nodesFile.c_str(),ios::in);
- ofstream fpo(_machinesFile.c_str(),ios::out);
+ ofstream fpo(machinesFile.c_str(),ios::out);
+
+ _numInstanceMutex.lock();
for(int i=0;i<_nbprocUsed;i++)
fpi >> tmp;
fpi.close();
fpo.close();
+ _numInstanceMutex.unlock();
+
+ return machinesFile;
+
}
const std::string& container_exe="SALOME_Container");
std::string BuildCommandToLaunchLocalContainer(const Engines::ContainerParameters& params,
+ const std::string& machinesFile,
const std::string& container_exe="SALOME_Container");
std::string BuildTempFileToLaunchRemoteContainer(const std::string& resource_name,
std::string BuildTemporaryFileName() const;
- std::string GetMPIZeroNode(std::string machine);
+ std::string GetMPIZeroNode(const std::string machine, const std::string machinesFile);
- void machinesFile(const int nbproc);
+ std::string machinesFile(const int nbproc);
// For PacO++ Parallel extension
typedef std::vector<std::string> actual_launch_machine_t;
//! attribute that contains the number of processes used in batch mode by MPI containers
int _nbprocUsed;
- //! attributes that contains the machinefile for MPI containers
- std::string _machinesFile;
-
static omni_mutex _numInstanceMutex ; // lib and instance protection
};