X-Git-Url: http://git.salome-platform.org/gitweb/?a=blobdiff_plain;f=src%2FContainer%2FSALOME_ContainerManager.cxx;h=676adc8e34b03a7cc0252c8db70bb569ca399233;hb=52111d0ad7a0d8acccc1bb4e49bb5877d949b844;hp=d174bce548459e0fcf3e8ec72db370dbb8e5467d;hpb=3be590637f919313f2bceabf1bc45d69c2541547;p=modules%2Fkernel.git diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index d174bce54..676adc8e3 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -1,4 +1,4 @@ -// Copyright (C) 2007-2013 CEA/DEN, EDF R&D, OPEN CASCADE +// Copyright (C) 2007-2016 CEA/DEN, EDF R&D, OPEN CASCADE // // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS @@ -6,7 +6,7 @@ // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either -// version 2.1 of the License. +// version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -21,7 +21,10 @@ // #include "SALOME_ContainerManager.hxx" +#include "SALOME_ResourcesManager.hxx" +#include "SALOME_LoadRateManager.hxx" #include "SALOME_NamingService.hxx" +#include "SALOME_ResourcesManager_Client.hxx" #include "SALOME_ModuleCatalog.hh" #include "Basics_Utils.hxx" #include "Basics_DirUtils.hxx" @@ -39,7 +42,11 @@ #include #include CORBA_CLIENT_HEADER(SALOME_Session) -#ifdef WNT +#ifdef HAVE_MPI2 +#include +#endif + +#ifdef WIN32 #include #define getpid _getpid #endif @@ -48,16 +55,19 @@ #include "PaCOPP.hxx" #endif -#define TIME_OUT_TO_LAUNCH_CONT 60 +const int SALOME_ContainerManager::TIME_OUT_TO_LAUNCH_CONT=60; -const char *SALOME_ContainerManager::_ContainerManagerNameInNS = +const char *SALOME_ContainerManager::_ContainerManagerNameInNS = "/ContainerManager"; omni_mutex SALOME_ContainerManager::_numInstanceMutex; +Utils_Mutex SALOME_ContainerManager::_getenvMutex; + +Utils_Mutex SALOME_ContainerManager::_systemMutex; //============================================================================= -/*! +/*! * Constructor * \param orb * Define a CORBA single thread policy for the server, which avoid to deal @@ -65,21 +75,21 @@ omni_mutex SALOME_ContainerManager::_numInstanceMutex; */ //============================================================================= -SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns):_nbprocUsed(1) +SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService *ns) + : _nbprocUsed(1) { MESSAGE("constructor"); _NS = ns; - _ResManager = rm; + _resManager = new SALOME_ResourcesManager_Client(ns); PortableServer::POAManager_var pman = poa->the_POAManager(); _orb = CORBA::ORB::_duplicate(orb) ; CORBA::PolicyList policies; policies.length(1); - PortableServer::ThreadPolicy_var threadPol = - poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL); + PortableServer::ThreadPolicy_var threadPol(poa->create_thread_policy(PortableServer::ORB_CTRL_MODEL)); policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol); - _poa = poa->create_POA("SThreadPOA",pman,policies); + _poa = poa->create_POA("MThreadPOA",pman,policies); threadPol->destroy(); PortableServer::ObjectId_var id = _poa->activate_object(this); CORBA::Object_var obj = _poa->id_to_reference(id); @@ -87,23 +97,23 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer Engines::ContainerManager::_narrow(obj); _NS->Register(refContMan,_ContainerManagerNameInNS); - _isAppliSalomeDefined = (getenv("APPLI") != 0); + _isAppliSalomeDefined = (GetenvThreadSafe("APPLI") != 0); #ifdef HAVE_MPI2 -#ifdef WITHOPENMPI +#ifdef OPEN_MPI _pid_mpiServer = -1; // the urifile name depends on pid of the process std::stringstream urifile; - urifile << getenv("HOME") << "/.urifile_" << getpid(); + urifile << GetenvThreadSafeAsString("HOME") << "/.urifile_" << getpid(); setenv("OMPI_URI_FILE",urifile.str().c_str(),1); - if( getenv("OMPI_URI_FILE") != NULL ){ + if( GetenvThreadSafe("OMPI_URI_FILE") != NULL ){ // get the pid of all ompi-server std::set thepids1 = getpidofprogram("ompi-server"); // launch a new ompi-server std::string command; command = "ompi-server -r "; - command += getenv("OMPI_URI_FILE"); - int status=system(command.c_str()); + command += GetenvThreadSafeAsString("OMPI_URI_FILE"); + int status=SystemThreadSafe(command.c_str()); if(status!=0) throw SALOME_Exception("Error when launching ompi-server"); // get the pid of all ompi-server @@ -116,14 +126,14 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer if(_pid_mpiServer < 0) throw SALOME_Exception("Error when getting ompi-server id"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) _pid_mpiServer = -1; // get the pid of all hydra_nameserver std::set thepids1 = getpidofprogram("hydra_nameserver"); // launch a new hydra_nameserver std::string command; command = "hydra_nameserver &"; - system(command.c_str()); + SystemThreadSafe(command.c_str()); // get the pid of all hydra_nameserver std::set thepids2 = getpidofprogram("hydra_nameserver"); // my hydra_nameserver is the new one @@ -138,7 +148,7 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer } //============================================================================= -/*! +/*! * destructor */ //============================================================================= @@ -146,18 +156,19 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer SALOME_ContainerManager::~SALOME_ContainerManager() { MESSAGE("destructor"); + delete _resManager; #ifdef HAVE_MPI2 -#ifdef WITHOPENMPI - if( getenv("OMPI_URI_FILE") != NULL ){ +#ifdef OPEN_MPI + if( GetenvThreadSafe("OMPI_URI_FILE") != NULL ){ // kill my ompi-server if( kill(_pid_mpiServer,SIGTERM) != 0 ) throw SALOME_Exception("Error when killing ompi-server"); // delete my urifile - int status=system("rm -f ${OMPI_URI_FILE}"); + int status=SystemThreadSafe("rm -f ${OMPI_URI_FILE}"); if(status!=0) throw SALOME_Exception("Error when removing urifile"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) // kill my hydra_nameserver if(_pid_mpiServer > -1) if( kill(_pid_mpiServer,SIGTERM) != 0 ) @@ -237,7 +248,7 @@ void SALOME_ContainerManager::ShutdownContainers() MESSAGE("ShutdownContainers: " << (*iter)); cont->Shutdown(); } - else + else MESSAGE("ShutdownContainers: no container ref for " << (*iter)); } catch(CORBA::SystemException& e) @@ -263,11 +274,10 @@ void SALOME_ContainerManager::ShutdownContainers() * \return the container or nil */ //============================================================================= -Engines::Container_ptr -SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& params) +Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& params) { std::string machFile; - Engines::Container_ptr ret = Engines::Container::_nil(); + Engines::Container_ptr ret(Engines::Container::_nil()); // Step 0: Default mode is start Engines::ContainerParameters local_params(params); @@ -298,21 +308,22 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param // Step 2: Get all possibleResources from the parameters // Consider only resources that can run containers - local_params.resource_params.can_run_containers = true; - Engines::ResourceList_var possibleResources = _ResManager->GetFittingResources(local_params.resource_params); - MESSAGE("[GiveContainer] - length of possible resources " << possibleResources->length()); + resourceParams resource_params = resourceParameters_CORBAtoCPP(local_params.resource_params); + resource_params.can_run_containers = true; + std::vector possibleResources = _resManager->GetFittingResources(resource_params); + MESSAGE("[GiveContainer] - length of possible resources " << possibleResources.size()); std::vector local_resources; - // Step 3: if mode is "get" keep only machines with existing containers + // Step 3: if mode is "get" keep only machines with existing containers if(mode == "get") { - for(unsigned int i=0; i < possibleResources->length(); i++) + for(unsigned int i=0; i < possibleResources.size(); i++) { - Engines::Container_ptr cont = FindContainer(params, possibleResources[i].in()); + Engines::Container_ptr cont = FindContainer(params, possibleResources[i]); try { if(!cont->_non_existent()) - local_resources.push_back(std::string(possibleResources[i])); + local_resources.push_back(possibleResources[i]); } catch(CORBA::Exception&) {} } @@ -325,8 +336,7 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param } } else - for(unsigned int i=0; i < possibleResources->length(); i++) - local_resources.push_back(std::string(possibleResources[i])); + local_resources = possibleResources; // Step 4: select the resource where to get/start the container bool resource_available = true; @@ -340,7 +350,7 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param { try { - resource_selected = _ResManager->GetImpl()->Find(params.resource_params.policy.in(), resources); + resource_selected = _resManager->Find(params.resource_params.policy.in(), resources); // Remove resource_selected from vector std::vector::iterator it; for (it=resources.begin() ; it < resources.end(); it++ ) @@ -358,8 +368,8 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param MESSAGE("[GiveContainer] Resource selected is: " << resource_selected); // Step 5: Create container name - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str()); - std::string hostname(resource_definition->hostname.in()); + ParserResourcesType resource_definition = _resManager->GetResourceDefinition(resource_selected); + std::string hostname(resource_definition.HostName); std::string containerNameInNS; if(params.isMPI){ int nbproc; @@ -369,7 +379,7 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param nbproc = params.nb_proc; try { - if( getenv("LIBBATCH_NODEFILE") != NULL ) + if( GetenvThreadSafe("LIBBATCH_NODEFILE") != NULL ) machFile = machinesFile(nbproc); } catch(const SALOME_Exception & ex) @@ -380,7 +390,8 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param return ret; } // A mpi parallel container register on zero node in NS - containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname,machFile).c_str()); + std::string mpiZeroNode = GetMPIZeroNode(resource_selected,machFile).c_str(); + containerNameInNS = _NS->BuildContainerNameForNS(params, mpiZeroNode.c_str()); } else containerNameInNS = _NS->BuildContainerNameForNS(params, hostname.c_str()); @@ -389,29 +400,33 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param // Step 6: check if the name exists in naming service //if params.mode == "getorstart" or "get" use the existing container //if params.mode == "start" shutdown the existing container before launching a new one with that name - CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); - if (!CORBA::is_nil(obj)) - { - try - { - Engines::Container_var cont=Engines::Container::_narrow(obj); - if(!cont->_non_existent()) + + { // critical section + Utils_Locker lock (&_giveContainerMutex1); + CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); + if (!CORBA::is_nil(obj)) { - if(std::string(params.mode.in())=="getorstart" || std::string(params.mode.in())=="get"){ - return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/ + try + { + Engines::Container_var cont=Engines::Container::_narrow(obj); + if(!cont->_non_existent()) + { + if(std::string(params.mode.in())=="getorstart" || std::string(params.mode.in())=="get"){ + return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/ + } + else + { + INFOS("[GiveContainer] A container is already registered with the name: " << containerNameInNS << ", shutdown the existing container"); + cont->Shutdown(); // shutdown the registered container if it exists + } + } } - else + catch(CORBA::Exception&) { - INFOS("[GiveContainer] A container is already registered with the name: " << containerNameInNS << ", shutdown the existing container"); - cont->Shutdown(); // shutdown the registered container if it exists + INFOS("[GiveContainer] CORBA::Exception ignored when trying to get the container - we start a new one"); } } - } - catch(CORBA::Exception&) - { - INFOS("[GiveContainer] CORBA::Exception ignored when trying to get the container - we start a new one"); - } - } + } // end critical section Engines::Container_var cont = LaunchContainer(params, resource_selected, hostname, machFile, containerNameInNS); if (!CORBA::is_nil(cont)) { @@ -440,221 +455,204 @@ SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& par const std::string & machFile, const std::string & containerNameInNS) { - - // Step 1: type of container: PaCO, Exe, Mpi or Classic - // Mpi already tested in step 5, specific code on BuildCommandToLaunch Local/Remote Container methods - // TODO -> separates Mpi from Classic/Exe - // Classic or Exe ? - std::string container_exe = "SALOME_Container"; // Classic container - Engines::ContainerParameters local_params(params); - Engines::Container_ptr ret = Engines::Container::_nil(); - int found=0; - try - { - CORBA::String_var container_exe_tmp; - CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); - SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; - if (CORBA::is_nil (Catalog)) + std::string user,command,logFilename,tmpFileName; + int status; + Engines::Container_ptr ret(Engines::Container::_nil()); + {//start of critical section + Utils_Locker lock (&_giveContainerMutex1); + // Step 1: type of container: PaCO, Exe, Mpi or Classic + // Mpi already tested in step 5, specific code on BuildCommandToLaunch Local/Remote Container methods + // TODO -> separates Mpi from Classic/Exe + // Classic or Exe ? + std::string container_exe = "SALOME_Container"; // Classic container + Engines::ContainerParameters local_params(params); + int found=0; + try { - INFOS("[GiveContainer] Module Catalog is not found -> cannot launch a container"); - return ret; + CORBA::String_var container_exe_tmp; + CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); + SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; + if (CORBA::is_nil (Catalog)) + { + INFOS("[GiveContainer] Module Catalog is not found -> cannot launch a container"); + return ret; + } + // Loop through component list + for(unsigned int i=0; i < local_params.resource_params.componentList.length(); i++) + { + const char* compoi = local_params.resource_params.componentList[i]; + SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi); + if (CORBA::is_nil (compoInfo)) + { + continue; + } + SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type(); + container_exe_tmp=compoInfo->implementation_name(); + if(impl==SALOME_ModuleCatalog::CEXE) + { + if(found) + { + INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" ); + return Engines::Container::_nil(); + } + MESSAGE("[GiveContainer] Exe container found !: " << container_exe_tmp); + container_exe = container_exe_tmp.in(); + found=1; + } + } } - // Loop through component list - for(unsigned int i=0; i < local_params.resource_params.componentList.length(); i++) + catch (ServiceUnreachable&) { - const char* compoi = local_params.resource_params.componentList[i]; - SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi); - if (CORBA::is_nil (compoInfo)) - { - continue; - } - SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type(); - container_exe_tmp=compoInfo->implementation_name(); - if(impl==SALOME_ModuleCatalog::CEXE) - { - if(found) - { - INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" ); - return Engines::Container::_nil(); - } - MESSAGE("[GiveContainer] Exe container found !: " << container_exe_tmp); - container_exe = container_exe_tmp.in(); - found=1; - } + INFOS("Caught exception: Naming Service Unreachable"); + return ret; } - } - catch (ServiceUnreachable&) - { - INFOS("Caught exception: Naming Service Unreachable"); - return ret; - } - catch (...) - { - INFOS("Caught unknown exception."); - return ret; - } - - // Step 2: test resource - // Only if an application directory is set - if(hostname != Kernel_Utils::GetHostname() && _isAppliSalomeDefined) - { - // Preparing remote command - std::string command = ""; - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_selected); - command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName); - if (resInfo.AppliPath != "") - command += resInfo.AppliPath; - else + catch (...) { - ASSERT(getenv("APPLI")); - command += getenv("APPLI"); + INFOS("Caught unknown exception."); + return ret; } - command += "/runRemote.sh "; - ASSERT(getenv("NSHOST")); - command += getenv("NSHOST"); // hostname of CORBA name server - command += " "; - ASSERT(getenv("NSPORT")); - command += getenv("NSPORT"); // port of CORBA name server - command += " ls /tmp >/dev/null 2>&1"; - // Launch remote command - int status = system(command.c_str()); - if (status != 0) - { - // Error on resource - cannot launch commands - INFOS("[LaunchContainer] Cannot launch commands on machine " << hostname); - INFOS("[LaunchContainer] Command was " << command); + // Step 2: test resource + // Only if an application directory is set + if(hostname != Kernel_Utils::GetHostname() && _isAppliSalomeDefined) + { + // Preparing remote command + std::string command = ""; + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_selected)); + command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName); + if (resInfo.AppliPath != "") + command += resInfo.AppliPath; + else + { + ASSERT(GetenvThreadSafe("APPLI")); + command += GetenvThreadSafeAsString("APPLI"); + } + command += "/runRemote.sh "; + ASSERT(GetenvThreadSafe("NSHOST")); + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server + command += " "; + ASSERT(GetenvThreadSafe("NSPORT")); + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server + command += " \"ls /tmp >/dev/null 2>&1\""; + + // Launch remote command + int status = SystemThreadSafe(command.c_str()); + if (status != 0) + { + // Error on resource - cannot launch commands + INFOS("[LaunchContainer] Cannot launch commands on machine " << hostname); + INFOS("[LaunchContainer] Command was " << command); #ifndef WIN32 - INFOS("[LaunchContainer] Command status is " << WEXITSTATUS(status)); + INFOS("[LaunchContainer] Command status is " << WEXITSTATUS(status)); #endif - return Engines::Container::_nil(); - } - } + return Engines::Container::_nil(); + } + } - // Step 3: start a new container - // Check if a PaCO container - // PaCO++ - if (std::string(local_params.parallelLib.in()) != "") - { - ret = StartPaCOPPContainer(params, resource_selected); - return ret; - } - // Other type of containers... - MESSAGE("[GiveContainer] Try to launch a new container on " << resource_selected); - std::string command; - // if a parallel container is launched in batch job, command is: "mpirun -np nbproc -machinefile nodesfile SALOME_MPIContainer" - if( getenv("LIBBATCH_NODEFILE") != NULL && params.isMPI ) - command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe); - // if a container is launched on localhost, command is "SALOME_Container" or "mpirun -np nbproc SALOME_MPIContainer" - else if(hostname == Kernel_Utils::GetHostname()) - command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe); - // if a container is launched in remote mode, command is "ssh resource_selected SALOME_Container" or "ssh resource_selected mpirun -np nbproc SALOME_MPIContainer" - else - command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe); + // Step 3: start a new container + // Check if a PaCO container + // PaCO++ + if (std::string(local_params.parallelLib.in()) != "") + { + ret = StartPaCOPPContainer(params, resource_selected); + return ret; + } + // Other type of containers... + MESSAGE("[GiveContainer] Try to launch a new container on " << resource_selected); + // if a parallel container is launched in batch job, command is: "mpirun -np nbproc -machinefile nodesfile SALOME_MPIContainer" + if( GetenvThreadSafe("LIBBATCH_NODEFILE") != NULL && params.isMPI ) + command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe, tmpFileName); + // if a container is launched on localhost, command is "SALOME_Container" or "mpirun -np nbproc SALOME_MPIContainer" + else if(hostname == Kernel_Utils::GetHostname()) + command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe, tmpFileName); + // if a container is launched in remote mode, command is "ssh resource_selected SALOME_Container" or "ssh resource_selected mpirun -np nbproc SALOME_MPIContainer" + else + command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe); - //redirect stdout and stderr in a file -#ifdef WNT - std::string logFilename=getenv("TEMP"); - logFilename += "\\"; - std::string user = getenv( "USERNAME" ); -#else - std::string user = getenv( "USER" ); - std::string logFilename="/tmp"; - char* val = getenv("SALOME_TMP_DIR"); - if(val) - { - struct stat file_info; - stat(val, &file_info); - bool is_dir = S_ISDIR(file_info.st_mode); - if (is_dir)logFilename=val; - else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl; - } - logFilename += "/"; -#endif - logFilename += _NS->ContainerName(params)+"_"+ resource_selected +"_"+user; - std::ostringstream tmp; - tmp << "_" << getpid(); - logFilename += tmp.str(); - logFilename += ".log" ; - command += " > " + logFilename + " 2>&1"; -#ifdef WNT - command = "%PYTHONBIN% -c \"import win32pm ; win32pm.spawnpid(r'" + command + "', '')\""; + //redirect stdout and stderr in a file +#ifdef WIN32 + logFilename=GetenvThreadSafeAsString("TEMP"); + logFilename += "\\"; + user = GetenvThreadSafeAsString( "USERNAME" ); #else - command += " &"; + user = GetenvThreadSafeAsString( "USER" ); + if (user.empty()) + user = GetenvThreadSafeAsString( "LOGNAME" ); + logFilename="/tmp"; + char* val = GetenvThreadSafe("SALOME_TMP_DIR"); + if(val) + { + struct stat file_info; + stat(val, &file_info); + bool is_dir = S_ISDIR(file_info.st_mode); + if (is_dir)logFilename=val; + else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl; + } + logFilename += "/"; #endif + logFilename += _NS->ContainerName(params)+"_"+ resource_selected +"_"+user; + std::ostringstream tmp; + tmp << "_" << getpid(); + logFilename += tmp.str(); + logFilename += ".log" ; + command += " > " + logFilename + " 2>&1"; + MakeTheCommandToBeLaunchedASync(command); - // launch container with a system call - int status=system(command.c_str()); + // launch container with a system call + status=SystemThreadSafe(command.c_str()); + }//end of critical of section - if (status == -1){ - INFOS("[LaunchContainer] command failed (system command status -1): " << command); - RmTmpFile(_TmpFileName); // command file can be removed here - _TmpFileName=""; - return Engines::Container::_nil(); - } - else if (status == 217){ - INFOS("[LaunchContainer] command failed (system command status 217): " << command); - RmTmpFile(_TmpFileName); // command file can be removed here - _TmpFileName=""; - return Engines::Container::_nil(); - } - else - { - // Step 4: Wait for the container - int count = TIME_OUT_TO_LAUNCH_CONT; - if (getenv("TIMEOUT_TO_LAUNCH_CONTAINER") != 0) - { - std::string new_count_str = getenv("TIMEOUT_TO_LAUNCH_CONTAINER"); - int new_count; - std::istringstream ss(new_count_str); - if (!(ss >> new_count)) - { - INFOS("[LaunchContainer] TIMEOUT_TO_LAUNCH_CONTAINER should be an int"); - } - else - count = new_count; - } - INFOS("[GiveContainer] waiting " << count << " second steps container " << containerNameInNS); - while (CORBA::is_nil(ret) && count) + if (status == -1) { -#ifndef WIN32 - sleep( 1 ) ; -#else - Sleep(1000); -#endif - count--; - MESSAGE("[GiveContainer] step " << count << " Waiting for container on " << resource_selected); - CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); - ret=Engines::Container::_narrow(obj); + INFOS("[LaunchContainer] command failed (system command status -1): " << command); + RmTmpFile(tmpFileName); // command file can be removed here + return Engines::Container::_nil(); } - if (CORBA::is_nil(ret)) + else if (status == 217) { - INFOS("[GiveContainer] was not able to launch container " << containerNameInNS); + INFOS("[LaunchContainer] command failed (system command status 217): " << command); + RmTmpFile(tmpFileName); // command file can be removed here + return Engines::Container::_nil(); } - else + else { - // Setting log file name - logFilename=":"+logFilename; - logFilename="@"+Kernel_Utils::GetHostname()+logFilename; - logFilename=user+logFilename; - ret->logfilename(logFilename.c_str()); - RmTmpFile(_TmpFileName); // command file can be removed here - _TmpFileName=""; + // Step 4: Wait for the container + int count(GetTimeOutToLoaunchServer()); + INFOS("[GiveContainer] waiting " << count << " second steps container " << containerNameInNS); + while (CORBA::is_nil(ret) && count) + { + SleepInSecond(1); + count--; + MESSAGE("[GiveContainer] step " << count << " Waiting for container on " << resource_selected); + CORBA::Object_var obj(_NS->Resolve(containerNameInNS.c_str())); + ret=Engines::Container::_narrow(obj); + } + if (CORBA::is_nil(ret)) + { + INFOS("[GiveContainer] was not able to launch container " << containerNameInNS); + } + else + { + // Setting log file name + logFilename=":"+logFilename; + logFilename="@"+Kernel_Utils::GetHostname()+logFilename;//threadsafe + logFilename=user+logFilename; + ret->logfilename(logFilename.c_str()); + RmTmpFile(tmpFileName); // command file can be removed here + } } - } return ret; } //============================================================================= //! Find a container given constraints (params) on a list of machines (possibleComputers) +//! agy : this method is ThreadSafe /*! * */ //============================================================================= -Engines::Container_ptr -SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& params, - const Engines::ResourceList& possibleResources) +Engines::Container_ptr SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& params, const Engines::ResourceList& possibleResources) { MESSAGE("[FindContainer] FindContainer on " << possibleResources.length() << " resources"); for(unsigned int i=0; i < possibleResources.length();i++) @@ -669,17 +667,17 @@ SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& param //============================================================================= //! Find a container given constraints (params) on a machine (theMachine) +//! agy : this method is ThreadSafe /*! * */ //============================================================================= Engines::Container_ptr -SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& params, - const std::string& resource) +SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& params, const std::string& resource) { - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource.c_str()); - std::string hostname(resource_definition->hostname.in()); + ParserResourcesType resource_definition = _resManager->GetResourceDefinition(resource); + std::string hostname(resource_definition.HostName); std::string containerNameInNS(_NS->BuildContainerNameForNS(params, hostname.c_str())); MESSAGE("[FindContainer] Try to find a container " << containerNameInNS << " on resource " << resource); CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); @@ -702,10 +700,11 @@ bool isPythonContainer(const char* ContainerName); //============================================================================= /*! * This is no longer valid (C++ container are also python containers) - */ + */ //============================================================================= bool isPythonContainer(const char* ContainerName) { + return false; // VSR 02/08/2013: Python containers are no more supported bool ret = false; int len = strlen(ContainerName); @@ -736,24 +735,19 @@ bool isPythonContainer(const char* ContainerName) * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) * - where workingdir is the requested working directory for the container. * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME - */ + */ //============================================================================= std::string -SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer -(const std::string& resource_name, - const Engines::ContainerParameters& params, const std::string& container_exe) +SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer(const std::string& resource_name, const Engines::ContainerParameters& params, const std::string& container_exe) const { - - std::string command; + std::string command,tmpFileName; if (!_isAppliSalomeDefined) - command = BuildTempFileToLaunchRemoteContainer(resource_name, params); + command = BuildTempFileToLaunchRemoteContainer(resource_name, params, tmpFileName); else { int nbproc; - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_name.c_str()); - std::string hostname(resource_definition->hostname.in()); - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_name)); if (params.isMPI) { @@ -771,18 +765,18 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer command += resInfo.AppliPath; // path relative to user@machine $HOME else { - ASSERT(getenv("APPLI")); - command += getenv("APPLI"); // path relative to user@machine $HOME + ASSERT(GetenvThreadSafe("APPLI")); + command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME } command += "/runRemote.sh "; - ASSERT(getenv("NSHOST")); - command += getenv("NSHOST"); // hostname of CORBA name server + ASSERT(GetenvThreadSafe("NSHOST")); + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; - ASSERT(getenv("NSPORT")); - command += getenv("NSPORT"); // port of CORBA name server + ASSERT(GetenvThreadSafe("NSPORT")); + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server std::string wdir = params.workingdir.in(); if(wdir != "") @@ -792,7 +786,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer if(wdir == "$TEMPDIR") wdir="\\$TEMPDIR"; command += wdir; // requested working directory - command += "'"; + command += "'"; } if(params.isMPI) @@ -801,18 +795,18 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer std::ostringstream o; o << nbproc << " "; command += o.str(); -#ifdef WITHLAM +#ifdef LAM_MPI command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) +#elif defined(OPEN_MPI) + if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else{ command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - command += getenv("OMPI_URI_FILE"); + command += GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) command += "-nameserver " + Kernel_Utils::GetHostname(); -#endif +#endif command += " SALOME_MPIContainer "; } else @@ -831,13 +825,11 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer //============================================================================= /*! * builds the command to be launched. - */ + */ //============================================================================= -std::string -SALOME_ContainerManager::BuildCommandToLaunchLocalContainer -(const Engines::ContainerParameters& params, const std::string& machinesFile, const std::string& container_exe) +std::string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer(const Engines::ContainerParameters& params, const std::string& machinesFile, const std::string& container_exe, std::string& tmpFileName) const { - _TmpFileName = BuildTemporaryFileName(); + tmpFileName = BuildTemporaryFileName(); std::string command; int nbproc = 0; @@ -854,20 +846,20 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer o << nbproc << " "; - if( getenv("LIBBATCH_NODEFILE") != NULL ) + if( GetenvThreadSafe("LIBBATCH_NODEFILE") != NULL ) o << "-machinefile " << machinesFile << " "; -#ifdef WITHLAM +#ifdef LAM_MPI o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) +#elif defined(OPEN_MPI) + if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else { o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - o << getenv("OMPI_URI_FILE"); + o << GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) o << "-nameserver " + Kernel_Utils::GetHostname(); #endif @@ -917,14 +909,14 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer o << " -"; AddOmninamesParams(o); - std::ofstream command_file( _TmpFileName.c_str() ); + std::ofstream command_file( tmpFileName.c_str() ); command_file << o.str(); command_file.close(); #ifndef WIN32 - chmod(_TmpFileName.c_str(), 0x1ED); + chmod(tmpFileName.c_str(), 0x1ED); #endif - command = _TmpFileName; + command = tmpFileName; MESSAGE("Command is file ... " << command); MESSAGE("Command is ... " << o.str()); @@ -935,7 +927,8 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer //============================================================================= /*! * removes the generated temporary file in case of a remote launch. - */ + * This method is thread safe + */ //============================================================================= void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) @@ -946,14 +939,14 @@ void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) #ifdef WIN32 std::string command = "del /F "; #else - std::string command = "rm "; + std::string command = "rm "; #endif if ( lenght > 4 ) command += tmpFileName.substr(0, lenght - 3 ); else command += tmpFileName; command += '*'; - system(command.c_str()); + SystemThreadSafe(command.c_str()); //if dir is empty - remove it std::string tmp_dir = Kernel_Utils::GetDirByPath( tmpFileName ); if ( Kernel_Utils::IsEmptyDir( tmp_dir ) ) @@ -963,7 +956,7 @@ void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) #else command = "rmdir " + tmp_dir; #endif - system(command.c_str()); + SystemThreadSafe(command.c_str()); } } } @@ -971,49 +964,84 @@ void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= void SALOME_ContainerManager::AddOmninamesParams(std::string& command) const { - CORBA::String_var iorstr = _NS->getIORaddr(); - command += "ORBInitRef NameService="; - command += iorstr; + std::ostringstream oss; + AddOmninamesParams(oss); + command+=oss.str(); } //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= -void SALOME_ContainerManager::AddOmninamesParams(std::ofstream& fileStream) const +void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream) const { - CORBA::String_var iorstr = _NS->getIORaddr(); - fileStream << "ORBInitRef NameService="; - fileStream << iorstr; + AddOmninamesParams(fileStream,_NS); } //============================================================================= /*! * add to command all options relative to naming service. - */ + */ //============================================================================= -void SALOME_ContainerManager::AddOmninamesParams(std::ostringstream& oss) const +void SALOME_ContainerManager::AddOmninamesParams(std::ostream& fileStream, SALOME_NamingService *ns) +{ + CORBA::String_var iorstr(ns->getIORaddr()); + fileStream << "ORBInitRef NameService="; + fileStream << iorstr; +} + +void SALOME_ContainerManager::MakeTheCommandToBeLaunchedASync(std::string& command) +{ +#ifdef WIN32 + command = "%PYTHONBIN% -c \"import subprocess ; subprocess.Popen(r'" + command + "').pid\""; +#else + command += " &"; +#endif +} + +int SALOME_ContainerManager::GetTimeOutToLoaunchServer() +{ + int count(TIME_OUT_TO_LAUNCH_CONT); + if (GetenvThreadSafe("TIMEOUT_TO_LAUNCH_CONTAINER") != 0) + { + std::string new_count_str(GetenvThreadSafeAsString("TIMEOUT_TO_LAUNCH_CONTAINER")); + int new_count; + std::istringstream ss(new_count_str); + if (!(ss >> new_count)) + { + INFOS("[LaunchContainer] TIMEOUT_TO_LAUNCH_CONTAINER should be an int"); + } + else + count = new_count; + } + return count; +} + +void SALOME_ContainerManager::SleepInSecond(int ellapseTimeInSecond) { - CORBA::String_var iorstr = _NS->getIORaddr(); - oss << "ORBInitRef NameService="; - oss << iorstr; +#ifndef WIN32 + sleep( ellapseTimeInSecond ) ; +#else + int timeInMS(1000*ellapseTimeInSecond); + Sleep(timeInMS); +#endif } //============================================================================= /*! * generate a file name in /tmp directory - */ + */ //============================================================================= -std::string SALOME_ContainerManager::BuildTemporaryFileName() const +std::string SALOME_ContainerManager::BuildTemporaryFileName() { //build more complex file name to support multiple salome session std::string aFileName = Kernel_Utils::GetTmpFileName(); @@ -1028,24 +1056,21 @@ std::string SALOME_ContainerManager::BuildTemporaryFileName() const //============================================================================= /*! * Builds in a temporary file the script to be launched. - * + * * Used if SALOME Application ($APPLI) is not defined. * The command is build with data from CatalogResources, in which every path * used on remote computer must be defined. - */ + */ //============================================================================= -std::string -SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer -(const std::string& resource_name, - const Engines::ContainerParameters& params) throw(SALOME_Exception) +std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const std::string& resource_name, const Engines::ContainerParameters& params, std::string& tmpFileName) const { int status; - _TmpFileName = BuildTemporaryFileName(); + tmpFileName = BuildTemporaryFileName(); std::ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), std::ofstream::out ); - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name); + tempOutputFile.open(tmpFileName.c_str(), std::ofstream::out ); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(resource_name)); tempOutputFile << "#! /bin/sh" << std::endl; // --- set env vars @@ -1068,21 +1093,21 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer std::ostringstream o; tempOutputFile << nbproc << " "; -#ifdef WITHLAM +#ifdef LAM_MPI tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) +#elif defined(OPEN_MPI) + if( GetenvThreadSafe("OMPI_URI_FILE") == NULL ) tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else{ tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - tempOutputFile << getenv("OMPI_URI_FILE"); + tempOutputFile << GetenvThreadSafeAsString("OMPI_URI_FILE"); } -#elif defined(WITHMPICH) +#elif defined(MPICH) tempOutputFile << "-nameserver " + Kernel_Utils::GetHostname(); #endif } - tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/"; + tempOutputFile << GetenvThreadSafeAsString("KERNEL_ROOT_DIR") << "/bin/salome/"; if (params.isMPI) { @@ -1106,7 +1131,7 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer tempOutputFile.flush(); tempOutputFile.close(); #ifndef WIN32 - chmod(_TmpFileName.c_str(), 0x1ED); + chmod(tmpFileName.c_str(), 0x1ED); #endif // --- Build command @@ -1117,47 +1142,46 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer { command = "rsh "; std::string commandRcp = "rcp "; - commandRcp += _TmpFileName; + commandRcp += tmpFileName; commandRcp += " "; commandRcp += resInfo.HostName; commandRcp += ":"; - commandRcp += _TmpFileName; - status = system(commandRcp.c_str()); + commandRcp += tmpFileName; + status = SystemThreadSafe(commandRcp.c_str()); } else if (resInfo.Protocol == ssh) { command = "ssh "; std::string commandRcp = "scp "; - commandRcp += _TmpFileName; + commandRcp += tmpFileName; commandRcp += " "; commandRcp += resInfo.HostName; commandRcp += ":"; - commandRcp += _TmpFileName; - status = system(commandRcp.c_str()); + commandRcp += tmpFileName; + status = SystemThreadSafe(commandRcp.c_str()); } else if (resInfo.Protocol == srun) { command = "srun -n 1 -N 1 --share --nodelist="; std::string commandRcp = "rcp "; - commandRcp += _TmpFileName; + commandRcp += tmpFileName; commandRcp += " "; commandRcp += resInfo.HostName; commandRcp += ":"; - commandRcp += _TmpFileName; - status = system(commandRcp.c_str()); + commandRcp += tmpFileName; + status = SystemThreadSafe(commandRcp.c_str()); } else throw SALOME_Exception("Unknown protocol"); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw SALOME_Exception("Error of connection on remote host"); command += resInfo.HostName; - _CommandForRemAccess = command; command += " "; - command += _TmpFileName; + command += tmpFileName; SCRUTE(command); @@ -1165,18 +1189,23 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer } -std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, const std::string machinesFile) +std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, const std::string machinesFile) const { int status; std::string zeronode; std::string command; std::string tmpFile = BuildTemporaryFileName(); + const ParserResourcesType resInfo(_resManager->GetResourceDefinition(machine)); + + if(resInfo.Protocol == sh) + { + return resInfo.HostName; + } - if( getenv("LIBBATCH_NODEFILE") == NULL ) + if( GetenvThreadSafe("LIBBATCH_NODEFILE") == NULL ) { if (_isAppliSalomeDefined) { - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(machine); if (resInfo.Protocol == rsh) command = "rsh "; @@ -1201,18 +1230,18 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c command += resInfo.AppliPath; // path relative to user@machine $HOME else { - ASSERT(getenv("APPLI")); - command += getenv("APPLI"); // path relative to user@machine $HOME + ASSERT(GetenvThreadSafe("APPLI")); + command += GetenvThreadSafeAsString("APPLI"); // path relative to user@machine $HOME } command += "/runRemote.sh "; - ASSERT(getenv("NSHOST")); - command += getenv("NSHOST"); // hostname of CORBA name server + ASSERT(GetenvThreadSafe("NSHOST")); + command += GetenvThreadSafeAsString("NSHOST"); // hostname of CORBA name server command += " "; - ASSERT(getenv("NSPORT")); - command += getenv("NSPORT"); // port of CORBA name server + ASSERT(GetenvThreadSafe("NSPORT")); + command += GetenvThreadSafeAsString("NSPORT"); // port of CORBA name server command += " mpirun -np 1 hostname -s > " + tmpFile; } @@ -1222,7 +1251,7 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c else command = "mpirun -np 1 -machinefile " + machinesFile + " hostname -s > " + tmpFile; - status = system(command.c_str()); + status = SystemThreadSafe(command.c_str()); if( status == 0 ){ std::ifstream fp(tmpFile.c_str(),std::ios::in); while(fp >> zeronode); @@ -1236,7 +1265,7 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c std::string SALOME_ContainerManager::machinesFile(const int nbproc) { std::string tmp; - std::string nodesFile = getenv("LIBBATCH_NODEFILE"); + std::string nodesFile = GetenvThreadSafeAsString("LIBBATCH_NODEFILE"); std::string machinesFile = Kernel_Utils::GetTmpFileName(); std::ifstream fpi(nodesFile.c_str(),std::ios::in); std::ofstream fpo(machinesFile.c_str(),std::ios::out); @@ -1269,7 +1298,7 @@ std::set SALOME_ContainerManager::getpidofprogram(const std::string progr std::string cmd; std::string thepid; cmd = "pidof " + program + " > " + tmpFile; - system(cmd.c_str()); + SystemThreadSafe(cmd.c_str()); std::ifstream fpi(tmpFile.c_str(),std::ios::in); while(fpi >> thepid){ thepids.insert(atoi(thepid.c_str())); @@ -1309,7 +1338,7 @@ std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocol command << "pbsdsh -o -h " << hostname << " "; break; case blaunch: - command << "blaunch " << hostname << " "; + command << "blaunch -no-shell " << hostname << " "; break; default: throw SALOME_Exception("Unknown protocol"); @@ -1318,11 +1347,11 @@ std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocol return command.str(); } -bool +bool SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & params, std::string resource_selected) { bool result = true; - + // Step 1 : check ContainerParameters // Check container_name, has to be defined if (std::string(params.container_name.in()) == "") @@ -1347,17 +1376,18 @@ SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & para } // Step 2 : check resource_selected - Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str()); - std::string protocol = resource_definition->protocol.in(); - std::string username = resource_definition->username.in(); - std::string applipath = resource_definition->applipath.in(); + const ParserResourcesType resource_definition = _resManager->GetResourceDefinition(resource_selected); + //std::string protocol = resource_definition->protocol.in(); + std::string username = resource_definition.UserName; + std::string applipath = resource_definition.AppliPath; - if (protocol == "" || username == "" || applipath == "") + //if (protocol == "" || username == "" || applipath == "") + if (username == "" || applipath == "") { INFOS("[checkPaCOParameters] resource selected is not well defined"); - INFOS("[checkPaCOParameters] resource name: " << resource_definition->name.in()); - INFOS("[checkPaCOParameters] resource hostname: " << resource_definition->hostname.in()); - INFOS("[checkPaCOParameters] resource protocol: " << protocol); + INFOS("[checkPaCOParameters] resource name: " << resource_definition.Name); + INFOS("[checkPaCOParameters] resource hostname: " << resource_definition.HostName); + INFOS("[checkPaCOParameters] resource protocol: " << resource_definition.getAccessProtocolTypeStr()); INFOS("[checkPaCOParameters] resource username: " << username); INFOS("[checkPaCOParameters] resource applipath: " << applipath); result = false; @@ -1365,6 +1395,35 @@ SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & para return result; } + +/* + * :WARNING: Do not directly convert returned value to std::string + * This function may return NULL if env variable is not defined. + * And std::string(NULL) causes undefined behavior. + * Use GetenvThreadSafeAsString to properly get a std::string. +*/ +char *SALOME_ContainerManager::GetenvThreadSafe(const char *name) +{// getenv is not thread safe. See man 7 pthread. + Utils_Locker lock (&_getenvMutex); + return getenv(name); +} + +/* + * Return env variable as a std::string. + * Return empty string if env variable is not set. + */ +std::string SALOME_ContainerManager::GetenvThreadSafeAsString(const char *name) +{ + char* var = GetenvThreadSafe(name); + return var ? std::string(var) : std::string(); +} + +int SALOME_ContainerManager::SystemThreadSafe(const char *command) +{ + Utils_Locker lock (&_systemMutex); + return system(command); +} + #ifdef WITH_PACO_PARALLEL //============================================================================= @@ -1396,7 +1455,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters INFOS("[StartPaCOPPContainer] on resource : " << resource_selected); // Step 2 : Get a MachineFile for the parallel container - std::string machine_file_name = _ResManager->getMachineFile(resource_selected, + std::string machine_file_name = _resManager->getMachineFile(resource_selected, params.nb_proc, params.parallelLib.in()); @@ -1409,7 +1468,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters // Step 3 : starting parallel container proxy std::string command_proxy(""); std::string proxy_machine; - try + try { command_proxy = BuildCommandToLaunchPaCOProxyContainer(params, machine_file_name, proxy_machine); } @@ -1431,7 +1490,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters // Step 4 : starting parallel container nodes std::string command_nodes(""); SALOME_ContainerManager::actual_launch_machine_t nodes_machines; - try + try { command_nodes = BuildCommandToLaunchPaCONodeContainer(params, machine_file_name, nodes_machines, proxy_machine); } @@ -1448,7 +1507,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters { INFOS("[StarPaCOPPContainer] LaunchPaCONodeContainer failed !"); // Il faut tuer le proxy - try + try { Engines::Container_var proxy = Engines::Container::_narrow(container_proxy); proxy->Shutdown(); @@ -1461,7 +1520,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } // Step 4 : connecting nodes and the proxy to actually create a parallel container - for (int i = 0; i < params.nb_proc; i++) + for (int i = 0; i < params.nb_proc; i++) { std::ostringstream tmp; tmp << i; @@ -1471,7 +1530,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters std::string theNodeMachine(nodes_machines[i]); std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str()); obj = _NS->Resolve(containerNameInNS.c_str()); - if (CORBA::is_nil(obj)) + if (CORBA::is_nil(obj)) { INFOS("[StarPaCOPPContainer] CONNECTION FAILED From Naming Service !"); INFOS("[StarPaCOPPContainer] Container name is " << containerNameInNS); @@ -1505,7 +1564,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } // Step 5 : starting parallel container - try + try { MESSAGE ("[StarPaCOPPContainer] Starting parallel object"); container_proxy->start(); @@ -1526,7 +1585,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters } catch(std::exception& exc) { - INFOS("Caught std::exception - "<GetResourceDefinition(params.resource_params.name); + ParserResourcesType resource_definition = + _resManager->GetResourceDefinition(params.resource_params.name.in()); // Choose hostname std::string hostname; @@ -1570,10 +1629,10 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C MESSAGE("[BuildCommandToLaunchPaCOProxyContainer] remote machine case detected !"); remote_execution = true; } - + // Log environnement std::string log_type(""); - char * get_val = getenv("PARALLEL_LOG"); + char * get_val = GetenvThreadSafe("PARALLEL_LOG"); if (get_val) log_type = get_val; @@ -1590,17 +1649,17 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C // a SALOME application if (remote_execution) { - ASSERT(getenv("NSHOST")); - ASSERT(getenv("NSPORT")); + ASSERT(GetenvThreadSafe("NSHOST")); + ASSERT(GetenvThreadSafe("NSPORT")); - command << resource_definition->protocol.in(); + command << resource_definition.getAccessProtocolTypeStr(); command << " -l "; - command << resource_definition->username.in(); + command << resource_definition.UserName; command << " " << hostname; - command << " " << resource_definition->applipath.in(); + command << " " << resource_definition.AppliPath; command << "/runRemote.sh "; - command << getenv("NSHOST") << " "; // hostname of CORBA name server - command << getenv("NSPORT") << " "; // port of CORBA name server + command << GetenvThreadSafeAsString("NSHOST") << " "; // hostname of CORBA name server + command << GetenvThreadSafeAsString("NSPORT") << " "; // port of CORBA name server } command << exe_name; @@ -1618,7 +1677,7 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C return command.str(); } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params, const std::string & machine_file_name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine, @@ -1634,12 +1693,12 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co nb_proc_stream << params.nb_proc; // Get resource definition - Engines::ResourceDefinition_var resource_definition = - _ResManager->GetResourceDefinition(params.resource_params.name); - + ParserResourcesType resource_definition = + _resManager->GetResourceDefinition(params.resource_params.name.in()); + // Log environnement std::string log_type(""); - char * get_val = getenv("PARALLEL_LOG"); + char * get_val = GetenvThreadSafe("PARALLEL_LOG"); if (get_val) log_type = get_val; @@ -1678,17 +1737,17 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co // a SALOME application if (remote_execution) { - ASSERT(getenv("NSHOST")); - ASSERT(getenv("NSPORT")); + ASSERT(GetenvThreadSafe("NSHOST")); + ASSERT(GetenvThreadSafe("NSPORT")); - command_node_stream << resource_definition->protocol.in(); + command_node_stream << resource_definition.getAccessProtocolTypeStr(); command_node_stream << " -l "; - command_node_stream << resource_definition->username.in(); + command_node_stream << resource_definition.UserName; command_node_stream << " " << hostname; - command_node_stream << " " << resource_definition->applipath.in(); + command_node_stream << " " << resource_definition.AppliPath; command_node_stream << "/runRemote.sh "; - command_node_stream << getenv("NSHOST") << " "; // hostname of CORBA name server - command_node_stream << getenv("NSPORT") << " "; // port of CORBA name server + command_node_stream << GetenvThreadSafeAsString("NSHOST") << " "; // hostname of CORBA name server + command_node_stream << GetenvThreadSafeAsString("NSPORT") << " "; // port of CORBA name server } command_node_stream << exe_name; @@ -1726,17 +1785,16 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co if (last == std::string::npos) last = -1; - std::string protocol = resource_definition->protocol.in(); - if (protocol == "rsh") + if (resource_definition.Protocol == rsh) command_remote_stream << "rcp "; - else + else command_remote_stream << "scp "; command_remote_stream << machine_file_name << " "; - command_remote_stream << resource_definition->username.in() << "@"; - command_remote_stream << hostname << ":" << resource_definition->applipath.in(); + command_remote_stream << resource_definition.UserName << "@"; + command_remote_stream << hostname << ":" << resource_definition.AppliPath; command_remote_stream << "/" << machine_file_name.substr(last+1); - int status = system(command_remote_stream.str().c_str()); + int status = SystemThreadSafe(command_remote_stream.str().c_str()); if (status == -1) { INFOS("copy of the MPI machine file failed ! - sorry !"); @@ -1755,23 +1813,23 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co // a SALOME application if (remote_execution) { - ASSERT(getenv("NSHOST")); - ASSERT(getenv("NSPORT")); + ASSERT(GetenvThreadSafe("NSHOST")); + ASSERT(GetenvThreadSafe("NSPORT")); - command_nodes << resource_definition->protocol.in(); + command_nodes << resource_definition.getAccessProtocolTypeStr(); command_nodes << " -l "; - command_nodes << resource_definition->username.in(); + command_nodes << resource_definition.UserName; command_nodes << " " << hostname; - command_nodes << " " << resource_definition->applipath.in(); + command_nodes << " " << resource_definition.AppliPath; command_nodes << "/runRemote.sh "; - command_nodes << getenv("NSHOST") << " "; // hostname of CORBA name server - command_nodes << getenv("NSPORT") << " "; // port of CORBA name server + command_nodes << GetenvThreadSafeAsString("NSHOST") << " "; // hostname of CORBA name server + command_nodes << GetenvThreadSafeAsString("NSPORT") << " "; // port of CORBA name server } - if (std::string(resource_definition->mpiImpl.in()) == "lam") + if (resource_definition.mpi == lam) { command_nodes << "mpiexec -ssi boot "; - command_nodes << "-machinefile " << machine_file_name << " "; + command_nodes << "-machinefile " << machine_file_name << " "; command_nodes << "-n " << params.nb_proc; } else @@ -1787,7 +1845,7 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::Co // We don't put hostname, because nodes are registered in the resource of the proxy for (int i= 0; i < params.nb_proc; i++) - vect_machine.push_back(proxy_hostname); + vect_machine.push_back(proxy_hostname); command_nodes << command_end; } @@ -1799,7 +1857,7 @@ SALOME_ContainerManager::LogConfiguration(const std::string & log_type, const std::string & exe_type, const std::string & container_name, const std::string & hostname, - std::string & begin, + std::string & begin, std::string & end) { if(log_type == "xterm") @@ -1816,20 +1874,23 @@ SALOME_ContainerManager::LogConfiguration(const std::string & log_type, { // default into a file... std::string logFilename = "/tmp/" + container_name + "_" + hostname + "_" + exe_type + "_"; - logFilename += std::string(getenv("USER")) + ".log"; + std::string user = GetenvThreadSafeAsString("USER"); + if (user.empty()) + user = GetenvThreadSafeAsString("LOGNAME"); + logFilename += user + ".log"; end = " > " + logFilename + " 2>&1 & "; } } -CORBA::Object_ptr -SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, +CORBA::Object_ptr +SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string & hostname) { PaCO::InterfaceManager_ptr container_proxy = PaCO::InterfaceManager::_nil(); MESSAGE("[LaunchPaCOProxyContainer] Launch command"); - int status = system(command.c_str()); + int status = SystemThreadSafe(command.c_str()); if (status == -1) { INFOS("[LaunchPaCOProxyContainer] failed : system command status -1"); return container_proxy; @@ -1839,20 +1900,20 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, return container_proxy; } - int count = TIME_OUT_TO_LAUNCH_CONT; + int count(GetTimeOutToLoaunchServer()); CORBA::Object_var obj = CORBA::Object::_nil(); - std::string containerNameInNS = _NS->BuildContainerNameForNS(params.container_name.in(), + std::string containerNameInNS = _NS->BuildContainerNameForNS(params.container_name.in(), hostname.c_str()); MESSAGE("[LaunchParallelContainer] Waiting for Parallel Container proxy : " << containerNameInNS); - while (CORBA::is_nil(obj) && count) + while (CORBA::is_nil(obj) && count) { sleep(1); count--; obj = _NS->Resolve(containerNameInNS.c_str()); } - try + try { container_proxy = PaCO::InterfaceManager::_narrow(obj); } @@ -1894,13 +1955,13 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, */ //============================================================================= bool -SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, +SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine) { INFOS("[LaunchPaCONodeContainer] Launch command"); - int status = system(command.c_str()); + int status = SystemThreadSafe(command.c_str()); if (status == -1) { INFOS("[LaunchPaCONodeContainer] failed : system command status -1"); return false; @@ -1912,7 +1973,7 @@ SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, INFOS("[LaunchPaCONodeContainer] Waiting for the nodes of the parallel container"); // We are waiting all the nodes - for (int i = 0; i < params.nb_proc; i++) + for (int i = 0; i < params.nb_proc; i++) { CORBA::Object_var obj = CORBA::Object::_nil(); std::string theMachine(vect_machine[i]); @@ -1923,9 +1984,9 @@ SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, std::string container_node_name = name + proc_number; std::string containerNameInNS = _NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str()); INFOS("[LaunchPaCONodeContainer] Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine); - int count = TIME_OUT_TO_LAUNCH_CONT; + int count(GetTimeOutToLoaunchServer()); while (CORBA::is_nil(obj) && count) { - sleep(1) ; + SleepInSecond(1); count-- ; obj = _NS->Resolve(containerNameInNS.c_str()); } @@ -1950,7 +2011,7 @@ SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters return ret; } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::ContainerParameters& params, std::string machine_file_name, std::string & proxy_hostname) @@ -1958,26 +2019,26 @@ SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::C return ""; } -std::string +std::string SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params, const std::string & machine_file_name, - SALOME_ContainerManager::actual_launch_machine_t & vect_machine, - const std::string & proxy_hostname) + SALOME_ContainerManager::actual_launch_machine_t & vect_machine, + const std::string & proxy_hostname) { return ""; } -void +void SALOME_ContainerManager::LogConfiguration(const std::string & log_type, const std::string & exe_type, const std::string & container_name, const std::string & hostname, - std::string & begin, + std::string & begin, std::string & end) { } -CORBA::Object_ptr -SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, +CORBA::Object_ptr +SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& hostname) { @@ -1985,8 +2046,8 @@ SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, return ret; } -bool -SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, +bool +SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine)