-// Copyright (C) 2007-2023 CEA, EDF, OPEN CASCADE
+// Copyright (C) 2007-2024 CEA, EDF, OPEN CASCADE
//
// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
#include "SALOME_ResourcesManager.hxx"
#include "SALOME_LoadRateManager.hxx"
#include "SALOME_NamingService.hxx"
+#include "SALOME_Container_i.hxx"
#include "SALOME_ResourcesManager_Client.hxx"
#include "SALOME_Embedded_NamingService.hxx"
#include "SALOME_ModuleCatalog.hh"
#include "Basics_Utils.hxx"
#include "Basics_DirUtils.hxx"
#include "PythonCppUtils.hxx"
+#include "KernelBasis.hxx"
#include <sys/types.h>
#include <sys/stat.h>
#include <signal.h>
#include <sstream>
#include <string>
#include <queue>
+#include <thread>
+#include <chrono>
#include <SALOMEconfig.h>
#include CORBA_CLIENT_HEADER(SALOME_Session)
const int SALOME_ContainerManager::TIME_OUT_TO_LAUNCH_CONT=60;
-const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
- "/ContainerManager";
+const int SALOME_ContainerManager::DFT_DELTA_TIME_NS_LOOKUP_IN_MS=1000;
+
+const char *SALOME_ContainerManager::_ContainerManagerNameInNS = "/ContainerManager";
omni_mutex SALOME_ContainerManager::_numInstanceMutex;
//=============================================================================
SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService_Abstract *ns)
- : _nbprocUsed(1)
+ : _nbprocUsed(1),_delta_time_ns_lookup_in_ms(DFT_DELTA_TIME_NS_LOOKUP_IN_MS),_delta_time_measure_in_ms(Abstract_Engines_Container_i::DFT_TIME_INTERVAL_BTW_MEASURE)
{
- MESSAGE("constructor");
_NS = ns;
_resManager = new SALOME_ResourcesManager_Client(ns);
+ _time_out_in_second = GetTimeOutToLoaunchServer();
PortableServer::POAManager_var pman = poa->the_POAManager();
_orb = CORBA::ORB::_duplicate(orb) ;
}
#endif
#endif
-
- MESSAGE("constructor end");
}
//=============================================================================
_poa->deactivate_object(oid);
}
+/*!
+ * Accessor on the time out, expressed in seconds, currently stored in this
+ * manager (attribute _time_out_in_second).
+ *
+ * \return the stored time out in seconds
+ */
+CORBA::Long SALOME_ContainerManager::GetTimeOutToLaunchServerInSecond()
+{
+  return _time_out_in_second;
+}
+
+/*!
+ * Replace the time out, expressed in seconds, stored in this manager
+ * (attribute _time_out_in_second).
+ *
+ * \param timeInSecond new time out in seconds
+ */
+void SALOME_ContainerManager::SetTimeOutToLaunchServerInSecond(CORBA::Long timeInSecond)
+{
+  _time_out_in_second = timeInSecond;
+}
+
+/*!
+ * Accessor on the delay, expressed in milliseconds, stored in attribute
+ * _delta_time_ns_lookup_in_ms.
+ *
+ * \return the stored delay in milliseconds
+ */
+CORBA::Long SALOME_ContainerManager::GetDeltaTimeBetweenNSLookupAtLaunchTimeInMilliSecond()
+{
+  return _delta_time_ns_lookup_in_ms;
+}
+
+/*!
+ * Set the delay, expressed in milliseconds, stored in attribute
+ * _delta_time_ns_lookup_in_ms.
+ *
+ * This attribute is used as a divisor to compute the number of naming service
+ * lookup attempts, and as the sleep duration between two attempts, while
+ * waiting for a launched container. A zero or negative value would lead to a
+ * division by zero, or to a waiting loop that never sleeps. Such a value is
+ * therefore rejected : an error is logged and the current setting is kept.
+ *
+ * \param timeInMS new delay in milliseconds, expected to be strictly positive
+ */
+void SALOME_ContainerManager::SetDeltaTimeBetweenNSLookupAtLaunchTimeInMilliSecond(CORBA::Long timeInMS)
+{
+  if( timeInMS <= 0 )
+  {
+    ERROR_MESSAGE("SetDeltaTimeBetweenNSLookupAtLaunchTimeInMilliSecond : invalid delta time " << timeInMS << " ms (must be > 0). Value ignored.");
+    return ;
+  }
+  this->_delta_time_ns_lookup_in_ms = timeInMS;
+}
+
+/*!
+ * Accessor on the delay, expressed in milliseconds, stored in attribute
+ * _delta_time_measure_in_ms.
+ *
+ * \return the stored delay in milliseconds
+ */
+CORBA::Long SALOME_ContainerManager::GetDeltaTimeBetweenCPUMemMeasureInMilliSecond()
+{
+  return _delta_time_measure_in_ms;
+}
+
+/*!
+ * Replace the delay, expressed in milliseconds, stored in attribute
+ * _delta_time_measure_in_ms.
+ *
+ * \param timeInMS new delay in milliseconds
+ */
+void SALOME_ContainerManager::SetDeltaTimeBetweenCPUMemMeasureInMilliSecond(CORBA::Long timeInMS)
+{
+  _delta_time_measure_in_ms = timeInMS;
+}
+
//=============================================================================
//! Loop on all the containers listed in naming service, ask shutdown on each
/*! CORBA Method:
*/
//=============================================================================
-void SALOME_ContainerManager::ShutdownContainers()
+void SALOME_ContainerManager::ShutdownContainersGeneric(std::function<void(Engines::Container_ptr)> funcToBeCalledOnContainer)
{
- MESSAGE("ShutdownContainers");
+ MESSAGE("ShutdownContainersGeneric");
if(!_NS)
return ;
SALOME::Session_var session = SALOME::Session::_nil();
Engines::Container_var cont=Engines::Container::_narrow(obj);
if(!CORBA::is_nil(cont))
{
- MESSAGE("ShutdownContainers: " << (*iter));
- cont->Shutdown();
+ MESSAGE("ShutdownContainersGeneric: " << (*iter));
+ funcToBeCalledOnContainer( cont );
+ MESSAGE("ShutdownContainersGeneric: after call of shutdown" << (*iter));
}
else
- MESSAGE("ShutdownContainers: no container ref for " << (*iter));
+ MESSAGE("ShutdownContainersGeneric: no container ref for " << (*iter));
}
catch(CORBA::SystemException& e)
{
}
}
+/*!
+ * Delegate to ShutdownContainersGeneric with a callback invoking Shutdown()
+ * on the container it receives.
+ */
+void SALOME_ContainerManager::ShutdownContainers()
+{
+  auto askShutdown = [](Engines::Container_ptr container) { container->Shutdown(); };
+  ShutdownContainersGeneric( askShutdown );
+}
+
+/*!
+ * Delegate to ShutdownContainersGeneric with a callback invoking ShutdownNow()
+ * on the container it receives.
+ *
+ * Whatever exception is raised by the ShutdownNow() invocation is swallowed
+ * inside the callback, so it never reaches ShutdownContainersGeneric.
+ */
+void SALOME_ContainerManager::ShutdownContainersNow()
+{
+  auto askShutdownNow = [](Engines::Container_ptr container)
+  {
+    try
+    {
+      container->ShutdownNow();
+    }
+    catch(...)
+    {
+      // NOTE(review): ignored on purpose, presumably because the remote side may die before replying - confirm
+    }
+  };
+  ShutdownContainersGeneric( askShutdownNow );
+}
+
void SALOME_ContainerManager::SetOverrideEnvForContainers(const Engines::KeyValDict& env)
{
 this->_override_env.clear();
 auto sz = env.length();
 for(auto i = 0 ; i < sz ; ++i)
- _override_env.emplace_back( std::pair<std::string, std::string>(env[i].key,env[i].val) );
+ _override_env.emplace_back(env[i].key.in(), env[i].val.in()); // previous content was cleared above : each entry of env is appended as a (key, value) pair built in place
}
Engines::KeyValDict *SALOME_ContainerManager::GetOverrideEnvForContainers()
return ret.release();
}
+/*!
+ * Store the code kept in attribute _code_to_exe_on_startup.
+ *
+ * \param code code to store. A null pointer is handled like an empty string.
+ */
+void SALOME_ContainerManager::SetCodeOnContainerStartUp(const char *code)
+{
+  // Assigning a null pointer to a std::string is undefined behavior : null is mapped to "no code"
+  _code_to_exe_on_startup = code ? code : "";
+}
+
//=============================================================================
//! Give a suitable Container given constraints
/*! CORBA Method:
if (!CORBA::is_nil(cont))
{
INFOS("[GiveContainer] container " << containerNameInNS << " launched");
+ cont->monitoringtimeresms( this->_delta_time_measure_in_ms );
+ INFOS("[GiveContainer] container " << containerNameInNS << " first CORBA invocation OK");
std::ostringstream envInfo;
- std::for_each( _override_env.begin(), _override_env.end(), [&envInfo](const std::pair<std::string,std::string>& p) { envInfo << p.first << " = " << p.second << std::endl; } );
+ std::for_each( _override_env.begin(), _override_env.end(), [&envInfo](const std::pair<std::string,std::string>& p) { envInfo << p.first << " = " << p.second << " "; } );
INFOS("[GiveContainer] container " << containerNameInNS << " override " << envInfo.str());
Engines::FieldsDict envCorba;
{
envCorba[i].value <<= CORBA::string_dup( _override_env[i].second.c_str() );
}
}
- cont->override_environment( envCorba );
+ cont->override_environment_python( envCorba );
+ if( !_code_to_exe_on_startup.empty() )
+ {
+ INFOS("[GiveContainer] container " << containerNameInNS << " python code executed " << _code_to_exe_on_startup);
+ cont->execute_python_code( _code_to_exe_on_startup.c_str() );
+ }
+ INFOS("[GiveContainer] container " << containerNameInNS << " verbosity positionning Activation = " << SALOME::VerbosityActivated() << " Verbosity Level = " << SALOME::VerbosityLevelStr());
+ cont->setVerbosity( SALOME::VerbosityActivated(), SALOME::VerbosityLevelStr().c_str() );
return cont._retn();
}
else
return ret;
}
+/*!
+ * Return the name of the container binary matching the value returned by
+ * SALOME::GetPyExecutionMode().
+ *
+ * \return "SALOME_Container_No_NS_Serv" for InProcess,
+ *         "SALOME_Container_No_NS_Serv_OutProcess" for OutOfProcessNoReplay,
+ *         "SALOME_Container_No_NS_Serv_OutProcess_Replay" for OutOfProcessWithReplay
+ * \throw an exception raised through THROW_SALOME_EXCEPTION, after an error
+ *        message has been logged, for any other mode
+ */
+std::string SALOME_ContainerManager::GetCppBinaryOfKernelSSLContainer() const
+{
+  const auto pyExecMode = SALOME::GetPyExecutionMode();
+  if( pyExecMode == SALOME::PyExecutionMode::InProcess )
+    return "SALOME_Container_No_NS_Serv";
+  if( pyExecMode == SALOME::PyExecutionMode::OutOfProcessNoReplay )
+    return "SALOME_Container_No_NS_Serv_OutProcess";
+  if( pyExecMode == SALOME::PyExecutionMode::OutOfProcessWithReplay )
+    return "SALOME_Container_No_NS_Serv_OutProcess_Replay";
+  // None of the known modes matched : report it then raise
+  ERROR_MESSAGE("Not manager py execution mode");
+  THROW_SALOME_EXCEPTION("GetCppBinaryOfKernelSSLContainer : Not manager py execution mode");
+}
+
std::string SALOME_ContainerManager::GetCppBinaryOfKernelContainer() const
{
- std::string ret = this->_isSSL ? "SALOME_Container_No_NS_Serv" : "SALOME_Container";
+ std::string ret = this->_isSSL ? GetCppBinaryOfKernelSSLContainer() : "SALOME_Container"; // when _isSSL is set the binary name is delegated to GetCppBinaryOfKernelSSLContainer, otherwise "SALOME_Container" is used
 return ret;
}
MESSAGE("[GiveContainer] Try to launch a new container on " << resource_selected);
// if a parallel container is launched in batch job, command is: "mpirun -np nbproc -machinefile nodesfile SALOME_MPIContainer"
if( GetenvThreadSafe("LIBBATCH_NODEFILE") != NULL && params.isMPI )
+ {
command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe, tmpFileName);
+ MESSAGE("[LaunchContainer] LIBBATCH_NODEFILE : \"" << command << "\"");
+ }
// if a container is launched on localhost, command is "SALOME_Container" or "mpirun -np nbproc SALOME_MPIContainer"
else if(hostname == Kernel_Utils::GetHostname())
+ {
command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe, tmpFileName);
+ MESSAGE("[LaunchContainer] hostname local : \"" << command << "\"");
+ }
// if a container is launched in remote mode, command is "ssh resource_selected SALOME_Container" or "ssh resource_selected mpirun -np nbproc SALOME_MPIContainer"
else
+ {
command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe);
+ MESSAGE("[LaunchContainer] remote : \"" << command << "\"");
+ }
//redirect stdout and stderr in a file
#ifdef WIN32
struct stat file_info;
stat(val, &file_info);
bool is_dir = S_ISDIR(file_info.st_mode);
- if (is_dir)logFilename=val;
- else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl;
+ if (is_dir)
+ logFilename=val;
+ else
+ MESSAGE( "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl );
}
logFilename += "/";
#endif
command += " > " + logFilename + " 2>&1";
MakeTheCommandToBeLaunchedASync(command);
+ MESSAGE("[LaunchContainer] SYSTEM COMMAND that will be launched : \"" << command << "\"");
// launch container with a system call
status=SystemThreadSafe(command.c_str());
}//end of critical of section
else
{
// Step 4: Wait for the container
- int count(GetTimeOutToLoaunchServer());
- INFOS("[GiveContainer] waiting " << count << " second steps container " << containerNameInNS);
+ double nbTurn = ( (double)this->_time_out_in_second ) * ( 1000.0 / ( (double) this->_delta_time_ns_lookup_in_ms) );
+ int count( (int)nbTurn );
+ INFOS("[GiveContainer] # attempts : " << count << " name in NS : \"" << containerNameInNS << "\"");
+ INFOS("[GiveContainer] # attempts : Time in second before time out : " << this->_time_out_in_second << " Delta time in ms between NS lookup : " << this->_delta_time_ns_lookup_in_ms);
while (CORBA::is_nil(ret) && count)
{
- SleepInSecond(1);
+ std::this_thread::sleep_for(std::chrono::milliseconds(_delta_time_ns_lookup_in_ms));
count--;
- MESSAGE("[GiveContainer] step " << count << " Waiting for container on " << resource_selected);
+ MESSAGE("[GiveContainer] step " << count << " Waiting for container on " << resource_selected << " with entry in NS = \"" << containerNameInNS << "\"" );
CORBA::Object_var obj(_NS->Resolve(containerNameInNS.c_str()));
ret=Engines::Container::_narrow(obj);
}
else
{
// Setting log file name
+ ret->locallogfilename( logFilename.c_str() );
logFilename=":"+logFilename;
logFilename="@"+Kernel_Utils::GetHostname()+logFilename;//threadsafe
logFilename=user+logFilename;
std::string wdir = params.workingdir.in();
if (!_isAppliSalomeDefined)
{
+ MESSAGE("[BuildCommandToLaunchRemoteContainer] NO APPLI MODE : " << " Protocol :" << resInfo.Protocol << " hostname :" << resInfo.HostName << " username : " << resInfo.UserName << " appli : " << resInfo.AppliPath << " wdir : \"" << wdir << "\"");
command = getCommandToRunRemoteProcessNoAppli(resInfo.Protocol, resInfo.HostName,
resInfo.UserName, resInfo.AppliPath,
wdir);
}
else
{
+ MESSAGE("[BuildCommandToLaunchRemoteContainer] WITH APPLI MODE : " << " Protocol :" << resInfo.Protocol << " hostname :" << resInfo.HostName << " username : " << resInfo.UserName << " appli : " << resInfo.AppliPath << " wdir : \"" << wdir << "\"");
// "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir
// SALOME_Container containerName -ORBInitRef NameService=IOR:01000..."
// or
// manage GIL
PyObject* mod(PyImport_ImportModule(theScriptName.c_str()));
+ MESSAGE("Template name :"<< theScriptName.c_str());
+
if (!mod)
{
- PyObject* sys = PyImport_ImportModule("sys");
- PyObject* sys_path = PyObject_GetAttrString(sys, "path");
- PyObject* folder_path = PyUnicode_FromString(getScriptTemplateFilePath().c_str());
+ AutoPyRef sys = PyImport_ImportModule("sys");
+ AutoPyRef sys_path = PyObject_GetAttrString(sys, "path");
+ AutoPyRef folder_path = PyUnicode_FromString(getScriptTemplateFilePath().c_str());
PyList_Append(sys_path, folder_path);
mod = PyImport_ImportModule(theScriptName.c_str());
- Py_XDECREF(folder_path);
- Py_XDECREF(sys_path);
- Py_XDECREF(sys);
}
if (mod)
Py_XDECREF(mod);
}
}
+ else
+ {
+ ERROR_MESSAGE("Can not import the template script \"" << theScriptName << "\" !");
+ }
MESSAGE("Command from template is ... " << command << std::endl);
return command;
std::string ompi_uri_file = GetenvThreadSafeAsString("OMPI_URI_FILE");
script_parameters.push(ompi_uri_file.empty() ? "NULL" : ompi_uri_file);
+ MESSAGE("Retrieving command from template (python module) \"" << script_name << "\"" << std::endl);
std::string command_from_template = GetCommandFromTemplate(script_name, script_parameters);
std::ostringstream o;
#endif
}
+/*!
+ * Return, in seconds, the time out granted to a server to be launched and
+ * to register itself into the naming service (NS)
+ */
int SALOME_ContainerManager::GetTimeOutToLoaunchServer()
{
int count(TIME_OUT_TO_LAUNCH_CONT);
else if (resInfo.Protocol == srun)
{
- command = "srun -n 1 -N 1 -s --mem-per-cpu=0 --cpu-bind=none --nodelist=";
+ command = "srun -n 1 -N 1 --overlap --mem-per-cpu=0 --cpu-bind=none --nodelist=";
std::string commandRcp = "rcp ";
commandRcp += tmpFileName;
commandRcp += " ";