From: Anthony Geay Date: Tue, 26 Mar 2024 07:44:09 +0000 (+0100) Subject: [EDF29852] : First implementation of out of process execution management X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=2e3012f890788d04b21d68ae959c0af481cd741b;p=modules%2Fkernel.git [EDF29852] : First implementation of out of process execution management --- diff --git a/src/Basics/KernelBasis.cxx b/src/Basics/KernelBasis.cxx index b26c0df57..86a38745b 100644 --- a/src/Basics/KernelBasis.cxx +++ b/src/Basics/KernelBasis.cxx @@ -72,3 +72,94 @@ void WriteInStderr(const std::string& msg) { std::cerr << msg << std::endl << std::flush; } + +namespace SALOME +{ + enum class PyExecutionMode { NotSet, InProcess, OutOfProcessNoReplay, OutOfProcessWithReplay }; + + static constexpr char IN_PROCESS_VALUE = 0; + static constexpr char IN_PROCESS_VALUE_STR[] = "InProcess"; + static constexpr char OUT_OF_PROCESS_NO_REPLAY_VALUE = 1; + static constexpr char OUT_OF_PROCESS_NO_REPLAY_VALUE_STR[] = "OutOfProcessNoReplay"; + static constexpr char OUT_OF_PROCESS_WITH_REPLAY_VALUE = 2; + static constexpr char OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR[] = "OutOfProcessWithReplay"; + + static PyExecutionMode FromIntToPyExecutionMode(char value) + { + switch(value) + { + case IN_PROCESS_VALUE: + return PyExecutionMode::InProcess; + case OUT_OF_PROCESS_NO_REPLAY_VALUE: + return PyExecutionMode::OutOfProcessNoReplay; + case OUT_OF_PROCESS_WITH_REPLAY_VALUE: + return PyExecutionMode::OutOfProcessWithReplay; + } + throw std::range_error("FromIntToPyExecutionMode : Invalid value for Py Execution Mode ! 
Must be in 0 (InProcess), 1 (OutOfProcessNoReplay) or 2 (OutOfProcessWithReplay) !"); + } + + static PyExecutionMode FromStrToPyExecutionMode(const std::string& value) + { + if(value == IN_PROCESS_VALUE_STR) + return PyExecutionMode::InProcess; + if(value == OUT_OF_PROCESS_NO_REPLAY_VALUE_STR) + return PyExecutionMode::OutOfProcessNoReplay; + if(value == OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR) + return PyExecutionMode::OutOfProcessWithReplay; + throw std::range_error("FromStrToPyExecutionMode : Invalid str value for py execution mode !"); + } + + static std::string FromExecutionModeToStr(PyExecutionMode execMode) + { + switch(execMode) + { + case PyExecutionMode::InProcess: + return IN_PROCESS_VALUE_STR; + case PyExecutionMode::OutOfProcessNoReplay: + return OUT_OF_PROCESS_NO_REPLAY_VALUE_STR; + case PyExecutionMode::OutOfProcessWithReplay: + return OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR; + default: + throw std::range_error("FromExecutionModeToStr : Invalid str value for py execution mode !"); + } + } +} + +static SALOME::PyExecutionMode DefaultPyExecMode = SALOME::PyExecutionMode::NotSet; + +void SALOME::SetPyExecutionMode(PyExecutionMode mode) +{ + DefaultPyExecMode = mode; +} + +void SALOME::SetPyExecutionModeStr(const std::string& mode) +{ + SALOME::SetPyExecutionMode( SALOME::FromStrToPyExecutionMode(mode) ); +} + +std::vector<std::string> SALOME::GetAllPyExecutionModes() +{ + return {IN_PROCESS_VALUE_STR,OUT_OF_PROCESS_NO_REPLAY_VALUE_STR,OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR}; +} + +std::string SALOME::GetPyExecutionModeStr() +{ + return SALOME::FromExecutionModeToStr( SALOME::GetPyExecutionMode() ); +} + +SALOME::PyExecutionMode SALOME::GetPyExecutionMode() +{ + auto isEnvVarSet = []() -> SALOME::PyExecutionMode + { + const char *envVar = std::getenv("SALOME_PY_EXECUTION_MODE"); + if (envVar && (envVar[0] != '\0')) + { + const int numValue = std::stoi(envVar); + return SALOME::FromIntToPyExecutionMode( static_cast<char>(numValue) ); + } + return SALOME::PyExecutionMode::InProcess; 
+ }; + if(DefaultPyExecMode == SALOME::PyExecutionMode::NotSet) + DefaultPyExecMode = isEnvVarSet(); + return DefaultPyExecMode; +} diff --git a/src/Basics/KernelBasis.hxx b/src/Basics/KernelBasis.hxx index cafaad2b3..76d25e892 100644 --- a/src/Basics/KernelBasis.hxx +++ b/src/Basics/KernelBasis.hxx @@ -22,6 +22,7 @@ #include "SALOME_Basics.hxx" #include <string> +#include <vector> bool BASICS_EXPORT getSSLMode(); void BASICS_EXPORT setSSLMode(bool sslMode); @@ -33,3 +34,13 @@ void BASICS_EXPORT setIOROfEmbeddedNS(const std::string& ior); void BASICS_EXPORT WriteInStdout(const std::string& msg); void BASICS_EXPORT WriteInStderr(const std::string& msg); + +namespace SALOME +{ + enum class PyExecutionMode; + void BASICS_EXPORT SetPyExecutionMode(PyExecutionMode mode); + void BASICS_EXPORT SetPyExecutionModeStr(const std::string& mode); + std::vector<std::string> BASICS_EXPORT GetAllPyExecutionModes(); + std::string BASICS_EXPORT GetPyExecutionModeStr(); + PyExecutionMode BASICS_EXPORT GetPyExecutionMode(); +} diff --git a/src/Basics/KernelBasis.i b/src/Basics/KernelBasis.i index 54616f81e..904878e3b 100644 --- a/src/Basics/KernelBasis.i +++ b/src/Basics/KernelBasis.i @@ -84,6 +84,9 @@ void WriteInStderr(const std::string& msg); %rename (SetVerbosityLevel) SetVerbosityLevelSwig; %rename (VerbosityLevel) VerbosityLevelSwig; +%rename (SetPyExecutionMode) SetPyExecutionModeStrSwig; +%rename (GetPyExecutionMode) GetPyExecutionModeStrSwig; +%rename (GetAllPyExecutionModes) GetAllPyExecutionModesSwig; %inline { @@ -117,6 +120,21 @@ std::string VerbosityLevelSwig() { return VerbosityLevelStr(); } + +void SetPyExecutionModeStrSwig(const std::string& mode) +{ + SetPyExecutionModeStr( mode ); +} + +std::string GetPyExecutionModeStrSwig() +{ + return GetPyExecutionModeStr(); +} + +std::vector<std::string> GetAllPyExecutionModesSwig() +{ + return GetAllPyExecutionModes(); +} } %pythoncode %{ diff --git a/src/Basics/libSALOMELog.cxx b/src/Basics/libSALOMELog.cxx index fab89f5d1..ece092484 100644 --- 
a/src/Basics/libSALOMELog.cxx +++ b/src/Basics/libSALOMELog.cxx @@ -160,6 +160,11 @@ namespace SALOME verbosityLevel = FromStrToVerbosityLevel(level); } + std::vector<std::string> GetAllVerbosityLevelPossibilitiesStr() + { + return {ERROR_LEVEL_VALUE_STR,WARNING_LEVEL_VALUE_STR,INFO_LEVEL_VALUE_STR,DEBUG_LEVEL_VALUE_STR}; + } + std::string VerbosityLevelStr() { return FromVerbosityLevelToStr( VerbosityLevel() ); diff --git a/src/Basics/libSALOMELog.hxx b/src/Basics/libSALOMELog.hxx index ef7e1774f..6a9468c6c 100644 --- a/src/Basics/libSALOMELog.hxx +++ b/src/Basics/libSALOMELog.hxx @@ -27,6 +27,7 @@ #include "SALOME_Basics.hxx" #include +#include <vector> namespace SALOME { @@ -44,4 +45,5 @@ namespace SALOME void BASICS_EXPORT AppendTimeClock(std::ostream& os); VerbosityLevelType BASICS_EXPORT VerbosityLevel(); std::string BASICS_EXPORT VerbosityLevelStr(); + std::vector<std::string> BASICS_EXPORT GetAllVerbosityLevelPossibilitiesStr(); } diff --git a/src/Container/Container_i.cxx b/src/Container/Container_i.cxx index 0ad306bb0..995b62444 100644 --- a/src/Container/Container_i.cxx +++ b/src/Container/Container_i.cxx @@ -145,14 +145,15 @@ Abstract_Engines_Container_i::Abstract_Engines_Container_i () : */ //============================================================================= -Abstract_Engines_Container_i::Abstract_Engines_Container_i (CORBA::ORB_ptr orb, +Abstract_Engines_Container_i::Abstract_Engines_Container_i (const std::string& pyContainerClsName, + CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, char *containerName , int argc , char* argv[], SALOME_NamingService_Container_Abstract *ns, bool isServantAloneInProcess ) : - _NS(nullptr),_id(0),_numInstance(0),_isServantAloneInProcess(isServantAloneInProcess) + _NS(nullptr),_py_container_name(pyContainerClsName),_id(0),_numInstance(0),_isServantAloneInProcess(isServantAloneInProcess) { _pid = (long)getpid(); @@ -195,7 +196,7 @@ Abstract_Engines_Container_i::Abstract_Engines_Container_i (CORBA::ORB_ptr orb, CORBA::String_var sior = 
_orb->object_to_string(pCont); std::ostringstream myCommand; - myCommand << "pyCont = SALOME_Container.SALOME_Container_i('" << _containerName << "','" << sior << "'," << DFT_TIME_INTERVAL_BTW_MEASURE << ")\n"; + myCommand << "pyCont = SALOME_Container." << this->getPyContainerClassName() << "('" << _containerName << "','" << sior << "'," << DFT_TIME_INTERVAL_BTW_MEASURE << ")\n"; INFO_MESSAGE("Python command executed : " << myCommand.str()); //[RNV]: Comment the PyEval_AcquireLock() and PyEval_ReleaseLock() because this @@ -2404,7 +2405,7 @@ static Engines_Container_SSL_i *_container_singleton_ssl = nullptr; static Engines::Container_var _container_ref_singleton_ssl; -Engines_Container_SSL_i *KERNEL::getContainerSA() +Abstract_Engines_Container_SSL_i *KERNEL::getContainerSA() { if(!_container_singleton_ssl) { diff --git a/src/Container/SALOME_Container.py b/src/Container/SALOME_Container.py index c9bc72d3a..71cc038d6 100644 --- a/src/Container/SALOME_Container.py +++ b/src/Container/SALOME_Container.py @@ -31,6 +31,7 @@ # \brief python implementation of container interface for Kernel # +import abc import os import sys import traceback @@ -52,7 +53,7 @@ from KernelBasis import VerbosityActivated,getSSLMode #define an implementation of the container interface for embedding in Container implemented in C++ -class SALOME_Container_i: +class SALOME_Container_Abstract_i(metaclass=abc.ABCMeta): _orb = None _poa = None _containerName = "" @@ -60,7 +61,7 @@ class SALOME_Container_i: #------------------------------------------------------------------------- - def __init__(self ,containerName, containerIORStr, dftTimeIntervalInMs): + def __init__(self, containerName, containerIORStr, dftTimeIntervalInMs): # Warning this part of code is called at the very first step of container launching # so logging is not instanciate. 
So use verbose method to discrimine if a message should be printed or not try: @@ -80,6 +81,10 @@ class SALOME_Container_i: self._log = None self._container = self._orb.string_to_object(containerIORStr) + @abc.abstractmethod + def getPyScriptCls(self): + raise RuntimeError("Must be overloaded") + @property def logm(self): logging.debug("Logm PID = {}".format(os.getpid())) @@ -160,7 +165,8 @@ class SALOME_Container_i: logscript = None if getSSLMode(): logscript = self._log.addScript(nodeName,code) - node=SALOME_PyNode.PyScriptNode_i(nodeName,code,self._poa,self, logscript) + cls = self.getPyScriptCls() + node = cls(nodeName,code,self._poa,self, logscript) id_o = self._poa.activate_object(node) comp_o = self._poa.id_to_reference(id_o) comp_iors = self._orb.object_to_string(comp_o) @@ -197,3 +203,24 @@ class SALOME_Container_i: def SetMonitoringtimeresms(self , value): self._timeIntervalInMs = value + +class SALOME_Container_i(SALOME_Container_Abstract_i): + def __init__(self, containerName, containerIORStr, dftTimeIntervalInMs): + super().__init__(containerName, containerIORStr, dftTimeIntervalInMs) + + def getPyScriptCls(self): + return SALOME_PyNode.PyScriptNode_i + +class SALOME_Container_OutOfProcess_i(SALOME_Container_i): + def __init__(self, containerName, containerIORStr, dftTimeIntervalInMs): + super().__init__(containerName, containerIORStr, dftTimeIntervalInMs) + + def getPyScriptCls(self): + return SALOME_PyNode.PyScriptNode_OutOfProcess_i + +class SALOME_Container_OutOfProcess_Replay_i(SALOME_Container_i): + def __init__(self, containerName, containerIORStr, dftTimeIntervalInMs): + super().__init__(containerName, containerIORStr, dftTimeIntervalInMs) + + def getPyScriptCls(self): + return SALOME_PyNode.PyScriptNode_OutOfProcess_Replay_i diff --git a/src/Container/SALOME_Container_i.hxx b/src/Container/SALOME_Container_i.hxx index 9f67a52ce..aa7fd5a83 100644 --- a/src/Container/SALOME_Container_i.hxx +++ b/src/Container/SALOME_Container_i.hxx @@ -55,7 +55,8 @@ class 
CONTAINER_EXPORT Abstract_Engines_Container_i : public virtual POA_Engines { public: Abstract_Engines_Container_i(); - Abstract_Engines_Container_i(CORBA::ORB_ptr orb, + Abstract_Engines_Container_i(const std::string& pyContainerClsName, + CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, char *containerName, int argc, char *argv[], @@ -65,6 +66,8 @@ public: virtual bool isSSLMode() const = 0; + std::string getPyContainerClassName() const { return _py_container_name; } + // --- CORBA methods virtual bool load_component_Library(const char *componentName, CORBA::String_out reason); @@ -187,6 +190,7 @@ protected: std::string _logfilename; std::string _localfilename; std::string _load_script; + std::string _py_container_name; CORBA::ORB_var _orb; PortableServer::POA_var _poa; PortableServer::ObjectId *_id; @@ -206,6 +210,11 @@ protected: bool _isServantAloneInProcess; }; +constexpr char PY_CONTAINER_CLS_NAME_IN_PROCESS[] = "SALOME_Container_i"; +constexpr char PY_CONTAINER_CLS_NAME_OUT_PROCESS_NO_REPLAY[] = "SALOME_Container_OutOfProcess_i"; +constexpr char PY_CONTAINER_CLS_NAME_OUT_PROCESS_WITH_REPLAY[] = "SALOME_Container_OutOfProcess_Replay_i"; + + class CONTAINER_EXPORT Engines_Container_i : public Abstract_Engines_Container_i { public: @@ -216,30 +225,66 @@ public: int argc, char *argv[], SALOME_NamingService_Container_Abstract *ns = nullptr, bool isServantAloneInProcess = true) : - Abstract_Engines_Container_i(orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} + Abstract_Engines_Container_i(PY_CONTAINER_CLS_NAME_IN_PROCESS, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} bool isSSLMode() const override { return false; } }; -class CONTAINER_EXPORT Engines_Container_SSL_i : public Abstract_Engines_Container_i +class CONTAINER_EXPORT Abstract_Engines_Container_SSL_i : public Abstract_Engines_Container_i { public: - Engines_Container_SSL_i(); - Engines_Container_SSL_i(CORBA::ORB_ptr orb, + Abstract_Engines_Container_SSL_i(const 
std::string& pyContainerClsName, + CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, char *containerName, int argc, char *argv[], SALOME_NamingService_Container_Abstract *ns = nullptr, bool isServantAloneInProcess = true) : - Abstract_Engines_Container_i(orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} + Abstract_Engines_Container_i(pyContainerClsName, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} bool isSSLMode() const override { return true; } }; +class CONTAINER_EXPORT Engines_Container_SSL_i : public Abstract_Engines_Container_SSL_i +{ +public: + Engines_Container_SSL_i(CORBA::ORB_ptr orb, + PortableServer::POA_ptr poa, + char *containerName, + int argc, char *argv[], + SALOME_NamingService_Container_Abstract *ns = nullptr, + bool isServantAloneInProcess = true) : + Abstract_Engines_Container_SSL_i(PY_CONTAINER_CLS_NAME_IN_PROCESS, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} +}; + +class CONTAINER_EXPORT Engines_Container_SSL_OutOfProcess_i : public Abstract_Engines_Container_SSL_i +{ +public: + Engines_Container_SSL_OutOfProcess_i(CORBA::ORB_ptr orb, + PortableServer::POA_ptr poa, + char *containerName, + int argc, char *argv[], + SALOME_NamingService_Container_Abstract *ns = nullptr, + bool isServantAloneInProcess = true) : + Abstract_Engines_Container_SSL_i(PY_CONTAINER_CLS_NAME_OUT_PROCESS_NO_REPLAY, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} +}; + +class CONTAINER_EXPORT Engines_Container_SSL_OutOfProcess_Replay_i : public Abstract_Engines_Container_SSL_i +{ +public: + Engines_Container_SSL_OutOfProcess_Replay_i(CORBA::ORB_ptr orb, + PortableServer::POA_ptr poa, + char *containerName, + int argc, char *argv[], + SALOME_NamingService_Container_Abstract *ns = nullptr, + bool isServantAloneInProcess = true) : + Abstract_Engines_Container_SSL_i(PY_CONTAINER_CLS_NAME_OUT_PROCESS_WITH_REPLAY, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} +}; + /*! 
* Methods to be used in SSL mode to skip NS. */ namespace KERNEL { - CONTAINER_EXPORT Engines_Container_SSL_i *getContainerSA(); + CONTAINER_EXPORT Abstract_Engines_Container_SSL_i *getContainerSA(); CONTAINER_EXPORT Engines::Container_var getContainerRefSA(); } // namespace KERNEL diff --git a/src/Container/SALOME_PyNode.py b/src/Container/SALOME_PyNode.py index df2e5bb5e..4316aa5a8 100644 --- a/src/Container/SALOME_PyNode.py +++ b/src/Container/SALOME_PyNode.py @@ -30,6 +30,7 @@ import Engines__POA import SALOME__POA import SALOME import logging +import abc import os import sys from SALOME_ContainerHelper import ScriptExecInfo @@ -774,7 +775,7 @@ Looks like a hard crash as returnCode {returnCode} != 1 {banner} """ -def ExecCrashProof( code, context, outargsname ): +def ExecCrashProofGeneric( code, context, outargsname, keepFilesToReplay ): """ Equivalent of exec(code,context) but executed in a separate subprocess to avoid to make the current process crash. @@ -783,6 +784,7 @@ def ExecCrashProof( code, context, outargsname ): code (str) : python code to be executed using context context (dict) : context to be used for execution. This context will be updated in accordance with the execution of code. + keepFilesToReplay (bool) : if True when something goes wrong during execution all the files to replay post mortem case are kept. If False only error is reported but files to replay are destroyed. 
""" import tempfile import pickle @@ -817,9 +819,18 @@ def ExecCrashProof( code, context, outargsname ): context.update( evParams.result ) evParams.destroyOnOK() if returnCode != 0: - evParams.destroyOnKO() + if keepFilesToReplay: + evParams.destroyOnKO() + else: + evParams.destroyOnOK() raise RuntimeError(f"Subprocess launched {evParams.strDependingOnReturnCode(returnCode)}stdout :\n{stdout}\nstderr :\n{stderr}") +def ExecCrashProofWithReplay( code, context, outargsname ): + return ExecCrashProofGeneric(code, context, outargsname, True) + +def ExecCrashProofWithoutReplay( code, context, outargsname ): + return ExecCrashProofGeneric(code, context, outargsname, False) + def ExecLocal( code, context, outargsname ): exec( code, context ) @@ -838,9 +849,9 @@ class LogOfCurrentExecutionSession: def finalizeAndPushToMaster(self): self._remote_handle.assign( pickle.dumps( self._current_instance ) ) -class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): +class PyScriptNode_Abstract_i(Engines__POA.PyScriptNode,Generic,metaclass=abc.ABCMeta): """The implementation of the PyScriptNode CORBA IDL that executes a script""" - def __init__(self, nodeName,code,poa,my_container,logscript): + def __init__(self, nodeName, code, poa, my_container, logscript): """Initialize the node : compilation in the local context""" Generic.__init__(self,poa) self.nodeName=nodeName @@ -854,6 +865,10 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): self._log_script = logscript self._current_execution_session = None sys.stdout.flush() ; sys.stderr.flush() # flush to correctly capture log per execution session + + @abc.abstractmethod + def executeNow(self, outargsname): + raise RuntimeError("Must be overloaded") def __del__(self): # force removal of self.context. 
Don t know why it s not done by default @@ -954,7 +969,7 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): ## self.addInfoOnLevel2("measureTimeResolution",self.my_container_py.monitoringtimeresms()) with GenericPythonMonitoringLauncherCtxMgr( CPUMemoryMonitoring( self.my_container_py.monitoringtimeresms() ) ) as monitoringParams: - exec(self.ccode, self.context) + self.executeNow( outargsname ) cpumeminfo = ReadCPUMemInfo( monitoringParams ) ## self.addInfoOnLevel2("CPUMemDuringExec",cpumeminfo) @@ -1038,3 +1053,24 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): def addTimeInfoOnLevel2(self, key): from datetime import datetime self._current_execution_session.addInfoOnLevel2(key,datetime.now()) + +class PyScriptNode_i(PyScriptNode_Abstract_i): + def __init__(self, nodeName, code, poa, my_container, logscript): + super().__init__(nodeName, code, poa, my_container, logscript) + + def executeNow(self, outargsname): + ExecLocal(self.ccode,self.context,outargsname) + +class PyScriptNode_OutOfProcess_i(PyScriptNode_Abstract_i): + def __init__(self, nodeName, code, poa, my_container, logscript): + super().__init__(nodeName, code, poa, my_container, logscript) + + def executeNow(self, outargsname): + ExecCrashProofWithoutReplay(self.ccode,self.context,outargsname) + +class PyScriptNode_OutOfProcess_Replay_i(PyScriptNode_Abstract_i): + def __init__(self, nodeName, code, poa, my_container, logscript): + super().__init__(nodeName, code, poa, my_container, logscript) + + def executeNow(self, outargsname): + ExecCrashProofWithReplay(self.ccode,self.context,outargsname)