From b397830a2104a47d47a190819eca35a269b32b8f Mon Sep 17 00:00:00 2001 From: Anthony Geay Date: Mon, 25 Mar 2024 11:47:54 +0100 Subject: [PATCH] [EDF29852] : Mecanism of fault tolerant in SALOME_Container to resist against emitted signals during computation --- idl/SALOME_Component.idl | 5 + src/Basics/KernelBasis.cxx | 89 +++++++++ src/Basics/KernelBasis.hxx | 11 + src/Basics/KernelBasis.i | 19 ++ src/Basics/libSALOMELog.cxx | 5 + src/Basics/libSALOMELog.hxx | 2 + src/Container/CMakeLists.txt | 8 +- src/Container/Container_i.cxx | 49 ++++- src/Container/SALOME_Container.py | 33 ++- src/Container/SALOME_ContainerHelper.py | 7 +- src/Container/SALOME_ContainerManager.cxx | 21 +- src/Container/SALOME_ContainerManager.hxx | 2 + src/Container/SALOME_Container_No_NS_Serv.cxx | 24 +-- .../SALOME_Container_No_NS_Serv_Generic.hxx | 43 ++++ ...SALOME_Container_No_NS_Serv_OutProcess.cxx | 22 ++ ...Container_No_NS_Serv_OutProcess_Replay.cxx | 22 ++ src/Container/SALOME_Container_i.hxx | 65 +++++- src/Container/SALOME_PyNode.py | 189 +++++++++++++++++- src/KERNEL_PY/__init__.py | 8 +- src/Launcher/Test/CMakeLists.txt | 2 +- src/Launcher/Test/CTestTestfileInstall.cmake | 4 + src/Launcher/Test/testCrashProofContainer.py | 127 ++++++++++++ 22 files changed, 711 insertions(+), 46 deletions(-) create mode 100644 src/Container/SALOME_Container_No_NS_Serv_Generic.hxx create mode 100644 src/Container/SALOME_Container_No_NS_Serv_OutProcess.cxx create mode 100644 src/Container/SALOME_Container_No_NS_Serv_OutProcess_Replay.cxx create mode 100644 src/Launcher/Test/testCrashProofContainer.py diff --git a/idl/SALOME_Component.idl b/idl/SALOME_Component.idl index b3585af45..f31438844 100644 --- a/idl/SALOME_Component.idl +++ b/idl/SALOME_Component.idl @@ -65,6 +65,7 @@ module Engines typedef sequence FieldsDict; typedef sequence vectorOfDouble; typedef sequence vectorOfString; + typedef sequence vectorOfVectorOfString; interface EngineComponent ; interface fileRef ; @@ -85,6 +86,10 @@ module 
Engines FieldsDict get_os_environment(); + void addLogFileNameGroup(in vectorOfString groupOfLogFileNames); + + vectorOfVectorOfString getAllLogFileNameGroups(); + void execute_python_code( in string code ) raises(SALOME::SALOME_Exception); /*! \brief Loads a new component class (dynamic library). diff --git a/src/Basics/KernelBasis.cxx b/src/Basics/KernelBasis.cxx index b26c0df57..db29c9d87 100644 --- a/src/Basics/KernelBasis.cxx +++ b/src/Basics/KernelBasis.cxx @@ -72,3 +72,92 @@ void WriteInStderr(const std::string& msg) { std::cerr << msg << std::endl << std::flush; } + +namespace SALOME +{ + static constexpr char IN_PROCESS_VALUE = 0; + static constexpr char IN_PROCESS_VALUE_STR[] = "InProcess"; + static constexpr char OUT_OF_PROCESS_NO_REPLAY_VALUE = 1; + static constexpr char OUT_OF_PROCESS_NO_REPLAY_VALUE_STR[] = "OutOfProcessNoReplay"; + static constexpr char OUT_OF_PROCESS_WITH_REPLAY_VALUE = 2; + static constexpr char OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR[] = "OutOfProcessWithReplay"; + + static PyExecutionMode FromIntToPyExecutionMode(char value) + { + switch(value) + { + case IN_PROCESS_VALUE: + return PyExecutionMode::InProcess; + case OUT_OF_PROCESS_NO_REPLAY_VALUE: + return PyExecutionMode::OutOfProcessNoReplay; + case OUT_OF_PROCESS_WITH_REPLAY_VALUE: + return PyExecutionMode::OutOfProcessWithReplay; + } + throw std::range_error("FromIntToPyExecutionMode : Invalid value for Py Execution Mode ! 
Must be in 0 (InProcess), 1 (OutOfProcessNoReplay) or 2 (OutOfProcessWithReplay) !"); + } + + static PyExecutionMode FromStrToPyExecutionMode(const std::string& value) + { + if(value == IN_PROCESS_VALUE_STR) + return PyExecutionMode::InProcess; + if(value == OUT_OF_PROCESS_NO_REPLAY_VALUE_STR) + return PyExecutionMode::OutOfProcessNoReplay; + if(value == OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR) + return PyExecutionMode::OutOfProcessWithReplay; + throw std::range_error("FromStrToPyExecutionMode : Invalid str value for py execution mode !"); + } + + static std::string FromExecutionModeToStr(PyExecutionMode execMode) + { + switch(execMode) + { + case PyExecutionMode::InProcess: + return IN_PROCESS_VALUE_STR; + case PyExecutionMode::OutOfProcessNoReplay: + return OUT_OF_PROCESS_NO_REPLAY_VALUE_STR; + case PyExecutionMode::OutOfProcessWithReplay: + return OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR; + default: + throw std::range_error("FromExecutionModeToStr : Invalid str value for py execution mode !"); + } + } +} + +static SALOME::PyExecutionMode DefaultPyExecMode = SALOME::PyExecutionMode::NotSet; + +void SALOME::SetPyExecutionMode(PyExecutionMode mode) +{ + DefaultPyExecMode = mode; +} + +void SALOME::SetPyExecutionModeStr(const std::string& mode) +{ + SALOME::SetPyExecutionMode( SALOME::FromStrToPyExecutionMode(mode) ); +} + +std::vector SALOME::GetAllPyExecutionModes() +{ + return {IN_PROCESS_VALUE_STR,OUT_OF_PROCESS_NO_REPLAY_VALUE_STR,OUT_OF_PROCESS_WITH_REPLAY_VALUE_STR}; +} + +std::string SALOME::GetPyExecutionModeStr() +{ + return SALOME::FromExecutionModeToStr( SALOME::GetPyExecutionMode() ); +} + +SALOME::PyExecutionMode SALOME::GetPyExecutionMode() +{ + auto isEnvVarSet = []() -> SALOME::PyExecutionMode + { + const char *envVar = std::getenv("SALOME_PY_EXECUTION_MODE"); + if (envVar && (envVar[0] != '\0')) + { + const int numValue = std::stoi(envVar); + return SALOME::FromIntToPyExecutionMode( static_cast(numValue) ); + } + return SALOME::PyExecutionMode::InProcess; 
+ }; + if(DefaultPyExecMode == SALOME::PyExecutionMode::NotSet) + DefaultPyExecMode = isEnvVarSet(); + return DefaultPyExecMode; +} diff --git a/src/Basics/KernelBasis.hxx b/src/Basics/KernelBasis.hxx index cafaad2b3..4a42e4257 100644 --- a/src/Basics/KernelBasis.hxx +++ b/src/Basics/KernelBasis.hxx @@ -22,6 +22,7 @@ #include "SALOME_Basics.hxx" #include +#include bool BASICS_EXPORT getSSLMode(); void BASICS_EXPORT setSSLMode(bool sslMode); @@ -33,3 +34,13 @@ void BASICS_EXPORT setIOROfEmbeddedNS(const std::string& ior); void BASICS_EXPORT WriteInStdout(const std::string& msg); void BASICS_EXPORT WriteInStderr(const std::string& msg); + +namespace SALOME +{ + enum class PyExecutionMode { NotSet, InProcess, OutOfProcessNoReplay, OutOfProcessWithReplay }; + void BASICS_EXPORT SetPyExecutionMode(PyExecutionMode mode); + void BASICS_EXPORT SetPyExecutionModeStr(const std::string& mode); + std::vector BASICS_EXPORT GetAllPyExecutionModes(); + std::string BASICS_EXPORT GetPyExecutionModeStr(); + PyExecutionMode BASICS_EXPORT GetPyExecutionMode(); +} diff --git a/src/Basics/KernelBasis.i b/src/Basics/KernelBasis.i index 54616f81e..8979e3eab 100644 --- a/src/Basics/KernelBasis.i +++ b/src/Basics/KernelBasis.i @@ -33,6 +33,7 @@ using namespace SALOME; %include std_vector.i %template(dvec) std::vector; +%template(strvec) std::vector; %exception { try @@ -84,6 +85,9 @@ void WriteInStderr(const std::string& msg); %rename (SetVerbosityLevel) SetVerbosityLevelSwig; %rename (VerbosityLevel) VerbosityLevelSwig; +%rename (SetPyExecutionMode) SetPyExecutionModeStrSwig; +%rename (GetPyExecutionMode) GetPyExecutionModeStrSwig; +%rename (GetAllPyExecutionModes) GetAllPyExecutionModesSwig; %inline { @@ -117,6 +121,21 @@ std::string VerbosityLevelSwig() { return VerbosityLevelStr(); } + +void SetPyExecutionModeStrSwig(const std::string& mode) +{ + SetPyExecutionModeStr( mode ); +} + +std::string GetPyExecutionModeStrSwig() +{ + return GetPyExecutionModeStr(); +} + +std::vector 
GetAllPyExecutionModesSwig() +{ + return GetAllPyExecutionModes(); +} } %pythoncode %{ diff --git a/src/Basics/libSALOMELog.cxx b/src/Basics/libSALOMELog.cxx index fab89f5d1..ece092484 100644 --- a/src/Basics/libSALOMELog.cxx +++ b/src/Basics/libSALOMELog.cxx @@ -160,6 +160,11 @@ namespace SALOME verbosityLevel = FromStrToVerbosityLevel(level); } + std::vector GetAllVerbosityLevelPossibilitiesStr() + { + return {ERROR_LEVEL_VALUE_STR,WARNING_LEVEL_VALUE_STR,INFO_LEVEL_VALUE_STR,DEBUG_LEVEL_VALUE_STR}; + } + std::string VerbosityLevelStr() { return FromVerbosityLevelToStr( VerbosityLevel() ); diff --git a/src/Basics/libSALOMELog.hxx b/src/Basics/libSALOMELog.hxx index ef7e1774f..6a9468c6c 100644 --- a/src/Basics/libSALOMELog.hxx +++ b/src/Basics/libSALOMELog.hxx @@ -27,6 +27,7 @@ #include "SALOME_Basics.hxx" #include +#include namespace SALOME { @@ -44,4 +45,5 @@ namespace SALOME void BASICS_EXPORT AppendTimeClock(std::ostream& os); VerbosityLevelType BASICS_EXPORT VerbosityLevel(); std::string BASICS_EXPORT VerbosityLevelStr(); + std::vector BASICS_EXPORT GetAllVerbosityLevelPossibilitiesStr(); } diff --git a/src/Container/CMakeLists.txt b/src/Container/CMakeLists.txt index e05955add..4eb507ecc 100644 --- a/src/Container/CMakeLists.txt +++ b/src/Container/CMakeLists.txt @@ -113,11 +113,17 @@ TARGET_LINK_LIBRARIES(SALOME_Container SalomeContainerServer) ADD_EXECUTABLE(SALOME_Container_No_NS_Serv SALOME_Container_No_NS_Serv.cxx) TARGET_LINK_LIBRARIES(SALOME_Container_No_NS_Serv SalomeContainerServer) +ADD_EXECUTABLE(SALOME_Container_No_NS_Serv_OutProcess SALOME_Container_No_NS_Serv_OutProcess.cxx) +TARGET_LINK_LIBRARIES(SALOME_Container_No_NS_Serv_OutProcess SalomeContainerServer) + +ADD_EXECUTABLE(SALOME_Container_No_NS_Serv_OutProcess_Replay SALOME_Container_No_NS_Serv_OutProcess_Replay.cxx) +TARGET_LINK_LIBRARIES(SALOME_Container_No_NS_Serv_OutProcess_Replay SalomeContainerServer) + IF(SALOME_BUILD_TESTS) ADD_EXECUTABLE(TestSalome_file TestSalome_file.cxx) 
TARGET_LINK_LIBRARIES(TestSalome_file SALOMETraceCollectorTest ${SALOME_Container_LIBS}) ENDIF() -INSTALL(TARGETS SALOME_Container SALOME_Container_No_NS_Serv DESTINATION ${SALOME_INSTALL_BINS}) +INSTALL(TARGETS SALOME_Container SALOME_Container_No_NS_Serv SALOME_Container_No_NS_Serv_OutProcess SALOME_Container_No_NS_Serv_OutProcess_Replay DESTINATION ${SALOME_INSTALL_BINS}) # Executable scripts to be installed SALOME_INSTALL_SCRIPTS("${SCRIPTS}" ${SALOME_INSTALL_SCRIPT_PYTHON}) diff --git a/src/Container/Container_i.cxx b/src/Container/Container_i.cxx index 0ad306bb0..aee98c06c 100644 --- a/src/Container/Container_i.cxx +++ b/src/Container/Container_i.cxx @@ -145,14 +145,15 @@ Abstract_Engines_Container_i::Abstract_Engines_Container_i () : */ //============================================================================= -Abstract_Engines_Container_i::Abstract_Engines_Container_i (CORBA::ORB_ptr orb, +Abstract_Engines_Container_i::Abstract_Engines_Container_i (const std::string& pyContainerClsName, + CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, char *containerName , int argc , char* argv[], SALOME_NamingService_Container_Abstract *ns, bool isServantAloneInProcess ) : - _NS(nullptr),_id(0),_numInstance(0),_isServantAloneInProcess(isServantAloneInProcess) + _NS(nullptr),_py_container_name(pyContainerClsName),_id(0),_numInstance(0),_isServantAloneInProcess(isServantAloneInProcess) { _pid = (long)getpid(); @@ -195,7 +196,7 @@ Abstract_Engines_Container_i::Abstract_Engines_Container_i (CORBA::ORB_ptr orb, CORBA::String_var sior = _orb->object_to_string(pCont); std::ostringstream myCommand; - myCommand << "pyCont = SALOME_Container.SALOME_Container_i('" << _containerName << "','" << sior << "'," << DFT_TIME_INTERVAL_BTW_MEASURE << ")\n"; + myCommand << "pyCont = SALOME_Container." 
<< this->getPyContainerClassName() << "('" << _containerName << "','" << sior << "'," << DFT_TIME_INTERVAL_BTW_MEASURE << ")\n"; INFO_MESSAGE("Python command executed : " << myCommand.str()); //[RNV]: Comment the PyEval_AcquireLock() and PyEval_ReleaseLock() because this @@ -1168,6 +1169,46 @@ Engines::FieldsDict *Abstract_Engines_Container_i::get_os_environment() return ret.release(); } +Engines::vectorOfString_var FromVecStringCppToCORBA( const std::vector& group) +{ + Engines::vectorOfString_var ret( new Engines::vectorOfString ); + auto sz( group.size() ); + ret->length( sz ); + for(auto i = 0 ; i < sz ; ++i) + { + ret[i] = CORBA::string_dup( group[i].c_str() ); + } + return ret; +} + +std::vector FromCORBAVecStringToCpp(const Engines::vectorOfString& groupOfLogFileNames) +{ + auto len = groupOfLogFileNames.length(); + std::vector ret( len ); + for( auto i = 0 ; i < len ; ++i ) + { + ret[i] = groupOfLogFileNames[i]; + } + return ret; +} + +void Abstract_Engines_Container_i::addLogFileNameGroup(const Engines::vectorOfString& groupOfLogFileNames) +{ + this->_groups_of_log_files.push_back( FromCORBAVecStringToCpp(groupOfLogFileNames) ); +} + +Engines::vectorOfVectorOfString *Abstract_Engines_Container_i::getAllLogFileNameGroups() +{ + std::unique_ptr ret( new Engines::vectorOfVectorOfString ); + auto nbOfGrps = this->_groups_of_log_files.size(); + ret->length( nbOfGrps ); + for(auto i = 0 ; i < nbOfGrps ; ++i) + { + (*ret)[i] = FromVecStringCppToCORBA( _groups_of_log_files[i] ); + } + return ret.release(); +} + void Abstract_Engines_Container_i::execute_python_code(const char *code) { AutoGIL gstate; @@ -2404,7 +2445,7 @@ static Engines_Container_SSL_i *_container_singleton_ssl = nullptr; static Engines::Container_var _container_ref_singleton_ssl; -Engines_Container_SSL_i *KERNEL::getContainerSA() +Abstract_Engines_Container_SSL_i *KERNEL::getContainerSA() { if(!_container_singleton_ssl) { diff --git a/src/Container/SALOME_Container.py 
b/src/Container/SALOME_Container.py index c9bc72d3a..94c48e7c5 100644 --- a/src/Container/SALOME_Container.py +++ b/src/Container/SALOME_Container.py @@ -31,6 +31,7 @@ # \brief python implementation of container interface for Kernel # +import abc import os import sys import traceback @@ -52,7 +53,7 @@ from KernelBasis import VerbosityActivated,getSSLMode #define an implementation of the container interface for embedding in Container implemented in C++ -class SALOME_Container_i: +class SALOME_Container_Abstract_i(abc.ABC): _orb = None _poa = None _containerName = "" @@ -60,7 +61,7 @@ class SALOME_Container_i: #------------------------------------------------------------------------- - def __init__(self ,containerName, containerIORStr, dftTimeIntervalInMs): + def __init__(self, containerName, containerIORStr, dftTimeIntervalInMs): # Warning this part of code is called at the very first step of container launching # so logging is not instanciate. So use verbose method to discrimine if a message should be printed or not try: @@ -80,6 +81,10 @@ class SALOME_Container_i: self._log = None self._container = self._orb.string_to_object(containerIORStr) + @abc.abstractmethod + def getPyScriptCls(self): + raise RuntimeError("Must be overloaded") + @property def logm(self): logging.debug("Logm PID = {}".format(os.getpid())) @@ -160,7 +165,8 @@ class SALOME_Container_i: logscript = None if getSSLMode(): logscript = self._log.addScript(nodeName,code) - node=SALOME_PyNode.PyScriptNode_i(nodeName,code,self._poa,self, logscript) + cls = self.getPyScriptCls() + node = cls(nodeName,code,self._poa,self, logscript) id_o = self._poa.activate_object(node) comp_o = self._poa.id_to_reference(id_o) comp_iors = self._orb.object_to_string(comp_o) @@ -197,3 +203,24 @@ class SALOME_Container_i: def SetMonitoringtimeresms(self , value): self._timeIntervalInMs = value + +class SALOME_Container_i(SALOME_Container_Abstract_i): + def __init__(self, containerName, containerIORStr, 
dftTimeIntervalInMs): + super().__init__(containerName, containerIORStr, dftTimeIntervalInMs) + + def getPyScriptCls(self): + return SALOME_PyNode.PyScriptNode_i + +class SALOME_Container_OutOfProcess_i(SALOME_Container_i): + def __init__(self, containerName, containerIORStr, dftTimeIntervalInMs): + super().__init__(containerName, containerIORStr, dftTimeIntervalInMs) + + def getPyScriptCls(self): + return SALOME_PyNode.PyScriptNode_OutOfProcess_i + +class SALOME_Container_OutOfProcess_Replay_i(SALOME_Container_i): + def __init__(self, containerName, containerIORStr, dftTimeIntervalInMs): + super().__init__(containerName, containerIORStr, dftTimeIntervalInMs) + + def getPyScriptCls(self): + return SALOME_PyNode.PyScriptNode_OutOfProcess_Replay_i diff --git a/src/Container/SALOME_ContainerHelper.py b/src/Container/SALOME_ContainerHelper.py index 70bd9d6f6..82bef6bfa 100644 --- a/src/Container/SALOME_ContainerHelper.py +++ b/src/Container/SALOME_ContainerHelper.py @@ -432,9 +432,14 @@ class ScriptInfoAbstract: class ScriptInfoClt(ScriptInfoAbstract): def __init__(self, scriptPtr): + def unPickledSafe( dataPickled ): + if len(dataPickled) > 0: + return pickle.loads(dataPickled) + else: + return None self._node_name = scriptPtr.getName() self._code = scriptPtr.getCode() - self._exec = [pickle.loads(elt.getObj()) for elt in scriptPtr.listOfExecs()] + self._exec = [unPickledSafe(elt.getObj()) for elt in scriptPtr.listOfExecs()] class ScriptInfo(ScriptInfoAbstract): def __init__(self, nodeName, code, execs): diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 20776769f..232f47eb1 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -31,6 +31,7 @@ #include "Basics_Utils.hxx" #include "Basics_DirUtils.hxx" #include "PythonCppUtils.hxx" +#include "KernelBasis.hxx" #include #include #include @@ -558,9 +559,27 @@ Engines::Container_ptr 
SALOME_ContainerManager::GiveContainer(const Engines::Con return ret; } +std::string SALOME_ContainerManager::GetCppBinaryOfKernelSSLContainer() const +{ + switch( SALOME::GetPyExecutionMode() ) + { + case SALOME::PyExecutionMode::InProcess: + return "SALOME_Container_No_NS_Serv"; + case SALOME::PyExecutionMode::OutOfProcessNoReplay: + return "SALOME_Container_No_NS_Serv_OutProcess"; + case SALOME::PyExecutionMode::OutOfProcessWithReplay: + return "SALOME_Container_No_NS_Serv_OutProcess_Replay"; + default: + { + ERROR_MESSAGE("Not manager py execution mode"); + THROW_SALOME_EXCEPTION("GetCppBinaryOfKernelSSLContainer : Not manager py execution mode"); + } + } +} + std::string SALOME_ContainerManager::GetCppBinaryOfKernelContainer() const { - std::string ret = this->_isSSL ? "SALOME_Container_No_NS_Serv" : "SALOME_Container"; + std::string ret = this->_isSSL ? GetCppBinaryOfKernelSSLContainer() : "SALOME_Container"; return ret; } diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index 330d3ed7c..1ca71fef2 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -94,6 +94,8 @@ protected: FindContainer(const Engines::ContainerParameters& params, const std::string& resource); + std::string GetCppBinaryOfKernelSSLContainer() const; + std::string GetCppBinaryOfKernelContainer() const; std::string GetRunRemoteExecutableScript() const; diff --git a/src/Container/SALOME_Container_No_NS_Serv.cxx b/src/Container/SALOME_Container_No_NS_Serv.cxx index 2494313d9..f7df00250 100644 --- a/src/Container/SALOME_Container_No_NS_Serv.cxx +++ b/src/Container/SALOME_Container_No_NS_Serv.cxx @@ -17,26 +17,6 @@ // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // -#include "SALOME_Container_Common.cxx" -#include "SALOME_Container_i.hxx" -#include "SALOME_Embedded_NamingService_Client.hxx" -#include "Utils_SALOME_Exception.hxx" -#include "SALOME_KernelORB.hxx" 
-#include "KernelBasis.hxx" +#include "SALOME_Container_No_NS_Serv_Generic.hxx" -int main(int argc, char* argv[]) -{ - if(argc<3) - THROW_SALOME_EXCEPTION( "SALOME_Container_No_NS_Serv : requires 2 input arguments " ); - CORBA::ORB_ptr orb(KERNEL::getORB()); - std::string IOROfEmbeddedNamingService(argv[2]); - setIOROfEmbeddedNS(IOROfEmbeddedNamingService); - CORBA::Object_var ns_serv_obj_base = orb->string_to_object(IOROfEmbeddedNamingService.c_str()); - if( CORBA::is_nil(ns_serv_obj_base) ) - THROW_SALOME_EXCEPTION( "SALOME_Container_No_NS_Serv : argument 2 is NOT a valid IOR" ); - Engines::EmbeddedNamingService_var ns_serv_obj = Engines::EmbeddedNamingService::_narrow(ns_serv_obj_base); - if( CORBA::is_nil(ns_serv_obj) ) - THROW_SALOME_EXCEPTION( "SALOME_Container_No_NS_Serv : argument 2 is NOT a valid IOR of Engines::EmbeddedNamingService" ); - std::unique_ptr ns( new SALOME_Embedded_NamingService_Client(ns_serv_obj) ); - return container_common_main(argc,argv,std::move(ns)); -} +GENERIC_CONTAINER_EXECUTABLE( Engines_Container_SSL_i ) diff --git a/src/Container/SALOME_Container_No_NS_Serv_Generic.hxx b/src/Container/SALOME_Container_No_NS_Serv_Generic.hxx new file mode 100644 index 000000000..d5bc1e7f6 --- /dev/null +++ b/src/Container/SALOME_Container_No_NS_Serv_Generic.hxx @@ -0,0 +1,43 @@ +// Copyright (C) 2021-2024 CEA, EDF +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include "SALOME_Container_Common.cxx" +#include "SALOME_Container_i.hxx" +#include "SALOME_Embedded_NamingService_Client.hxx" +#include "Utils_SALOME_Exception.hxx" +#include "SALOME_KernelORB.hxx" +#include "KernelBasis.hxx" + +#define GENERIC_CONTAINER_EXECUTABLE( cls ) \ +int main(int argc, char* argv[]) \ +{ \ + if(argc<3) \ + THROW_SALOME_EXCEPTION( "SALOME_Container_No_NS_Serv : requires 2 input arguments " ); \ + CORBA::ORB_ptr orb(KERNEL::getORB()); \ + std::string IOROfEmbeddedNamingService(argv[2]); \ + setIOROfEmbeddedNS(IOROfEmbeddedNamingService); \ + CORBA::Object_var ns_serv_obj_base = orb->string_to_object(IOROfEmbeddedNamingService.c_str()); \ + if( CORBA::is_nil(ns_serv_obj_base) ) \ + THROW_SALOME_EXCEPTION( "SALOME_Container_No_NS_Serv : argument 2 is NOT a valid IOR" ); \ + Engines::EmbeddedNamingService_var ns_serv_obj = Engines::EmbeddedNamingService::_narrow(ns_serv_obj_base); \ + if( CORBA::is_nil(ns_serv_obj) ) \ + THROW_SALOME_EXCEPTION( "SALOME_Container_No_NS_Serv : argument 2 is NOT a valid IOR of Engines::EmbeddedNamingService" ); \ + std::unique_ptr ns( new SALOME_Embedded_NamingService_Client(ns_serv_obj) ); \ + return container_common_main(argc,argv,std::move(ns)); \ +} diff --git a/src/Container/SALOME_Container_No_NS_Serv_OutProcess.cxx b/src/Container/SALOME_Container_No_NS_Serv_OutProcess.cxx new file mode 100644 index 000000000..f535d073a --- /dev/null +++ b/src/Container/SALOME_Container_No_NS_Serv_OutProcess.cxx @@ -0,0 +1,22 @@ +// Copyright (C) 2021-2024 CEA, EDF +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as 
published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include "SALOME_Container_No_NS_Serv_Generic.hxx" + +GENERIC_CONTAINER_EXECUTABLE( Engines_Container_SSL_OutOfProcess_i ) diff --git a/src/Container/SALOME_Container_No_NS_Serv_OutProcess_Replay.cxx b/src/Container/SALOME_Container_No_NS_Serv_OutProcess_Replay.cxx new file mode 100644 index 000000000..27a90f58e --- /dev/null +++ b/src/Container/SALOME_Container_No_NS_Serv_OutProcess_Replay.cxx @@ -0,0 +1,22 @@ +// Copyright (C) 2021-2024 CEA, EDF +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include "SALOME_Container_No_NS_Serv_Generic.hxx" + +GENERIC_CONTAINER_EXECUTABLE( Engines_Container_SSL_OutOfProcess_Replay_i ) diff --git a/src/Container/SALOME_Container_i.hxx b/src/Container/SALOME_Container_i.hxx index 9f67a52ce..81643dc17 100644 --- a/src/Container/SALOME_Container_i.hxx +++ b/src/Container/SALOME_Container_i.hxx @@ -47,6 +47,7 @@ #include #include #include +#include class SALOME_NamingService_Container_Abstract; @@ -55,7 +56,8 @@ class CONTAINER_EXPORT Abstract_Engines_Container_i : public virtual POA_Engines { public: Abstract_Engines_Container_i(); - Abstract_Engines_Container_i(CORBA::ORB_ptr orb, + Abstract_Engines_Container_i(const std::string& pyContainerClsName, + CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, char *containerName, int argc, char *argv[], @@ -65,6 +67,8 @@ public: virtual bool isSSLMode() const = 0; + std::string getPyContainerClassName() const { return _py_container_name; } + // --- CORBA methods virtual bool load_component_Library(const char *componentName, CORBA::String_out reason); @@ -77,6 +81,10 @@ public: void override_environment( const Engines::FieldsDict& env ) override; Engines::FieldsDict *get_os_environment() override; + + void addLogFileNameGroup(const Engines::vectorOfString& groupOfLogFileNames) override; + + Engines::vectorOfVectorOfString *getAllLogFileNameGroups() override; void execute_python_code(const char *code) override; @@ -187,6 +195,7 @@ protected: std::string _logfilename; std::string _localfilename; std::string _load_script; + std::string _py_container_name; CORBA::ORB_var _orb; PortableServer::POA_var _poa; PortableServer::ObjectId *_id; @@ -199,6 +208,7 @@ protected: 
Utils_Mutex _mutexForDftPy; std::list _tmp_files; Engines::fileTransfer_var _fileTransfer; + std::vector< std::vector > _groups_of_log_files; int _argc; char **_argv; @@ -206,6 +216,11 @@ protected: bool _isServantAloneInProcess; }; +constexpr char PY_CONTAINER_CLS_NAME_IN_PROCESS[] = "SALOME_Container_i"; +constexpr char PY_CONTAINER_CLS_NAME_OUT_PROCESS_NO_REPLAY[] = "SALOME_Container_OutOfProcess_i"; +constexpr char PY_CONTAINER_CLS_NAME_OUT_PROCESS_WITH_REPLAY[] = "SALOME_Container_OutOfProcess_Replay_i"; + + class CONTAINER_EXPORT Engines_Container_i : public Abstract_Engines_Container_i { public: @@ -216,30 +231,66 @@ public: int argc, char *argv[], SALOME_NamingService_Container_Abstract *ns = nullptr, bool isServantAloneInProcess = true) : - Abstract_Engines_Container_i(orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} + Abstract_Engines_Container_i(PY_CONTAINER_CLS_NAME_IN_PROCESS, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} bool isSSLMode() const override { return false; } }; -class CONTAINER_EXPORT Engines_Container_SSL_i : public Abstract_Engines_Container_i +class CONTAINER_EXPORT Abstract_Engines_Container_SSL_i : public Abstract_Engines_Container_i { public: - Engines_Container_SSL_i(); - Engines_Container_SSL_i(CORBA::ORB_ptr orb, + Abstract_Engines_Container_SSL_i(const std::string& pyContainerClsName, + CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, char *containerName, int argc, char *argv[], SALOME_NamingService_Container_Abstract *ns = nullptr, bool isServantAloneInProcess = true) : - Abstract_Engines_Container_i(orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} + Abstract_Engines_Container_i(pyContainerClsName, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} bool isSSLMode() const override { return true; } }; +class CONTAINER_EXPORT Engines_Container_SSL_i : public Abstract_Engines_Container_SSL_i +{ +public: + Engines_Container_SSL_i(CORBA::ORB_ptr orb, + 
PortableServer::POA_ptr poa, + char *containerName, + int argc, char *argv[], + SALOME_NamingService_Container_Abstract *ns = nullptr, + bool isServantAloneInProcess = true) : + Abstract_Engines_Container_SSL_i(PY_CONTAINER_CLS_NAME_IN_PROCESS, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} +}; + +class CONTAINER_EXPORT Engines_Container_SSL_OutOfProcess_i : public Abstract_Engines_Container_SSL_i +{ +public: + Engines_Container_SSL_OutOfProcess_i(CORBA::ORB_ptr orb, + PortableServer::POA_ptr poa, + char *containerName, + int argc, char *argv[], + SALOME_NamingService_Container_Abstract *ns = nullptr, + bool isServantAloneInProcess = true) : + Abstract_Engines_Container_SSL_i(PY_CONTAINER_CLS_NAME_OUT_PROCESS_NO_REPLAY, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} +}; + +class CONTAINER_EXPORT Engines_Container_SSL_OutOfProcess_Replay_i : public Abstract_Engines_Container_SSL_i +{ +public: + Engines_Container_SSL_OutOfProcess_Replay_i(CORBA::ORB_ptr orb, + PortableServer::POA_ptr poa, + char *containerName, + int argc, char *argv[], + SALOME_NamingService_Container_Abstract *ns = nullptr, + bool isServantAloneInProcess = true) : + Abstract_Engines_Container_SSL_i(PY_CONTAINER_CLS_NAME_OUT_PROCESS_WITH_REPLAY, orb, poa, containerName, argc, argv, ns, isServantAloneInProcess) {} +}; + /*! * Methods to be used in SSL mode to skip NS. 
*/ namespace KERNEL { - CONTAINER_EXPORT Engines_Container_SSL_i *getContainerSA(); + CONTAINER_EXPORT Abstract_Engines_Container_SSL_i *getContainerSA(); CONTAINER_EXPORT Engines::Container_var getContainerRefSA(); } // namespace KERNEL diff --git a/src/Container/SALOME_PyNode.py b/src/Container/SALOME_PyNode.py index 832118345..26493bf81 100644 --- a/src/Container/SALOME_PyNode.py +++ b/src/Container/SALOME_PyNode.py @@ -30,6 +30,7 @@ import Engines__POA import SALOME__POA import SALOME import logging +import abc import os import sys from SALOME_ContainerHelper import ScriptExecInfo @@ -714,6 +715,163 @@ class SeqByteReceiver: data_for_split_case = bytes(0).join( [data_for_split_case,part] ) iStart = iEnd; iEnd = min(iStart + EFF_CHUNK_SIZE,size) return data_for_split_case + +FinalCode = """import pickle +from SALOME_PyNode import LogOfCurrentExecutionSession,MY_PERFORMANCE_LOG_ENTRY_IN_GLBS +import CORBA +import Engines +orb = CORBA.ORB_init(['']) +codeFileName = "{}" +inputFileName = "{}" +outputFileName = "{}" +outputsKeys = {} +exec( "{{}} = LogOfCurrentExecutionSession( orb.string_to_object( \\"{}\\" ) )".format(MY_PERFORMANCE_LOG_ENTRY_IN_GLBS) ) +with open(inputFileName,"rb") as f: + context = pickle.load( f ) +with open(codeFileName,"r") as f: + code = f.read() +# go for execution +exec( code , context ) +# filter part of context to be exported to father process +context = dict( [(k,v) for k,v in context.items() if k in outputsKeys] ) +# +with open(outputFileName,"wb") as f: + pickle.dump( context, f ) +""" + +class PythonFunctionEvaluatorParams: + def __init__(self, mainFileName, codeFileName, inContextFileName, outContextFileName): + self._main_filename = mainFileName + self._code_filename = codeFileName + self._in_context_filename = inContextFileName + self._out_context_filename = outContextFileName + @property + def result(self): + import pickle + with open(self._out_context_filename,"rb") as f: + return pickle.load( f ) + def destroyOnOK(self): + for 
fileToDestroy in [self._main_filename,self._code_filename,self._in_context_filename,self._out_context_filename]: + if os.path.exists( fileToDestroy ): + os.unlink( fileToDestroy ) + def destroyOnKO(self, containerRef): + """ + Called in the context of failure with replay mode activated + """ + for fileToDestroy in [self._out_context_filename]: + if os.path.exists( fileToDestroy ): + os.unlink( fileToDestroy ) + # register with the container the group of files kept to replay the failing case + containerRef.addLogFileNameGroup([self._main_filename,self._code_filename,self._in_context_filename]) + @property + def replayCmd(self): + return "To replay : ( cd {} && python3 {} )".format(os.path.dirname(self._main_filename),os.path.basename(self._main_filename)) + + @property + def cleanOperations(self): + import os + return "To clean files : ( cd {} && rm {} )".format( os.path.dirname(self._main_filename)," ".join( [os.path.basename(self._main_filename),self._code_filename,self._in_context_filename] ) ) + + def strDependingOnReturnCode(self, keepFilesToReplay, returnCode): + if returnCode == -1: + return f"return with non zero code ({returnCode})" + else: + banner = 200*"*" + if keepFilesToReplay: + return f"""return with non zero code ({returnCode}) +{banner} +Looks like a hard crash as returnCode {returnCode} != 0 +{self.replayCmd} +{self.cleanOperations} +{banner} +""" + else: + return f"""return with non zero code ({returnCode}) +{banner} +Looks like a hard crash as returnCode {returnCode} != 0 +{banner} +""" + +def ExecCrashProofGeneric( code, context, outargsname, containerRef, instanceOfLogOfCurrentSession, keepFilesToReplay ): + """ + Equivalent of exec(code,context) but executed in a separate subprocess to avoid crashing the current process. + + Args: + ----- + + code (str) : python code to be executed using context + context (dict) : context to be used for execution. This context will be updated in accordance with the execution of code. 
+ outargsname (list) : list of arguments to be exported + containerRef (Engines.Container) : Container ref (used to register the files kept for replay when keepFilesToReplay is set to True) + instanceOfLogOfCurrentSession (LogOfCurrentExecutionSession) : instance of LogOfCurrentExecutionSession to build remotely the reference in order to log information + keepFilesToReplay (bool) : if True when something goes wrong during execution all the files to replay post mortem case are kept. If False only error is reported but files to replay are destroyed. + + Return: + ------- + + ScriptExecInfo : instance serverside + + In/Out: + ------- + + context will be modified by this method. elts in outargsname will be added and their corresponding value coming from evaluation. + """ + import tempfile + import pickle + import subprocess as sp + import CORBA + # + def InternalExecResistant( code, context, outargsname): + orb = CORBA.ORB_init(['']) + iorScriptLog = orb.object_to_string( instanceOfLogOfCurrentSession._remote_handle )#ref ContainerScriptPerfLog_ptr + #### + EXEC_CODE_FNAME_PXF = "execsafe_" + def RetrieveUniquePartFromPfx( fname ): + return os.path.splitext( os.path.basename(fname)[len(EXEC_CODE_FNAME_PXF):] )[0] + with tempfile.NamedTemporaryFile(dir=os.getcwd(),prefix=EXEC_CODE_FNAME_PXF,suffix=".py", mode="w", delete = False) as codeFd: + codeFd.write( code ) + codeFd.flush() + codeFileName = os.path.basename( codeFd.name ) + contextFileName = "contextsafe_{}.pckl".format( RetrieveUniquePartFromPfx( codeFileName ) ) + with open(contextFileName,"wb") as contextFd: + pickle.dump( context, contextFd) + resFileName = "outcontextsafe_{}.pckl".format( RetrieveUniquePartFromPfx( codeFileName ) ) + mainExecFileName = os.path.abspath( "mainexecsafe_{}.py".format( RetrieveUniquePartFromPfx( codeFileName ) ) ) + with open(mainExecFileName,"w") as f: + f.write( FinalCode.format( codeFileName, contextFileName, resFileName, outargsname, iorScriptLog ) ) + p = sp.Popen(["python3", 
mainExecFileName],stdout = sp.PIPE, stderr = sp.PIPE) + stdout, stderr = p.communicate() + returnCode = p.returncode + return returnCode, stdout, stderr, PythonFunctionEvaluatorParams(mainExecFileName,codeFileName,contextFileName,resFileName) + ret = instanceOfLogOfCurrentSession._current_instance + returnCode, stdout, stderr, evParams = InternalExecResistant( code, context, outargsname ) + stdout = stdout.decode() + stderr = stderr.decode() + sys.stdout.write( stdout ) ; sys.stdout.flush() + sys.stderr.write( stderr ) ; sys.stderr.flush() + if returnCode == 0: + pcklData = instanceOfLogOfCurrentSession._remote_handle.getObj() + if len(pcklData) > 0: + ret = pickle.loads( pcklData ) + context.update( evParams.result ) + evParams.destroyOnOK() + return ret + if returnCode != 0: + if keepFilesToReplay: + evParams.destroyOnKO( containerRef ) + else: + evParams.destroyOnOK() + raise RuntimeError(f"Subprocess launched {evParams.strDependingOnReturnCode(keepFilesToReplay,returnCode)}stdout :\n{stdout}\nstderr :\n{stderr}") + +def ExecCrashProofWithReplay( code, context, outargsname, containerRef, instanceOfLogOfCurrentSession ): + return ExecCrashProofGeneric(code, context, outargsname, containerRef, instanceOfLogOfCurrentSession, True) + +def ExecCrashProofWithoutReplay( code, context, outargsname, containerRef, instanceOfLogOfCurrentSession ): + return ExecCrashProofGeneric(code, context, outargsname, containerRef, instanceOfLogOfCurrentSession, False) + +def ExecLocal( code, context, outargsname, containerRef, instanceOfLogOfCurrentSession ): + exec( code, context ) + return instanceOfLogOfCurrentSession._current_instance class LogOfCurrentExecutionSession: def __init__(self, handleToCentralizedInst): @@ -730,9 +888,9 @@ class LogOfCurrentExecutionSession: def finalizeAndPushToMaster(self): self._remote_handle.assign( pickle.dumps( self._current_instance ) ) -class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): +class 
PyScriptNode_Abstract_i(Engines__POA.PyScriptNode,Generic,abc.ABC): """The implementation of the PyScriptNode CORBA IDL that executes a script""" - def __init__(self, nodeName,code,poa,my_container,logscript): + def __init__(self, nodeName, code, poa, my_container, logscript): """Initialize the node : compilation in the local context""" Generic.__init__(self,poa) self.nodeName=nodeName @@ -746,6 +904,10 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): self._log_script = logscript self._current_execution_session = None sys.stdout.flush() ; sys.stderr.flush() # flush to correctly capture log per execution session + + @abc.abstractmethod + def executeNow(self, outargsname): + raise RuntimeError("Must be overloaded") def __del__(self): # force removal of self.context. Don t know why it s not done by default @@ -846,7 +1008,7 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): ## self.addInfoOnLevel2("measureTimeResolution",self.my_container_py.monitoringtimeresms()) with GenericPythonMonitoringLauncherCtxMgr( CPUMemoryMonitoring( self.my_container_py.monitoringtimeresms() ) ) as monitoringParams: - exec(self.ccode, self.context) + self._current_execution_session._current_instance = self.executeNow( outargsname ) cpumeminfo = ReadCPUMemInfo( monitoringParams ) ## self.addInfoOnLevel2("CPUMemDuringExec",cpumeminfo) @@ -930,3 +1092,24 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): def addTimeInfoOnLevel2(self, key): from datetime import datetime self._current_execution_session.addInfoOnLevel2(key,datetime.now()) + +class PyScriptNode_i(PyScriptNode_Abstract_i): + def __init__(self, nodeName, code, poa, my_container, logscript): + super().__init__(nodeName, code, poa, my_container, logscript) + + def executeNow(self, outargsname): + return ExecLocal(self.ccode,self.context,outargsname,self.my_container,self._current_execution_session) + +class PyScriptNode_OutOfProcess_i(PyScriptNode_Abstract_i): + def __init__(self, nodeName, code, poa, 
my_container, logscript): + super().__init__(nodeName, code, poa, my_container, logscript) + + def executeNow(self, outargsname): + return ExecCrashProofWithoutReplay(self.code,self.context,outargsname,self.my_container,self._current_execution_session) + +class PyScriptNode_OutOfProcess_Replay_i(PyScriptNode_Abstract_i): + def __init__(self, nodeName, code, poa, my_container, logscript): + super().__init__(nodeName, code, poa, my_container, logscript) + + def executeNow(self, outargsname): + return ExecCrashProofWithReplay(self.code,self.context,outargsname,self.my_container,self._current_execution_session) diff --git a/src/KERNEL_PY/__init__.py b/src/KERNEL_PY/__init__.py index 615332500..6d7d73211 100644 --- a/src/KERNEL_PY/__init__.py +++ b/src/KERNEL_PY/__init__.py @@ -430,11 +430,13 @@ def salome_shutdown_containers_with_session(): def retrieve_containers_in_ns(): return [elt for elt in naming_service.repr() if "/Containers/" == elt[:12]] + +def get_all_containers(): + containersEntries = retrieve_containers_in_ns() + return [naming_service.Resolve(containerEntry) for containerEntry in containersEntries] def salome_shutdown_containers_without_session(): - containersEntries = retrieve_containers_in_ns() - for containerEntry in containersEntries: - cont = naming_service.Resolve(containerEntry) + for cont in get_all_containers(): try: cont.Shutdown() except: diff --git a/src/Launcher/Test/CMakeLists.txt b/src/Launcher/Test/CMakeLists.txt index b266d0ce7..0271f039b 100644 --- a/src/Launcher/Test/CMakeLists.txt +++ b/src/Launcher/Test/CMakeLists.txt @@ -22,7 +22,7 @@ IF(NOT WIN32) INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/test_launcher.py ${CMAKE_CURRENT_SOURCE_DIR}/TestSSLAttached.py - ${CMAKE_CURRENT_SOURCE_DIR}/testPerfLogManager1.py DESTINATION ${KERNEL_TEST_DIR}/Launcher) + ${CMAKE_CURRENT_SOURCE_DIR}/testPerfLogManager1.py ${CMAKE_CURRENT_SOURCE_DIR}/testCrashProofContainer.py DESTINATION ${KERNEL_TEST_DIR}/Launcher) INSTALL(FILES CTestTestfileInstall.cmake 
DESTINATION ${KERNEL_TEST_DIR}/Launcher diff --git a/src/Launcher/Test/CTestTestfileInstall.cmake b/src/Launcher/Test/CTestTestfileInstall.cmake index add423874..69f747159 100644 --- a/src/Launcher/Test/CTestTestfileInstall.cmake +++ b/src/Launcher/Test/CTestTestfileInstall.cmake @@ -33,6 +33,10 @@ IF(NOT WIN32) SET(TEST_NAME ${COMPONENT_NAME}_PerfLogManager1) ADD_TEST(${TEST_NAME} ${PYTHON_TEST_DRIVER} 2000 testPerfLogManager1.py) SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES LABELS "${COMPONENT_NAME}") + + SET(TEST_NAME ${COMPONENT_NAME}_testCrashProofContainer) + ADD_TEST(${TEST_NAME} ${PYTHON_TEST_DRIVER} 2000 testCrashProofContainer.py) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES LABELS "${COMPONENT_NAME}") # /!\ DO NOT SET TIMEOUT PROPERTY IF USING ${SALOME_TEST_DRIVER} # BUT PASS TIMEOUT VALUE TO THE DRIVER diff --git a/src/Launcher/Test/testCrashProofContainer.py b/src/Launcher/Test/testCrashProofContainer.py new file mode 100644 index 000000000..027cf7904 --- /dev/null +++ b/src/Launcher/Test/testCrashProofContainer.py @@ -0,0 +1,127 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2024 CEA/DEN, EDF R&D +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# + +import unittest +import os +import salome +import Engines +import pylauncher +import SALOME_PyNode +import KernelBasis +import SALOME + +import glob +import pickle +import tempfile +import logging +from datetime import datetime +import subprocess as sp + +killMeCode = """ +import os +import sys +j = 7 * i +sys.stdout.write(str(j)) ; sys.stdout.flush() # the aime of test in replay mode to be sure that case is runnable +os.kill( os.getpid() , signal.SIGKILL)# the aim of test is here +""" + +normalCode = """ +j = 8 * i +my_log_4_this_session.addFreestyleAndFlush( ("a",777) ) # to check that hidden var is still accessible +""" + +class testPerfLogManager1(unittest.TestCase): + def test0(self): + """ + EDF29852 : Kill container with OutOfProcessNoReplay mode and see if container still responds. 
+ """ + salome.salome_init() + assert(isinstance(KernelBasis.GetAllPyExecutionModes(),tuple)) + KernelBasis.SetPyExecutionMode("OutOfProcessNoReplay") # the aim of test is here + hostname = "localhost" + cp = pylauncher.GetRequestForGiveContainer(hostname,"container_crash_test") + salome.cm.SetOverrideEnvForContainersSimple(env = [("SALOME_BIG_OBJ_ON_DISK_THRES","1000")]) + cont = salome.cm.GiveContainer(cp) + poa = salome.orb.resolve_initial_references("RootPOA") + obj = SALOME_PyNode.SenderByte_i(poa,pickle.dumps( (["i"],{"i": 3} ) )) ; id_o = poa.activate_object(obj) ; refPtr = poa.id_to_reference(id_o) + pyscript2 = cont.createPyScriptNode("testScript2",killMeCode) + pyscript2.executeFirst(refPtr) + self.assertRaises(SALOME.SALOME_Exception,pyscript2.executeSecond,["j"]) # an aggressive SIGKILL has been received and container is still alive :) - it throws an exception :) + pyscript2.UnRegister() + pyscript3 = cont.createPyScriptNode("testScript3",normalCode) + obj = SALOME_PyNode.SenderByte_i(poa,pickle.dumps( (["i"],{"i": 3} ) )) ; id_o = poa.activate_object(obj) ; refPtr = poa.id_to_reference(id_o) + pyscript3.executeFirst(refPtr) + ret = pyscript3.executeSecond(["j"]) + ret = pickle.loads( SALOME_PyNode.SeqByteReceiver(ret[0]).data() ) + self.assertEqual(ret,24) # container has received a SIGKILL but it kindly continues to respond :) + a = salome.logm.NaiveFetch() + self.assertEqual(a[0][2][0].get().freestyle,[('a',777)]) + cont.Shutdown() + + def test1(self): + """ + EDF29852 : Same as test0 : kill container with OutOfProcessWithReplay mode and see if container still responds. But in addition we test if the python script is runnable ! 
+ """ + salome.salome_init() + assert(isinstance(KernelBasis.GetAllPyExecutionModes(),tuple)) + KernelBasis.SetPyExecutionMode("OutOfProcessWithReplay") # the aim of test is here + hostname = "localhost" + cp = pylauncher.GetRequestForGiveContainer(hostname,"container_crash_test") + salome.cm.SetOverrideEnvForContainersSimple(env = [("SALOME_BIG_OBJ_ON_DISK_THRES","1000")]) + cont = salome.cm.GiveContainer(cp) + poa = salome.orb.resolve_initial_references("RootPOA") + obj = SALOME_PyNode.SenderByte_i(poa,pickle.dumps( (["i"],{"i": 3} ) )) ; id_o = poa.activate_object(obj) ; refPtr = poa.id_to_reference(id_o) + pyscript2 = cont.createPyScriptNode("testScript2",killMeCode) + pyscript2.executeFirst(refPtr) + self.assertRaises(SALOME.SALOME_Exception,pyscript2.executeSecond,["j"]) # an aggressive SIGKILL has been received and container is still alive :) - it throws an exception :) + pyscript2.UnRegister() + pyscript3 = cont.createPyScriptNode("testScript3",normalCode) + obj = SALOME_PyNode.SenderByte_i(poa,pickle.dumps( (["i"],{"i": 3} ) )) ; id_o = poa.activate_object(obj) ; refPtr = poa.id_to_reference(id_o) + pyscript3.executeFirst(refPtr) + ret = pyscript3.executeSecond(["j"]) + ret = pickle.loads( SALOME_PyNode.SeqByteReceiver(ret[0]).data() ) + self.assertEqual(ret,24) # container has received a SIGKILL but it kindly continues to respond :) + a = salome.logm.NaiveFetch() + self.assertEqual(a[0][2][0].get().freestyle,[('a',777)]) + grpsOfLogToKill = cont.getAllLogFileNameGroups() + self.assertEqual(1,len(grpsOfLogToKill)) + replayInput = grpsOfLogToKill[0] + # now try to replay the failing case + p = sp.Popen(["python3",os.path.basename(replayInput[0])],cwd = os.path.dirname(replayInput[0]),stdout=sp.PIPE,stderr=sp.PIPE) + out,err = p.communicate() + self.assertEqual(1,p.returncode) # very important ! 
The failing case must continue to fail :) + self.assertEqual("21".encode(),out) # very important to check that the reported case is standalone enough to be replayable post mortem + # cleanup + dn = os.path.dirname(replayInput[0]) + for elt in replayInput: + zeFile = os.path.join( dn, os.path.basename(elt) ) + if os.path.exists( zeFile ): + os.unlink( zeFile ) + cont.Shutdown() + +if __name__ == '__main__': + from salome_utils import positionVerbosityOfLoggerRegardingState,setVerboseLevel,setVerbose + salome.standalone() + salome.salome_init() + setVerbose(True) + setVerboseLevel(logging.DEBUG) + positionVerbosityOfLoggerRegardingState() + unittest.main() + -- 2.39.2