void set_big_obj_on_disk_threshold(in long thresholdInByte);
void set_big_obj_on_disk_directory(in string directory);
+
+ void set_number_of_retry(in long nbRetry);
void addLogFileNameGroup(in vectorOfString groupOfLogFileNames);
void SetBigObjOnDiskDirectory(in string directory);
+ void SetNumberOfRetry(in long nbRetry);
+
+ long GetNumberOfRetry();
+
void SetCodeOnContainerStartUp(in string code);
string GetCodeOnContainerStartUp();
static std::string SALOME_FILE_BIG_OBJ_DIR;
+constexpr int DFT_SALOME_NB_RETRY = 1; // initial value of SALOME_NB_RETRY
+
+static int SALOME_NB_RETRY = DFT_SALOME_NB_RETRY; // current setting : written by SALOME::SetNumberOfRetry, read by SALOME::GetNumberOfRetry
+
std::string SALOME::GetBigObjOnDiskDirectory()
{
return SALOME_FILE_BIG_OBJ_DIR;
return ! SALOME_FILE_BIG_OBJ_DIR.empty();
}
+void SALOME::SetNumberOfRetry(int nbRetry) // replaces the value kept in the file-level static SALOME_NB_RETRY
+{
+ SALOME_NB_RETRY = nbRetry; // stored as given : no range check here, zero and negative values are accepted
+}
+
+int SALOME::GetNumberOfRetry() // returns the value last passed to SetNumberOfRetry, or DFT_SALOME_NB_RETRY if it was never called
+{
+ return SALOME_NB_RETRY;
+}
+
static SALOME::PyExecutionMode DefaultPyExecMode = SALOME::PyExecutionMode::NotSet;
void SALOME::SetPyExecutionMode(PyExecutionMode mode)
std::string BASICS_EXPORT GetBigObjOnDiskDirectory();
void BASICS_EXPORT SetBigObjOnDiskDirectory(const std::string& directory);
bool BASICS_EXPORT BigObjOnDiskDirectoryDefined();
+ void BASICS_EXPORT SetNumberOfRetry(int nbRetry);
+ int BASICS_EXPORT GetNumberOfRetry();
}
%rename (GetBigObjOnDiskDirectory) GetBigObjOnDiskDirectorySwig;
%rename (SetBigObjOnDiskDirectory) SetBigObjOnDiskDirectorySwig;
%rename (BigObjOnDiskDirectoryDefined) BigObjOnDiskDirectoryDefinedSwig;
+%rename (SetNumberOfRetry) SetNumberOfRetrySwig;
+%rename (GetNumberOfRetry) GetNumberOfRetrySwig;
bool getSSLMode();
void setSSLMode(bool sslMode);
return SALOME::BigObjOnDiskDirectoryDefined();
}
+void SetNumberOfRetrySwig(int nbRetry) // wrapper exposed under the name SetNumberOfRetry by the matching %rename directive
+{
+ SALOME::SetNumberOfRetry( nbRetry ); // plain forwarding, nbRetry is passed unchanged
+}
+
+int GetNumberOfRetrySwig() // wrapper exposed under the name GetNumberOfRetry by the matching %rename directive
+{
+ return SALOME::GetNumberOfRetry( ); // plain forwarding of the current setting
+}
+
void SetVerbosityLevelSwig(const std::string& level)
{
SetVerbosityLevelStr(level);
SALOME::SetBigObjOnDiskDirectory(directory);
}
+void Abstract_Engines_Container_i::set_number_of_retry(CORBA::Long nbRetry) // implementation of the IDL operation set_number_of_retry
+{
+ SALOME::SetNumberOfRetry( nbRetry ); // forwards nbRetry unchanged ; no validation is done at this level
+}
+
Engines::vectorOfString_var FromVecStringCppToCORBA( const std::vector<std::string>& group)
{
Engines::vectorOfString_var ret( new Engines::vectorOfString );
SALOME::SetBigObjOnDiskDirectory(directory);
}
+ void SALOME_ContainerManager::SetNumberOfRetry(CORBA::Long nbRetry) // implementation of the IDL operation SetNumberOfRetry
+ {
+ SALOME::SetNumberOfRetry( nbRetry ); // the stored value is what cont->set_number_of_retry( SALOME::GetNumberOfRetry() ) later sends to a container
+ }
+
+CORBA::Long SALOME_ContainerManager::GetNumberOfRetry() // implementation of the IDL operation GetNumberOfRetry
+{
+ return SALOME::GetNumberOfRetry(); // current setting, as stored by SALOME::SetNumberOfRetry
+}
+
//=============================================================================
//! Loop on all the containers listed in naming service, ask shutdown on each
/*! CORBA Method:
INFOS("[GiveContainer] container " << containerNameInNS << " override " << envInfo.str());
cont->set_big_obj_on_disk_directory( SALOME::GetBigObjOnDiskDirectory().c_str() );
cont->set_big_obj_on_disk_threshold( SALOME::GetBigObjOnDiskThreshold() );
+ cont->set_number_of_retry( SALOME::GetNumberOfRetry() );
Engines::FieldsDict envCorba;
{
auto sz = _override_env.size();
void SetBigObjOnDiskDirectory(const char *directory) override;
+ void SetNumberOfRetry(CORBA::Long nbRetry) override;
+
+ CORBA::Long GetNumberOfRetry() override;
+
static const char *_ContainerManagerNameInNS;
private:
void set_big_obj_on_disk_directory(const char *directory) override;
+ void set_number_of_retry(CORBA::Long nbRetry) override;
+
void addLogFileNameGroup(const Engines::vectorOfString& groupOfLogFileNames) override;
Engines::vectorOfVectorOfString *getAllLogFileNameGroups() override;
def __exit__(self,exctype, exc, tb): # context manager exit : stop the monitoring, then release what this object holds
StopMonitoring( self._monitoring_params )
+ del self._monitoring_params # drop the reference kept by this context manager before collecting
+ import gc
+ gc.collect() # force destruction of objects even in raise context
def StopMonitoring( monitoringInfo ):
"""
context[MY_PERFORMANCE_LOG_ENTRY_IN_GLBS] = eval( MY_PERFORMANCE_LOG_ENTRY_IN_GLBS )
with open(codeFileName,"r") as f:
code = f.read()
+#
+import gc
+gc.disable()
# go for execution
exec( code , context )
# filter part of context to be exported to father process
#
def InternalExecResistant( code, context, outargsname):
+ import KernelBasis
orb = CORBA.ORB_init([''])
iorScriptLog = orb.object_to_string( instanceOfLogOfCurrentSession._remote_handle )#ref ContainerScriptPerfLog_ptr
####
mainExecFileName = os.path.abspath( "mainexecsafe_{}.py".format( RetrieveUniquePartFromPfx( codeFileName ) ) )
with open(mainExecFileName,"w") as f:
f.write( FinalCode.format( codeFileName, contextFileName, resFileName, outargsname, iorScriptLog ) )
- p = sp.Popen(["python3", mainExecFileName],stdout = sp.PIPE, stderr = sp.PIPE)
- stdout, stderr = p.communicate()
- returnCode = p.returncode
+ # KernelBasis.GetNumberOfRetry() is used as the total number of attempts ( 1 means : no retry ).
+ # Force at least one attempt : with a value <= 0 the loop below would never run and
+ # returnCode, stdout and stderr would be unbound when reaching the return statement.
+ nbOfAttempts = max( 1, KernelBasis.GetNumberOfRetry() )
+ for iTry in range( nbOfAttempts ):
+   if iTry > 0:
+     # returnCode is the one of the previous, failed, attempt
+     print( "WARNING : Retry # {}. Following code has generated non zero return code ( {} ). Trying again ... \n{}".format( iTry, returnCode, code ) )
+   p = sp.Popen(["python3", mainExecFileName],stdout = sp.PIPE, stderr = sp.PIPE)
+   stdout, stderr = p.communicate()
+   returnCode = p.returncode
+   if returnCode == 0:
+     break # success : remaining attempts are skipped
return returnCode, stdout, stderr, PythonFunctionEvaluatorParams(mainExecFileName,codeFileName,contextFileName,resFileName)
ret = instanceOfLogOfCurrentSession._current_instance
returnCode, stdout, stderr, evParams = InternalExecResistant( code, context, outargsname )
self.finalizeAndPushToMaster()
def finalizeAndPushToMaster(self):
- self._remote_handle.assign( pickle.dumps( self._current_instance ) )
+ """
+ Pickle the current instance and assign it to the remote handle, on a best-effort basis.
+ Any error raised while doing so is deliberately ignored so that a reporting problem never disturbs the execution.
+ """
+ try:
+   self._remote_handle.assign( pickle.dumps( self._current_instance ) )
+ except Exception: # not a bare except : KeyboardInterrupt and SystemExit must still propagate
+   pass
class LogOfCurrentExecutionSessionStub(LogOfCurrentExecutionSessionAbs):
"""
def executeSecond(self,outargsname):
""" Same than second part of self.execute to reduce memory peak."""
+ def executeSecondInternal(monitoringtimeresms): # runs executeNow under CPU/memory monitoring ; monitoringParams stays local to this helper
+ with GenericPythonMonitoringLauncherCtxMgr( CPUMemoryMonitoring( monitoringtimeresms ) ) as monitoringParams:
+ currentInstance = self.executeNow( outargsname ) # self and outargsname are taken from the enclosing executeSecond
+ cpumeminfo = ReadCPUMemInfo( monitoringParams )
+ return cpumeminfo, currentInstance # same order as the pair unpacked by the caller
+
import sys
try:
self.addTimeInfoOnLevel2("startExecTime")
##
self.addInfoOnLevel2("measureTimeResolution",self.my_container_py.monitoringtimeresms())
- with GenericPythonMonitoringLauncherCtxMgr( CPUMemoryMonitoring( self.my_container_py.monitoringtimeresms() ) ) as monitoringParams:
- self._current_execution_session._current_instance = self.executeNow( outargsname )
- cpumeminfo = ReadCPUMemInfo( monitoringParams )
+ cpumeminfo, self._current_execution_session._current_instance = executeSecondInternal( self.my_container_py.monitoringtimeresms() )
##
self.addInfoOnLevel2("CPUMemDuringExec",cpumeminfo)
- del monitoringParams
self.addTimeInfoOnLevel2("endExecTime")
self.addTimeInfoOnLevel2("startOutputTime")
argsout=[]
killMeCode = """
import os
import sys
+import signal
j = 7 * i
sys.stdout.write(str(j)) ; sys.stdout.flush() # the aime of test in replay mode to be sure that case is runnable
os.kill( os.getpid() , signal.SIGKILL)# the aim of test is here
# now try to replay the failing case
p = sp.Popen(["python3",os.path.basename(replayInput[0])],cwd = os.path.dirname(replayInput[0]),stdout=sp.PIPE,stderr=sp.PIPE)
out,err = p.communicate()
- self.assertEqual(1,p.returncode) # very important ! The failing case must continue to fail :)
+ self.assertNotEqual(p.returncode,0) # very important ! The failing case must continue to fail :)
self.assertEqual("21".encode(),out) # very important to check that the reported case is standalone enough to be replayable poste mortem
# cleanup
dn = os.path.dirname(replayInput[0])
KernelBasis.SetPyExecutionMode("OutOfProcessWithReplayFT")
hostname = "localhost"
cp = pylauncher.GetRequestForGiveContainer(hostname,"container_crash_test")
+ salome.cm.SetNumberOfRetry( 3 )
salome.cm.SetBigObjOnDiskThreshold(1000)
salome.cm.SetOverrideEnvForContainersSimple(env = [])
cont = salome.cm.GiveContainer(cp)
ret = pickle.loads( SALOME_PyNode.SeqByteReceiver(ret[0]).data() )
self.assertEqual(ret,27)
with open(cont.locallogfilename) as f:
- self.assertTrue( "WARNING : Following code has generated non zero return code" in f.read() )# should report something into the container
+ logCont = f.read( )
+ self.assertTrue( "WARNING : Retry #" in logCont)
+ self.assertTrue( "WARNING : Following code has generated non zero return code" in logCont )# should report something into the container
cont.Shutdown()
if __name__ == '__main__':