From e6e7564209709ef7b1b19a96db646af87b0aace0 Mon Sep 17 00:00:00 2001 From: Anthony Geay Date: Thu, 12 Sep 2024 17:04:54 +0200 Subject: [PATCH] [EDF30875] : Fix lack of remove files in case of File exchange mode --- src/Container/SALOME_PyNode.py | 49 +++++++++------- src/Launcher/Test/testCrashProofContainer.py | 62 ++++++++++++++++++++ 2 files changed, 89 insertions(+), 22 deletions(-) diff --git a/src/Container/SALOME_PyNode.py b/src/Container/SALOME_PyNode.py index d38a351fd..8cbbca922 100644 --- a/src/Container/SALOME_PyNode.py +++ b/src/Container/SALOME_PyNode.py @@ -916,16 +916,16 @@ class PythonFunctionEvaluatorParams: def result(self): return self._out_context_filename.retrieveRemoteContext() - def destroyOnOK(self): - self._out_context_filename.removeContextSupport() + def destroyOnOKAndReplay(self): + self._out_context_filename.removeContextSupport( True ) for fileToDestroy in [self._main_filename,self._code_filename,self._in_context_filename]: if os.path.exists( fileToDestroy ): os.unlink( fileToDestroy ) - def destroyOnKO(self, containerRef): + def destroyOnKOAndReplay(self, containerRef): """ Called in the context of failure with replay mode activated """ - self._out_context_filename.removeContextSupport() + self._out_context_filename.removeContextSupport( False ) # register to container files group associated to the containerRef.addLogFileNameGroup([self._main_filename,self._code_filename,self._in_context_filename]) @@ -1008,9 +1008,7 @@ class ContextExchanger_i(Engines__POA.ContextExchanger): def finishPushContext(self): try: - #raise RuntimeError(f"Anthony {dir(self)}") self._output_context = pickle.loads( self._out_ctx ) - #del self._out_ctx except Exception as e: raise SALOME.SALOME_Exception( SALOME.ExceptionStruct(SALOME.INTERNAL_ERROR,str(e),"finishPushContext",0) ) @@ -1063,16 +1061,19 @@ class ExchangeContextCltAbs(abc.ABC): raise RuntimeError("Must be overloaded") @abc.abstractmethod - def removeContextSupport(self): + def 
removeContextSupport(self, isOK): raise RuntimeError("Must be overloaded") class ExchangeContextUsingFileClt(ExchangeContextCltAbs): + def __init__(self, keepFilesToReplay): + self._keep_in_files = keepFilesToReplay + def hostInputContext(self, dirForReplayFiles, contextFileBaseName, context): - contextFileName = os.path.join( dirForReplayFiles, contextFileBaseName) - with open(contextFileName,"wb") as contextFd: + self._in_ctx_entry_point = os.path.join( dirForReplayFiles, contextFileBaseName) + with open(self._in_ctx_entry_point,"wb") as contextFd: pickle.dump( context, contextFd) - return os.path.basename( contextFileName ) + return os.path.basename( self._in_ctx_entry_point ) def setOutputContextEntryPoint(self, dirForReplayFiles, outCtxtEntryPoint): self._out_ctx_entry_point = os.path.join( dirForReplayFiles, outCtxtEntryPoint ) @@ -1082,9 +1083,13 @@ class ExchangeContextUsingFileClt(ExchangeContextCltAbs): with open(self._out_ctx_entry_point,"rb") as f: return pickle.load( f ) - def removeContextSupport(self): - if os.path.exists( self._out_ctx_entry_point ): - os.unlink( self._out_ctx_entry_point ) + def removeContextSupport(self, isOK): + fileNamesToRm = [self._out_ctx_entry_point] + if isOK or not self._keep_in_files: + fileNamesToRm += [self._in_ctx_entry_point] + for fileName in fileNamesToRm: + if os.path.exists( fileName ): + os.unlink( fileName ) class ExchangeContextUsingTCPClt(ExchangeContextCltAbs): @@ -1102,16 +1107,16 @@ class ExchangeContextUsingTCPClt(ExchangeContextCltAbs): def retrieveRemoteContext(self): return self._servant.getOutputContext() - def removeContextSupport(self): + def removeContextSupport(self, isOK):# isOK ignored. 
Because in memory mode nothing to save poa = self._servant.getPOA() poa.deactivate_object(self._id_o) del self._servant import gc gc.collect() -def ExchangeModeCltSideFactory( exchangeMode ): +def ExchangeModeCltSideFactory( exchangeMode, keepFilesToReplay ): if exchangeMode == "File": - return ExchangeContextUsingFileClt() + return ExchangeContextUsingFileClt(keepFilesToReplay) elif exchangeMode == "TCP": return ExchangeContextUsingTCPClt() else: @@ -1163,7 +1168,7 @@ def ExecCrashProofGeneric( code, context, outargsname, containerRef, instanceOfL return False,stderr # - def InternalExecResistant( exchangeMode, code, context, outargsname): + def InternalExecResistant( exchangeMode, keepFilesToReplay, code, context, outargsname): import KernelBasis import salome salome.salome_init() @@ -1185,7 +1190,7 @@ import sys sys.stderr.write({!r}) sys.stderr.flush()""".format( MY_KEY_TO_DETECT_FINISH ) ) codeFd.flush() - exCtx = ExchangeModeCltSideFactory(exchangeMode) + exCtx = ExchangeModeCltSideFactory(exchangeMode, keepFilesToReplay) codeFileNameFull = codeFd.name codeFileName = os.path.basename( codeFileNameFull ) contextFileName = exCtx.hostInputContext(dirForReplayFiles, "contextsafe_{}.pckl".format( RetrieveUniquePartFromPfx( codeFileName ) ), context) @@ -1221,7 +1226,7 @@ sys.stderr.flush()""".format( MY_KEY_TO_DETECT_FINISH ) ) exchangeMode = "File" if not keepFilesToReplay: exchangeMode = "TCP" - returnCode, stdout, stderr, evParams = InternalExecResistant( exchangeMode, code, context, outargsname ) + returnCode, stdout, stderr, evParams = InternalExecResistant( exchangeMode, keepFilesToReplay, code, context, outargsname ) stdout = stdout.decode() stderr = stderr.decode() sys.stdout.write( stdout ) ; sys.stdout.flush() @@ -1232,15 +1237,15 @@ sys.stderr.flush()""".format( MY_KEY_TO_DETECT_FINISH ) ) if len(pcklData) > 0: ret = pickle.loads( pcklData ) context.update( evParams.result ) - evParams.destroyOnOK() + evParams.destroyOnOKAndReplay() if returnCode != 0: 
print( "WARNING : Following code has generated non zero return code ( {} ) but considered as OK\n{}".format( returnCode, code ) ) return ret else: if keepFilesToReplay: - evParams.destroyOnKO( containerRef ) + evParams.destroyOnKOAndReplay( containerRef ) else: - evParams.destroyOnOK() + evParams.destroyOnOKAndReplay() raise RuntimeError(f"Subprocess launched {evParams.strDependingOnReturnCode(keepFilesToReplay,returnCode)}stdout :\n{stdout}\nstderr :\n{stderr}") def ExecCrashProofWithReplay( code, context, outargsname, containerRef, instanceOfLogOfCurrentSession ): diff --git a/src/Launcher/Test/testCrashProofContainer.py b/src/Launcher/Test/testCrashProofContainer.py index 044a62c38..9fe5aa785 100644 --- a/src/Launcher/Test/testCrashProofContainer.py +++ b/src/Launcher/Test/testCrashProofContainer.py @@ -103,6 +103,12 @@ del i j = np.zeros(shape=(2*nb,),dtype=np.float64) """ +FunnyCase_test7 = """ +import numpy as np +nb = i.shape[0] +j = np.zeros(shape=(2*nb,),dtype=np.float64) +""" + class testPerfLogManager1(unittest.TestCase): def test0(self): """ @@ -349,6 +355,62 @@ class testPerfLogManager1(unittest.TestCase): gc.collect() #time.sleep(10) + def test7(self): + """ + [EDF30875] : Guarantee that DirectoryForReplay is clean after execution.
+ """ + import numpy as np + import gc + szOfData = 12000 + KernelBasis.SetPyExecutionMode("OutOfProcessWithReplayFT") + salome.cm.SetBigObjOnDiskThreshold( 1 ) # enable proxy + with tempfile.TemporaryDirectory() as tmpdirname: + os.chdir( tmpdirname ) + hostname = "localhost" + cp = pylauncher.GetRequestForGiveContainer(hostname,"container_crash_test_7") + salome.cm.SetDirectoryForReplayFiles( str( tmpdirname ) ) + KernelBasis.SetBigObjOnDiskDirectory( str( tmpdirname ) ) + with salome.ContainerLauncherCM(cp,True) as cont: + poa = salome.orb.resolve_initial_references("RootPOA") + arr = np.zeros(shape=(szOfData,),dtype=np.float64) + obj = SALOME_PyNode.SenderByte_i(poa,pickle.dumps( (["i"],{"i": arr} ) )) ; id_o = poa.activate_object(obj) ; refPtr = poa.id_to_reference(id_o) + gc.collect() + pyscript = cont.createPyScriptNode("testScript",FunnyCase_test7) + pyscript.executeFirst(refPtr) + ret = pyscript.executeSecond(["j"]) + pxy = pickle.loads( SALOME_PyNode.SeqByteReceiver(ret[0]).data() ) # receiving twice the size of the input (2*szOfData float64 values) + ret0 = UnProxyObjectSimple( pxy ) # it's a proxy -> unproxify it + DecrRefInFile( pxy.getFileName() ) + self.assertEqual( len( os.listdir( str( tmpdirname ) ) ) , 0 ) # very important: the directory must be clean + + def test8(self): + """ + [EDF30875] : Same as test7 but with OutOfProcessWithReplay.
+ """ + import numpy as np + import gc + szOfData = 12000 + KernelBasis.SetPyExecutionMode("OutOfProcessWithReplay") + salome.cm.SetBigObjOnDiskThreshold( 1 ) # enable proxy + with tempfile.TemporaryDirectory() as tmpdirname: + os.chdir( tmpdirname ) + hostname = "localhost" + cp = pylauncher.GetRequestForGiveContainer(hostname,"container_crash_test_8") + salome.cm.SetDirectoryForReplayFiles( str( tmpdirname ) ) + KernelBasis.SetBigObjOnDiskDirectory( str( tmpdirname ) ) + with salome.ContainerLauncherCM(cp,True) as cont: + poa = salome.orb.resolve_initial_references("RootPOA") + arr = np.zeros(shape=(szOfData,),dtype=np.float64) + obj = SALOME_PyNode.SenderByte_i(poa,pickle.dumps( (["i"],{"i": arr} ) )) ; id_o = poa.activate_object(obj) ; refPtr = poa.id_to_reference(id_o) + gc.collect() + pyscript = cont.createPyScriptNode("testScript",FunnyCase_test7) + pyscript.executeFirst(refPtr) + ret = pyscript.executeSecond(["j"]) + pxy = pickle.loads( SALOME_PyNode.SeqByteReceiver(ret[0]).data() ) # receiving twice the size of the input (2*szOfData float64 values) + ret0 = UnProxyObjectSimple( pxy ) # it's a proxy -> unproxify it + DecrRefInFile( pxy.getFileName() ) + self.assertEqual( len( os.listdir( str( tmpdirname ) ) ) , 0 ) # very important: the directory must be clean + if __name__ == '__main__': from salome_utils import positionVerbosityOfLoggerRegardingState,setVerboseLevel,setVerbose -- 2.39.2