# ---------------------------------------------------------------------------
# Recovered from a whitespace-collapsed gitweb "commitdiff_plain" page.
#   From    : Anthony Geay
#   Date    : Mon, 25 Mar 2024 10:47:54 +0000 (+0100)
#   Subject : [EDF29852] : Mecanism of fault tolerant in SALOME_Container to
#             resist against emitted signals during computation
#   Commit  : 83204b2f907b555f18424dc438d439545be77555 (modules/kernel.git)
#   URL     : http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=83204b2f907b555f18424dc438d439545be77555;p=modules%2Fkernel.git
#   Target  : src/Container/SALOME_PyNode.py, hunk @@ -714,6 +714,114 @@
#
# The definitions below are the lines ADDED by that hunk. In the patch they sit
# between the unchanged context line `return data_for_split_case` (hunk header
# context: class SeqByteReceiver) and the unchanged context line
# `class LogOfCurrentExecutionSession:`; both context fragments are incomplete
# in this excerpt and are therefore not reproduced as code.
#
# `os` and `sys` are used below without a local import, exactly as in the
# patch: they are expected to be imported at the top of SALOME_PyNode.py.
# ---------------------------------------------------------------------------

# Source template of the "main" script run by the child interpreter.
# Positional placeholders, in order: code file name, pickled input context file
# name, pickled output context file name, and the collection of context keys to
# send back (inserted via str.format, so its text must be a valid Python
# expression supporting `in`).
FinalCode = """import pickle
codeFileName = "{}"
inputFileName = "{}"
outputFileName = "{}"
outputsKeys = {}
with open(inputFileName,"rb") as f:
  context = pickle.load( f )
with open(codeFileName,"r") as f:
  code = f.read()
# go for execution
exec( code , context )
# filter part of context to be exported to father process
context = dict( [(k,v) for k,v in context.items() if k in outputsKeys] )
#
with open(outputFileName,"wb") as f:
  pickle.dump( context, f )
"""


class PythonFunctionEvaluatorParams:
    """
    Book-keeping of the files involved in one out-of-process evaluation.

    Args:
    -----

    mainFileName (str) : path of the generated main script (see FinalCode)
    codeFileName (str) : file holding the user code to execute
    inContextFileName (str) : file holding the pickled input context
    outContextFileName (str) : file where the child pickles the filtered context
    """

    def __init__(self, mainFileName, codeFileName, inContextFileName, outContextFileName):
        self._main_filename = mainFileName
        self._code_filename = codeFileName
        self._in_context_filename = inContextFileName
        self._out_context_filename = outContextFileName

    @staticmethod
    def _removeFiles(fileNames):
        """Delete every file of fileNames, silently skipping the missing ones."""
        for fileName in fileNames:
            # EAFP : avoids the exists()/unlink() race of a look-before-you-leap test
            try:
                os.unlink(fileName)
            except FileNotFoundError:
                pass

    @property
    def result(self):
        """Unpickle and return the content of the output context file."""
        import pickle
        with open(self._out_context_filename, "rb") as f:
            return pickle.load(f)

    def destroyOnOK(self):
        """Remove all four files (main script, code, input and output contexts)."""
        self._removeFiles([self._main_filename, self._code_filename,
                           self._in_context_filename, self._out_context_filename])

    def destroyOnKO(self):
        """
        Remove only the output context file.

        Main script, code and input context are left on disk : they are the
        files referenced by replayCmd and cleanOperations.
        """
        self._removeFiles([self._out_context_filename])

    @property
    def replayCmd(self):
        """Human readable shell hint to re-run the main script from its directory."""
        return "To replay : ( cd {} && python3 {} )".format(
            os.path.dirname(self._main_filename), os.path.basename(self._main_filename))

    @property
    def cleanOperations(self):
        """Human readable shell hint to delete the files kept after a failure."""
        filesToClean = [os.path.basename(self._main_filename),
                        self._code_filename,
                        self._in_context_filename]
        return "To clean files : ( cd {} && rm {} )".format(
            os.path.dirname(self._main_filename), " ".join(filesToClean))

    def strDependingOnReturnCode(self, returnCode):
        """
        Build the part of the error message that depends on the exit status.

        Args:
        -----

        returnCode (int) : exit status of the child process

        Returns
        -------

        str : a one-line message when returnCode is 1, else the same line
              followed by a banner with the replay and cleaning hints
              (terminated by a newline).
        """
        header = f"return with non zero code ({returnCode})"
        # Exit status 1 is what a Python interpreter returns on an uncaught
        # exception. The previous test compared against -1, which contradicted
        # the "!= 1" wording below and tagged ordinary exceptions as hard crashes.
        if returnCode == 1:
            return header
        banner = 200 * "*"
        lines = [header,
                 banner,
                 f"Looks like a hard crash as returnCode {returnCode} != 1",
                 self.replayCmd,
                 self.cleanOperations,
                 banner]
        return "\n".join(lines) + "\n"


def ExecCrashProof( code, context, outargsname ):
    """
    Equivalent of exec(code,context) but executed in a separate subprocess to avoid to make the current process crash.

    The code and the pickled context are written to files in the current
    working directory, a generated main script (see FinalCode) is run with
    "python3", and the entries of the resulting context whose key is in
    outargsname are merged back into context.

    Args:
    -----

    code (str) : python code to be executed using context
    context (dict) : context to be used for execution. This context will be updated in accordance with the execution of code.
                     It must be picklable.
    outargsname : keys of the context to be sent back by the subprocess. Its text
                  form is inserted into the generated script (typically a list of str).

    Raises
    ------

    RuntimeError : if the subprocess exits with a non zero status. The message
                   embeds the captured stdout and stderr.
    """
    import pickle
    import subprocess as sp
    import tempfile
    #
    EXEC_CODE_FNAME_PXF = "execsafe_"
    # the code file gets a unique name ; the random part is reused for the 3 other files
    with tempfile.NamedTemporaryFile(dir=os.getcwd(), prefix=EXEC_CODE_FNAME_PXF, suffix=".py", mode="w", delete=False) as codeFd:
        codeFd.write( code )
    codeFileName = os.path.basename( codeFd.name )
    uniquePart = os.path.splitext( codeFileName[len(EXEC_CODE_FNAME_PXF):] )[0]
    contextFileName = "contextsafe_{}.pckl".format( uniquePart )
    resFileName = "outcontextsafe_{}.pckl".format( uniquePart )
    mainExecFileName = os.path.abspath( "mainexecsafe_{}.py".format( uniquePart ) )
    with open(contextFileName, "wb") as contextFd:
        pickle.dump( context, contextFd )
    with open(mainExecFileName, "w") as mainFd:
        mainFd.write( FinalCode.format( codeFileName, contextFileName, resFileName, outargsname ) )
    evParams = PythonFunctionEvaluatorParams( mainExecFileName, codeFileName, contextFileName, resFileName )
    # NOTE(review): the interpreter is looked up on PATH as "python3". Kept as is :
    # sys.executable may not be a Python interpreter if this module runs embedded - to be confirmed.
    p = sp.Popen(["python3", mainExecFileName], stdout=sp.PIPE, stderr=sp.PIPE)
    rawStdout, rawStderr = p.communicate()
    returnCode = p.returncode
    # errors="replace" : a crashing child may emit bytes that are not valid text ;
    # a UnicodeDecodeError here would hide the real failure.
    stdout = rawStdout.decode(errors="replace")
    stderr = rawStderr.decode(errors="replace")
    # forward what the child printed to the streams of the current process
    sys.stdout.write( stdout ) ; sys.stdout.flush()
    sys.stderr.write( stderr ) ; sys.stderr.flush()
    if returnCode != 0:
        evParams.destroyOnKO()
        reason = evParams.strDependingOnReturnCode(returnCode)
        if not reason.endswith("\n"):
            # keep "stdout :" on its own line whatever the branch taken above
            reason += "\n"
        raise RuntimeError(f"Subprocess launched {reason}stdout :\n{stdout}\nstderr :\n{stderr}")
    context.update( evParams.result )
    evParams.destroyOnOK()


def ExecLocal( code, context, outargsname ):
    """
    In-process counterpart of ExecCrashProof : plain exec(code,context).

    Args:
    -----

    code (str) : python code to be executed using context
    context (dict) : context used for execution, updated in place by exec
    outargsname : unused here, kept so that both Exec* functions share the same signature
    """
    exec( code, context )

# Trailing patch context (unchanged by the commit, definition continues beyond
# this excerpt) : class LogOfCurrentExecutionSession