From f7742f0839500df3c5da0a5d88233b02f6059948 Mon Sep 17 00:00:00 2001 From: Anthony Geay Date: Tue, 2 Jan 2024 15:11:09 +0100 Subject: [PATCH] [EDF29150] : monitoring of CPU and memory is integrated into log info of containers. --- idl/SALOME_Component.idl | 3 ++ idl/SALOME_ContainerManager.idl | 4 +++ src/Container/Container_i.cxx | 40 +++++++++++++++++++---- src/Container/SALOME_Container.py | 9 ++++- src/Container/SALOME_ContainerHelper.py | 16 +++++++++ src/Container/SALOME_ContainerManager.cxx | 15 ++++++++- src/Container/SALOME_ContainerManager.hxx | 5 +++ src/Container/SALOME_Container_i.hxx | 5 ++- src/Container/SALOME_PyNode.py | 14 +++++--- 9 files changed, 98 insertions(+), 13 deletions(-) diff --git a/idl/SALOME_Component.idl b/idl/SALOME_Component.idl index bcce8b779..5de885005 100644 --- a/idl/SALOME_Component.idl +++ b/idl/SALOME_Component.idl @@ -182,6 +182,9 @@ module Engines //! name of the %container log file attribute string locallogfilename ; + + //! interval of time between two measures of CPU/time process container + attribute long monitoringtimeresms; void verbosity(out boolean activated, out string level); diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index 474d51642..3a58f620b 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -96,6 +96,10 @@ interface ContainerManager long GetDeltaTimeBetweenNSLookupAtLaunchTimeInMilliSecond(); void SetDeltaTimeBetweenNSLookupAtLaunchTimeInMilliSecond(in long timeInMS); + + long GetDeltaTimeBetweenCPUMemMeasureInMilliSecond(); + + void SetDeltaTimeBetweenCPUMemMeasureInMilliSecond(in long timeInMS); void SetOverrideEnvForContainers(in KeyValDict env); diff --git a/src/Container/Container_i.cxx b/src/Container/Container_i.cxx index 9b71def0b..b7e084ac7 100644 --- a/src/Container/Container_i.cxx +++ b/src/Container/Container_i.cxx @@ -110,6 +110,8 @@ extern "C" {void SigIntHandler( int ) ; } #define SLASH '/' #endif +const int Abstract_Engines_Container_i::DFT_TIME_INTERVAL_BTW_MEASURE = 500; + std::map Abstract_Engines_Container_i::_cntInstances_map; std::map Abstract_Engines_Container_i::_library_map; std::map Abstract_Engines_Container_i::_toRemove_map; @@ -211,11 +213,9 @@ Abstract_Engines_Container_i::Abstract_Engines_Container_i (CORBA::ORB_ptr orb, // pycont = SALOME_Container.SALOME_Container_i(containerIORStr) CORBA::String_var sior = _orb->object_to_string(pCont); - std::string myCommand="pyCont = SALOME_Container.SALOME_Container_i('"; - myCommand += _containerName + "','"; - myCommand += sior; - myCommand += "')\n"; - SCRUTE(myCommand); + std::ostringstream myCommand; + myCommand << "pyCont = SALOME_Container.SALOME_Container_i('" << _containerName << "','" << sior << "','" << DFT_TIME_INTERVAL_BTW_MEASURE << "')\n"; + SCRUTE(myCommand.str()); //[RNV]: Comment the PyEval_AcquireLock() and PyEval_ReleaseLock() because this //approach leads to the deadlock of the main thread of the application on Windows platform @@ -241,7 +241,7 @@ Abstract_Engines_Container_i::Abstract_Engines_Container_i (CORBA::ORB_ptr orb, PyRun_SimpleString("sys.path = sys.path[1:]\n"); #endif PyRun_SimpleString("import SALOME_Container\n"); - PyRun_SimpleString((char*)myCommand.c_str()); + PyRun_SimpleString((char*)myCommand.str().c_str()); PyObject *mainmod = PyImport_AddModule("__main__"); PyObject *globals = PyModule_GetDict(mainmod); _pyCont = PyDict_GetItemString(globals, "pyCont"); @@ -326,6 +326,34 @@ void Abstract_Engines_Container_i::locallogfilename(const char *name) _localfilename = name; } +CORBA::Long Abstract_Engines_Container_i::monitoringtimeresms() +{ + AutoGIL gstate; + PyObject *result = PyObject_CallMethod(_pyCont,(char*)"monitoringtimeresms",nullptr); + if (PyErr_Occurred()) + { + std::string error("can not retrieve time interval between 2 measures"); + PyErr_Print(); + THROW_SALOME_CORBA_EXCEPTION(error.c_str(),SALOME::INTERNAL_ERROR); + } + CORBA::Long ret = PyLong_AsLong( result ); + Py_XDECREF(result); + return ret; +} + +void Abstract_Engines_Container_i::monitoringtimeresms(CORBA::Long intervalInMs) +{ + AutoGIL gstate; + PyObject *result = PyObject_CallMethod(_pyCont,(char*)"SetMonitoringtimeresms","i",intervalInMs,nullptr); + if (PyErr_Occurred()) + { + std::string error("can not set time interval between 2 measures"); + PyErr_Print(); + THROW_SALOME_CORBA_EXCEPTION(error.c_str(),SALOME::INTERNAL_ERROR); + } + Py_XDECREF(result); +} + void Abstract_Engines_Container_i::verbosity(bool& activated, CORBA::String_out level) { activated = SALOME::VerbosityActivated(); diff --git a/src/Container/SALOME_Container.py b/src/Container/SALOME_Container.py index e9ad62f99..61ed8bff4 100644 --- a/src/Container/SALOME_Container.py +++ b/src/Container/SALOME_Container.py @@ -60,7 +60,7 @@ class SALOME_Container_i: #------------------------------------------------------------------------- - def __init__(self ,containerName, containerIORStr): + def __init__(self ,containerName, containerIORStr, dftTimeIntervalInMs): MESSAGE( "SALOME_Container_i::__init__" ) try: argv = sys.argv @@ -73,6 +73,7 @@ class SALOME_Container_i: self._poa = self._orb.resolve_initial_references("RootPOA") self._containerName = containerName self._dbg_info = [] + self._timeIntervalInMs = dftTimeIntervalInMs if verbose(): print("SALOME_Container.SALOME_Container_i : _containerName ",self._containerName) self._container = self._orb.string_to_object(containerIORStr) @@ -180,3 +181,9 @@ class SALOME_Container_i: def getAllInfo(self): import pickle return pickle.dumps( self._dbg_info ) + + def monitoringtimeresms(self): + return self._timeIntervalInMs + + def SetMonitoringtimeresms(self , value): + self._timeIntervalInMs = value diff --git a/src/Container/SALOME_ContainerHelper.py b/src/Container/SALOME_ContainerHelper.py index 439be0215..0d38486d0 100644 --- a/src/Container/SALOME_ContainerHelper.py +++ b/src/Container/SALOME_ContainerHelper.py @@ -48,6 +48,7 @@ class ScriptExecInfo: return "{} {}".format(m,UNITS[3]) def __init__(self): + self._cpu_mem_during_exec = None self._start_exec_time = None self._end_exec_time = None self._start_input_time = None @@ -58,6 +59,20 @@ class ScriptExecInfo: self._input_hdd_mem = None self._output_mem = 0 self._output_hdd_mem = None + + @property + def CPUMemDuringExec(self): + return self._cpu_mem_during_exec + + @CPUMemDuringExec.setter + def CPUMemDuringExec(self,value): + self._cpu_mem_during_exec = value + + @property + def CPUMemDuringExecStr(self): + cpu = self._cpu_mem_during_exec[::2] + mem_rss = self._cpu_mem_during_exec[1::2] + return [(a,ScriptExecInfo.MemRepr(b)) for a,b in self._cpu_mem_during_exec] @property def inputMem(self): @@ -175,6 +190,7 @@ class ScriptExecInfo: return """start exec time = {self.startExecTime} end exec time = {self.endExecTime} exec_time = {self.execTimeStr} +CPU and mem monitoring = {self.CPUMemDuringExecStr} input unpickling and ev load from disk time = {self.inputTimeStr} output serialization and ev write to disk time = {self.outputTimeStr} input memory size before exec (MemoryPeak 2x) = {self.inputMemStr} diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 730f35cd6..0ec60bba3 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -24,6 +24,7 @@ #include "SALOME_ResourcesManager.hxx" #include "SALOME_LoadRateManager.hxx" #include "SALOME_NamingService.hxx" +#include "SALOME_Container_i.hxx" #include "SALOME_ResourcesManager_Client.hxx" #include "SALOME_Embedded_NamingService.hxx" #include "SALOME_ModuleCatalog.hh" @@ -88,7 +89,7 @@ Utils_Mutex SALOME_ContainerManager::_systemMutex; //============================================================================= SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService_Abstract *ns) - : _nbprocUsed(1),_delta_time_ns_lookup_in_ms(DFT_DELTA_TIME_NS_LOOKUP_IN_MS) + : _nbprocUsed(1),_delta_time_ns_lookup_in_ms(DFT_DELTA_TIME_NS_LOOKUP_IN_MS),_delta_time_measure_in_ms(Abstract_Engines_Container_i::DFT_TIME_INTERVAL_BTW_MEASURE) { MESSAGE("constructor"); _NS = ns; @@ -227,6 +228,16 @@ void SALOME_ContainerManager::SetDeltaTimeBetweenNSLookupAtLaunchTimeInMilliSeco this->_delta_time_ns_lookup_in_ms = timeInMS; } +CORBA::Long SALOME_ContainerManager::GetDeltaTimeBetweenCPUMemMeasureInMilliSecond() +{ + return this->_delta_time_measure_in_ms; +} + +void SALOME_ContainerManager::SetDeltaTimeBetweenCPUMemMeasureInMilliSecond(CORBA::Long timeInMS) +{ + this->_delta_time_measure_in_ms = timeInMS; +} + //============================================================================= //! Loop on all the containers listed in naming service, ask shutdown on each /*! CORBA Method: @@ -489,6 +500,8 @@ Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::Con if (!CORBA::is_nil(cont)) { INFOS("[GiveContainer] container " << containerNameInNS << " launched"); + cont->monitoringtimeresms( this->_delta_time_measure_in_ms ); + INFOS("[GiveContainer] container " << containerNameInNS << " first CORBA invocation OK"); std::ostringstream envInfo; std::for_each( _override_env.begin(), _override_env.end(), [&envInfo](const std::pair& p) { envInfo << p.first << " = " << p.second << std::endl; } ); INFOS("[GiveContainer] container " << containerNameInNS << " override " << envInfo.str()); diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index b579d5c3d..1d517c237 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -71,6 +71,10 @@ public: void SetDeltaTimeBetweenNSLookupAtLaunchTimeInMilliSecond(CORBA::Long timeInMS) override; + CORBA::Long GetDeltaTimeBetweenCPUMemMeasureInMilliSecond() override; + + void SetDeltaTimeBetweenCPUMemMeasureInMilliSecond(CORBA::Long timeInMS) override; + static const char *_ContainerManagerNameInNS; protected: @@ -214,6 +218,7 @@ private: std::vector< std::pair > _override_env; int _time_out_in_second; int _delta_time_ns_lookup_in_ms; + int _delta_time_measure_in_ms; std::string _code_to_exe_on_startup; }; #endif diff --git a/src/Container/SALOME_Container_i.hxx b/src/Container/SALOME_Container_i.hxx index 882581e69..7f974bd5d 100644 --- a/src/Container/SALOME_Container_i.hxx +++ b/src/Container/SALOME_Container_i.hxx @@ -119,6 +119,8 @@ public: void logfilename(const char *name) override; char *locallogfilename() override; void locallogfilename(const char *name) override; + CORBA::Long monitoringtimeresms() override; + void monitoringtimeresms(CORBA::Long intervalInMs) override; void verbosity(bool& activated, CORBA::String_out level) override; void setVerbosity(bool activated, const char *level) override; SALOME::vectorOfByte *getAllInfo() override; @@ -167,7 +169,8 @@ public: void unregisterTemporaryFile(const std::string &fileName); void clearTemporaryFiles(); PortableServer::ObjectId *getCORBAId() const { return _id; } - +public: + static const int DFT_TIME_INTERVAL_BTW_MEASURE; protected: static std::map _cntInstances_map; static std::map _library_map; // library names, loaded diff --git a/src/Container/SALOME_PyNode.py b/src/Container/SALOME_PyNode.py index d93751eaf..0baa785c0 100644 --- a/src/Container/SALOME_PyNode.py +++ b/src/Container/SALOME_PyNode.py @@ -31,6 +31,7 @@ import SALOME__POA import SALOME import logging import os +import sys MY_CONTAINER_ENTRY_IN_GLBS = "my_container" @@ -452,10 +453,9 @@ def StopMonitoring( ): list : list of pairs. First param of pair is CPU usage. Second param of pair is rss memory usage """ import KernelBasis - from SALOME_ContainerHelper import ScriptExecInfo ret = KernelBasis.StopMonitoring() cpu = ret[::2] - mem_rss = [ScriptExecInfo.MemRepr( int(elt) ) for elt in ret[1::2]] + mem_rss = [ int(elt) for elt in ret[1::2]] return [(a,b) for a,b in zip(cpu,mem_rss)] class SeqByteReceiver: @@ -502,6 +502,7 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): self.context[MY_CONTAINER_ENTRY_IN_GLBS] = self.my_container self._pos = None self._current_exec = 0 + sys.stdout.flush() ; sys.stderr.flush() # flush to correctly capture log per execution session #start of non remote callable methods @@ -584,6 +585,7 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): def executeFirst(self,argsin): """ Same than first part of self.execute to reduce memory peak.""" try: + #self.my_container_py.addInfoOnLevel2(self.getIDInContainer(),self._current_exec,"tracePosStart",) data = None self.my_container_py.addTimeInfoOnLevel2(self.getIDInContainer(),self._current_exec,"startInputTime") if True: # to force call of SeqByteReceiver's destructor @@ -609,9 +611,13 @@ class PyScriptNode_i (Engines__POA.PyScriptNode,Generic): import sys try: self.my_container_py.addTimeInfoOnLevel2(self.getIDInContainer(),self._current_exec,"startExecTime") - pyfile = BuildPythonFileForCPUPercent() - + ## + monitoringParams = LaunchMonitoring( self.my_container_py.monitoringtimeresms() ) exec(self.ccode, self.context) + cpumeminfo = StopMonitoring( ) + ## + self.my_container_py.addInfoOnLevel2(self.getIDInContainer(),self._current_exec,"CPUMemDuringExec",cpumeminfo) + del monitoringParams self.my_container_py.addTimeInfoOnLevel2(self.getIDInContainer(),self._current_exec,"endExecTime") self.my_container_py.addTimeInfoOnLevel2(self.getIDInContainer(),self._current_exec,"startOutputTime") argsout=[] -- 2.39.2