From: Ovidiu Mircescu Date: Fri, 7 Aug 2020 13:25:15 +0000 (+0200) Subject: Deal with random CORBA::COMM_FAILURE. X-Git-Tag: V9_6_0a1~7^2 X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=c6a05eb8dba9298dcfe004e15996b67a5b7bda5b;p=modules%2Fyacs.git Deal with random CORBA::COMM_FAILURE. --- diff --git a/src/runtime/PythonNode.cxx b/src/runtime/PythonNode.cxx index a0375d662..a20eca382 100644 --- a/src/runtime/PythonNode.cxx +++ b/src/runtime/PythonNode.cxx @@ -342,10 +342,7 @@ PythonNode::PythonNode(const std::string& name):InlineNode(name) PythonNode::~PythonNode() { - if(!CORBA::is_nil(_pynode)) - { - _pynode->UnRegister(); - } + freeKernelPynode(); } void PythonNode::checkBasicConsistency() const @@ -568,11 +565,7 @@ void PythonNode::executeRemote() // if(!isUsingPythonCache()) { - if(!CORBA::is_nil(_pynode)) - { - _pynode->UnRegister(); - } - _pynode = Engines::PyScriptNode::_nil(); + freeKernelPynode(); bool dummy; Engines::Container_var cont(GetContainerObj(this,dummy)); cont->removePyScriptNode(getName().c_str()); @@ -738,8 +731,7 @@ void PythonNode::shutdown(int level) if(_mode=="local")return; if(_container) { - if(!CORBA::is_nil(_pynode)) _pynode->UnRegister(); - _pynode=Engines::PyScriptNode::_nil(); + freeKernelPynode(); _container->shutdown(level); } } @@ -780,6 +772,22 @@ bool PythonNode::isUsingPythonCache()const return found; } +void PythonNode::freeKernelPynode() +{ + if(!CORBA::is_nil(_pynode)) + { + try + { + _pynode->UnRegister(); + } + catch(...) + { + DEBTRACE("Trouble when pynode->UnRegister!") + } + _pynode = Engines::PyScriptNode::_nil(); + } +} + Node *PythonNode::simpleClone(ComposedNode *father, bool editionOnly) const { return new PythonNode(*this,father); @@ -787,8 +795,7 @@ Node *PythonNode::simpleClone(ComposedNode *father, bool editionOnly) const void PythonNode::createRemoteAdaptedPyInterpretor(Engines::Container_ptr objContainer) { - if(!CORBA::is_nil(_pynode)) - _pynode->UnRegister(); + freeKernelPynode(); _pynode=objContainer->createPyScriptNode(pythonEntryName().c_str(),getScript().c_str()); _pynode->Register(); } @@ -812,7 +819,7 @@ void PythonNode::assignRemotePyInterpretor(Engines::PyNodeBase_var remoteInterp) Engines::PyScriptNode_var tmpp(Engines::PyScriptNode::_narrow(remoteInterp)); if(!_pynode->_is_equivalent(tmpp)) { - _pynode->UnRegister(); + freeKernelPynode(); _pynode=Engines::PyScriptNode::_narrow(remoteInterp); } } diff --git a/src/runtime/PythonNode.hxx b/src/runtime/PythonNode.hxx index 574ac9a95..5114ae548 100644 --- a/src/runtime/PythonNode.hxx +++ b/src/runtime/PythonNode.hxx @@ -103,6 +103,7 @@ namespace YACS protected: void squeezeMemory(); void squeezeMemoryRemote(); + void freeKernelPynode(); public: static const char KIND[]; static const char IMPL_NAME[]; diff --git a/src/yacsloader/samples/wlm_2foreach_with_cache.xml b/src/yacsloader/samples/wlm_2foreach_with_cache.xml index bfdd21a17..4bda1d82c 100644 --- a/src/yacsloader/samples/wlm_2foreach_with_cache.xml +++ b/src/yacsloader/samples/wlm_2foreach_with_cache.xml @@ -95,6 +95,7 @@ if s_expected != s2: print("Expected sum:", s_expected) print("Obtained sum:", s2) +coeff_cont = len(tr1) + ( 4 * len(tr2)) #print(r1) #print(r2) ]]> @@ -104,6 +105,7 @@ if s_expected != s2: + @@ -150,7 +152,7 @@ time.sleep(1) Begin vals - End vals + ForEach2 SmplsCollection Begin vals @@ -158,15 +160,15 @@ time.sleep(1) Begin vals - ForEach2 SmplsCollection + End vals Begin nbbranches - ForEach1 nbBranches + ForEach2 nbBranches Begin nbbranches - ForEach2 nbBranches + ForEach1 nbBranches ForEach1 evalSamples @@ -185,18 +187,18 @@ time.sleep(1) End r2 - ForEach1nbBranches + ForEach2nbBranches 1 - ForEach2nbBranches + ForEach1nbBranches 1 - - - + + + diff --git a/src/yacsloader_swig/Test/testWorkloadManager.py b/src/yacsloader_swig/Test/testWorkloadManager.py index ab07a4089..334dcd2af 100755 --- a/src/yacsloader_swig/Test/testWorkloadManager.py +++ b/src/yacsloader_swig/Test/testWorkloadManager.py @@ -27,6 +27,7 @@ import tempfile import os import salome +NB_NODE=16 class TestEdit(unittest.TestCase): def setUp(self): @@ -40,7 +41,7 @@ class TestEdit(unittest.TestCase): salome.salome_init() resourceManager = salome.lcc.getResourcesManager() resource_definition = resourceManager.GetResourceDefinition("localhost") - resource_definition.nb_node = 16 + resource_definition.nb_node = NB_NODE resourceManager.AddResource(resource_definition, False, "") resource_required = salome.ResourceParameters() resource_required.can_run_containers = True @@ -48,8 +49,8 @@ class TestEdit(unittest.TestCase): for r in res_list: if r != "localhost": resourceManager.RemoveResource(r, False, "") - #resource_definition = resourceManager.GetResourceDefinition("localhost") - #self.assertEqual(resource_definition.nb_node, 16) + resource_definition = resourceManager.GetResourceDefinition("localhost") + self.assertEqual(resource_definition.nb_node, NB_NODE) def tearDown(self): cm = salome.lcc.getContainerManager() @@ -65,10 +66,12 @@ class TestEdit(unittest.TestCase): # theoretical time should be 15s execution_time = res_port.getPyObj() # lower time means some resources are overloaded - self.assertTrue(execution_time > 13) + msg = "Execution time is too short : {}s".format(execution_time) + self.assertTrue(execution_time > 13, msg) # The containers may need some time to be launched. # We need some delay to add to the 15s. - self.assertTrue(execution_time < 20) + msg = "Execution time is too long : {}s".format(execution_time) + self.assertTrue(execution_time < 25, msg) def test2(self): """ Two parallel foreach-s with different containers and python nodes @@ -83,10 +86,17 @@ class TestEdit(unittest.TestCase): # theoretical time should be 16s execution_time = total_time.getPyObj() # lower time means some resources are overloaded - self.assertTrue(execution_time > 14) + msg = "Execution time is too short : {}s".format(execution_time) + self.assertTrue(execution_time > 14, msg) # The containers may need some time to be launched. # We need some delay to add to the 16s. - self.assertTrue(execution_time < 20) + msg = "Execution time is too long : {}s".format(execution_time) + self.assertTrue(execution_time < 26, msg) + coeff_cont = proc.getChildByName("End").getOutputPort("coeff_cont").getPyObj() + msg = "coeff_cont too low:"+str(coeff_cont) + self.assertTrue(coeff_cont >= NB_NODE, msg) + msg = "coeff_cont too high:"+str(coeff_cont) + self.assertTrue(coeff_cont <= 2*NB_NODE, msg) def test3(self): """ Launch 8 independent nodes in parallel. @@ -96,7 +106,7 @@ class TestEdit(unittest.TestCase): self.assertEqual(proc.getState(),pilot.DONE) ok = proc.getChildByName("End").getOutputPort("ok") if not ok : - err_message = proc.getChildByName("End").getOutputPort("err_message") + err_message = proc.getChildByName("End").getOutputPort("err_message").getPyObj() self.fail(err_message) if __name__ == '__main__':