Salome HOME
Deal with random CORBA::COMM_FAILURE. [branch: omu/workloadmanager]
author Ovidiu Mircescu <ovidiu.mircescu@edf.fr>
Fri, 7 Aug 2020 13:25:15 +0000 (15:25 +0200)
committer Ovidiu Mircescu <ovidiu.mircescu@edf.fr>
Fri, 7 Aug 2020 13:25:15 +0000 (15:25 +0200)
src/runtime/PythonNode.cxx
src/runtime/PythonNode.hxx
src/yacsloader/samples/wlm_2foreach_with_cache.xml
src/yacsloader_swig/Test/testWorkloadManager.py

index a0375d662fd4404a57d0b8d808718c583a0291eb..a20eca38214cce02c27aff039eb416d29624f303 100644 (file)
@@ -342,10 +342,7 @@ PythonNode::PythonNode(const std::string& name):InlineNode(name)
 
 PythonNode::~PythonNode()
 {
-  if(!CORBA::is_nil(_pynode))
-    {
-      _pynode->UnRegister();
-    }
+  freeKernelPynode();
 }
 
 void PythonNode::checkBasicConsistency() const
@@ -568,11 +565,7 @@ void PythonNode::executeRemote()
   //
   if(!isUsingPythonCache())
   {
-    if(!CORBA::is_nil(_pynode))
-      {
-        _pynode->UnRegister();
-      }
-    _pynode = Engines::PyScriptNode::_nil();
+    freeKernelPynode();
     bool dummy;
     Engines::Container_var cont(GetContainerObj(this,dummy));
     cont->removePyScriptNode(getName().c_str());
@@ -738,8 +731,7 @@ void PythonNode::shutdown(int level)
   if(_mode=="local")return;
   if(_container)
     {
-      if(!CORBA::is_nil(_pynode)) _pynode->UnRegister();
-      _pynode=Engines::PyScriptNode::_nil();
+      freeKernelPynode();
       _container->shutdown(level);
     }
 }
@@ -780,6 +772,22 @@ bool PythonNode::isUsingPythonCache()const
   return found;
 }
 
+void PythonNode::freeKernelPynode()
+{
+  if(!CORBA::is_nil(_pynode))
+  {
+    try
+    {
+      _pynode->UnRegister();
+    }
+    catch(...)
+    {
+      DEBTRACE("Trouble when pynode->UnRegister!")
+    }
+    _pynode = Engines::PyScriptNode::_nil();
+  }
+}
+
 Node *PythonNode::simpleClone(ComposedNode *father, bool editionOnly) const
 {
   return new PythonNode(*this,father);
@@ -787,8 +795,7 @@ Node *PythonNode::simpleClone(ComposedNode *father, bool editionOnly) const
 
 void PythonNode::createRemoteAdaptedPyInterpretor(Engines::Container_ptr objContainer)
 {
-  if(!CORBA::is_nil(_pynode))
-    _pynode->UnRegister();
+  freeKernelPynode();
   _pynode=objContainer->createPyScriptNode(pythonEntryName().c_str(),getScript().c_str());
   _pynode->Register();
 }
@@ -812,7 +819,7 @@ void PythonNode::assignRemotePyInterpretor(Engines::PyNodeBase_var remoteInterp)
     Engines::PyScriptNode_var tmpp(Engines::PyScriptNode::_narrow(remoteInterp));
     if(!_pynode->_is_equivalent(tmpp))
     {
-      _pynode->UnRegister();
+      freeKernelPynode();
       _pynode=Engines::PyScriptNode::_narrow(remoteInterp);
     }
   }
index 574ac9a956d2dcdc91e6c69f623fb9953cfb4de4..5114ae548c1fb84ec40fe4e158e7ad3d0c64d574 100644 (file)
@@ -103,6 +103,7 @@ namespace YACS
     protected:
       void squeezeMemory();
       void squeezeMemoryRemote();
+      void freeKernelPynode();
     public:
       static const char KIND[];
       static const char IMPL_NAME[];
index bfdd21a17e35aabdfc1ebb92e35d44e439bd1638..4bda1d82c4f91b92beb355d5ceb91f8e10b95a4e 100644 (file)
@@ -95,6 +95,7 @@ if s_expected != s2:
     print("Expected sum:", s_expected)
     print("Obtained sum:", s2)
 
+coeff_cont = len(tr1) + ( 4 * len(tr2))
 #print(r1)
 #print(r2)
 ]]></code></script>
@@ -104,6 +105,7 @@ if s_expected != s2:
       <inport name="vals" type="intvec"/>
       <outport name="total_time" type="int"/>
       <outport name="ok" type="bool"/>
+      <outport name="coeff_cont" type="int"/>
    </inline>
    <foreach name="ForEach1" nbranch="1" loopWeight="-1" type="int">
       <remote name="PyNode1" elementaryWeight="-1">
@@ -150,7 +152,7 @@ time.sleep(1)
    </datalink>
    <datalink control="false">
       <fromnode>Begin</fromnode> <fromport>vals</fromport>
-      <tonode>End</tonode> <toport>vals</toport>
+      <tonode>ForEach2</tonode> <toport>SmplsCollection</toport>
    </datalink>
    <datalink control="false">
       <fromnode>Begin</fromnode> <fromport>vals</fromport>
@@ -158,15 +160,15 @@ time.sleep(1)
    </datalink>
    <datalink control="false">
       <fromnode>Begin</fromnode> <fromport>vals</fromport>
-      <tonode>ForEach2</tonode> <toport>SmplsCollection</toport>
+      <tonode>End</tonode> <toport>vals</toport>
    </datalink>
    <datalink control="false">
       <fromnode>Begin</fromnode> <fromport>nbbranches</fromport>
-      <tonode>ForEach1</tonode> <toport>nbBranches</toport>
+      <tonode>ForEach2</tonode> <toport>nbBranches</toport>
    </datalink>
    <datalink control="false">
       <fromnode>Begin</fromnode> <fromport>nbbranches</fromport>
-      <tonode>ForEach2</tonode> <toport>nbBranches</toport>
+      <tonode>ForEach1</tonode> <toport>nbBranches</toport>
    </datalink>
    <datalink control="false">
       <fromnode>ForEach1</fromnode> <fromport>evalSamples</fromport>
@@ -185,18 +187,18 @@ time.sleep(1)
       <tonode>End</tonode> <toport>r2</toport>
    </datalink>
    <parameter>
-      <tonode>ForEach1</tonode><toport>nbBranches</toport>
+      <tonode>ForEach2</tonode><toport>nbBranches</toport>
       <value><int>1</int></value>
    </parameter>
    <parameter>
-      <tonode>ForEach2</tonode><toport>nbBranches</toport>
+      <tonode>ForEach1</tonode><toport>nbBranches</toport>
       <value><int>1</int></value>
    </parameter>
-   <presentation name="End" x="402.5" y="85" width="158" height="144" expanded="1" expx="402.5" expy="85" expWidth="158" expHeight="144" shownState="0"/>
    <presentation name="ForEach1.PyNode1" x="9.5" y="88" width="158" height="63" expanded="1" expx="9.5" expy="88" expWidth="158" expHeight="63" shownState="0"/>
-   <presentation name="Begin" x="9" y="86" width="158" height="117" expanded="1" expx="9" expy="86" expWidth="158" expHeight="117" shownState="0"/>
-   <presentation name="ForEach1" x="201.5" y="146" width="171.5" height="155" expanded="1" expx="201.5" expy="146" expWidth="171.5" expHeight="155" shownState="0"/>
    <presentation name="ForEach2" x="204" y="313.5" width="175" height="159" expanded="1" expx="204" expy="313.5" expWidth="175" expHeight="159" shownState="0"/>
+   <presentation name="ForEach1" x="201.5" y="146" width="171.5" height="155" expanded="1" expx="201.5" expy="146" expWidth="171.5" expHeight="155" shownState="0"/>
+   <presentation name="End" x="402.5" y="85" width="158" height="144" expanded="1" expx="402.5" expy="85" expWidth="158" expHeight="144" shownState="0"/>
+   <presentation name="Begin" x="9" y="86" width="158" height="117" expanded="1" expx="9" expy="86" expWidth="158" expHeight="117" shownState="0"/>
    <presentation name="ForEach2.PyNode2" x="13" y="92" width="158" height="63" expanded="1" expx="13" expy="92" expWidth="158" expHeight="63" shownState="0"/>
    <presentation name="__ROOT__" x="0" y="0" width="564.5" height="476.5" expanded="1" expx="0" expy="0" expWidth="564.5" expHeight="476.5" shownState="0"/>
 </proc>
index ab07a4089f91d0d48423b815a6ffd11928405694..334dcd2af604110ef90388c7c5b37180060ae05e 100755 (executable)
@@ -27,6 +27,7 @@ import tempfile
 import os
 import salome
 
+NB_NODE=16
 class TestEdit(unittest.TestCase):
 
     def setUp(self):
@@ -40,7 +41,7 @@ class TestEdit(unittest.TestCase):
         salome.salome_init()
         resourceManager = salome.lcc.getResourcesManager()
         resource_definition = resourceManager.GetResourceDefinition("localhost")
-        resource_definition.nb_node = 16
+        resource_definition.nb_node = NB_NODE
         resourceManager.AddResource(resource_definition, False, "")
         resource_required = salome.ResourceParameters()
         resource_required.can_run_containers = True
@@ -48,8 +49,8 @@ class TestEdit(unittest.TestCase):
         for r in res_list:
           if r != "localhost":
             resourceManager.RemoveResource(r, False, "")
-        #resource_definition = resourceManager.GetResourceDefinition("localhost")
-        #self.assertEqual(resource_definition.nb_node, 16)
+        resource_definition = resourceManager.GetResourceDefinition("localhost")
+        self.assertEqual(resource_definition.nb_node, NB_NODE)
 
     def tearDown(self):
         cm = salome.lcc.getContainerManager()
@@ -65,10 +66,12 @@ class TestEdit(unittest.TestCase):
         # theoretical time should be 15s
         execution_time = res_port.getPyObj()
         # lower time means some resources are overloaded
-        self.assertTrue(execution_time > 13)
+        msg = "Execution time is too short : {}s".format(execution_time)
+        self.assertTrue(execution_time > 13, msg)
         # The containers may need some time to be launched.
         # We need some delay to add to the 15s.
-        self.assertTrue(execution_time < 20)
+        msg = "Execution time is too long : {}s".format(execution_time)
+        self.assertTrue(execution_time < 25, msg)
 
     def test2(self):
         """ Two parallel foreach-s with different containers and python nodes
@@ -83,10 +86,17 @@ class TestEdit(unittest.TestCase):
         # theoretical time should be 16s
         execution_time = total_time.getPyObj()
         # lower time means some resources are overloaded
-        self.assertTrue(execution_time > 14)
+        msg = "Execution time is too short : {}s".format(execution_time)
+        self.assertTrue(execution_time > 14, msg)
         # The containers may need some time to be launched.
         # We need some delay to add to the 16s.
-        self.assertTrue(execution_time < 20)
+        msg = "Execution time is too long : {}s".format(execution_time)
+        self.assertTrue(execution_time < 26, msg)
+        coeff_cont = proc.getChildByName("End").getOutputPort("coeff_cont").getPyObj()
+        msg = "coeff_cont too low:"+str(coeff_cont)
+        self.assertTrue(coeff_cont >= NB_NODE, msg)
+        msg = "coeff_cont too high:"+str(coeff_cont)
+        self.assertTrue(coeff_cont <= 2*NB_NODE, msg)
 
     def test3(self):
         """ Launch 8 independent nodes in parallel.
@@ -96,7 +106,7 @@ class TestEdit(unittest.TestCase):
         self.assertEqual(proc.getState(),pilot.DONE)
         ok = proc.getChildByName("End").getOutputPort("ok")
         if not ok :
-          err_message = proc.getChildByName("End").getOutputPort("err_message")
+          err_message = proc.getChildByName("End").getOutputPort("err_message").getPyObj()
           self.fail(err_message)
 
 if __name__ == '__main__':