Salome HOME
Work in progress: workload manager step 2
author Ovidiu Mircescu <ovidiu.mircescu@edf.fr>
Thu, 7 May 2020 14:54:51 +0000 (16:54 +0200)
committer Ovidiu Mircescu <ovidiu.mircescu@edf.fr>
Thu, 7 May 2020 14:54:51 +0000 (16:54 +0200)
Workload manager recommendations are used by Python nodes if their
containers have the property "multi".
The test YACS_YacsLoaderTest_swig fails randomly, but this is the same
behaviour as before; more work should be done on this item.

src/engine/Container.cxx
src/engine/Container.hxx
src/engine/Executor.cxx
src/runtime/PythonNode.cxx
src/runtime/PythonNode.hxx
src/runtime/RuntimeSALOME.cxx
src/runtime/SalomeContainer.cxx
src/runtime/SalomeContainer.hxx
src/workloadmanager/Task.hxx
src/yacsloader_swig/Test/testSaveLoadRun.py

index 65efe968b5383bb7f33659f503763aad32356126..7dbbac6455d91022869b23624a0b243f1c80051b 100644 (file)
@@ -47,6 +47,18 @@ std::string Container::getDiscreminantStrOfThis(const Task *askingNode) const
   return oss.str();
 }
 
+void Container::start(const Task *askingNode,
+                      const std::string& resource_name,
+                      const std::string& container_name)
+{
+  return start(askingNode);
+}
+
+bool Container::canAcceptImposedResource()
+{
+  return false;
+}
+
 /*!
  * If \a val is equal to true the current container 'this' is not destined to be deeply copied on clone call.
  * If \a val is equal to false the current container 'this' is destined to be deeply copied on clone call.
index 3c752a543d4c1095153822a603789723fbffa1c7..9ec9e594dc49f2cf09c162a299ba4b5c8d2705c0 100644 (file)
@@ -51,6 +51,10 @@ namespace YACS
       virtual std::string getDiscreminantStrOfThis(const Task *askingNode) const;
       virtual bool isAlreadyStarted(const Task *askingNode) const = 0;
       virtual void start(const Task *askingNode)  = 0;
+      virtual void start(const Task *askingNode,
+                         const std::string& resource_name,
+                         const std::string& container_name);
+      virtual bool canAcceptImposedResource();
       virtual std::string getPlacementId(const Task *askingNode) const = 0;
       virtual std::string getFullPlacementId(const Task *askingNode) const = 0;
       //Edition only methods
index 1a3bb64dda70e5d8b2ea740d422ee2c8de924b8a..453eed195b4f2b324b5fc5c27b3f42c30f9915c9 100644 (file)
@@ -1682,6 +1682,8 @@ void loadResources(WorkloadManager::WorkloadManager& wm)
     id++;
     newResource.nbCores = res.second;
     wm.addResource(newResource);
+    std::cerr << "Add resource " << newResource.name << " with "
+              << newResource.nbCores << " cores." << std::endl;
   }
 }
 
@@ -1788,7 +1790,7 @@ void Executor::newRun(Scheduler *graph,int debug, bool fromScratch)
   WorkloadManager::WorkloadManager wlm(algo);
   loadResources(wlm);
   wlm.start();
-  
+
   while (_toContinue)
     {
       DEBTRACE("--- executor main loop");
index 547d233601b10cf465db37a840cf260a122c3437..4f7bd51170de67de5fb60448908a9dd2479b0840 100644 (file)
@@ -102,6 +102,9 @@ void PythonEntry::commonRemoteLoadPart1(InlineNode *reqNode)
         {
           try
           {
+            if(!_imposedResource.empty() && !_imposedContainer.empty())
+              container->start(reqNode, _imposedResource, _imposedContainer);
+            else
               container->start(reqNode);
           }
           catch(Exception& e)
@@ -722,6 +725,21 @@ void PythonNode::shutdown(int level)
     }
 }
 
+void PythonNode::imposeResource(const std::string& resource_name,
+                                const std::string& container_name)
+{
+  if(!resource_name.empty() && !container_name.empty())
+  {
+    _imposedResource = resource_name;
+    _imposedContainer = container_name;
+  }
+}
+
+bool PythonNode::canAcceptImposedResource()
+{
+  return _container != nullptr && _container->canAcceptImposedResource();
+}
+
 Node *PythonNode::simpleClone(ComposedNode *father, bool editionOnly) const
 {
   return new PythonNode(*this,father);
@@ -1332,3 +1350,18 @@ void PyFuncNode::shutdown(int level)
     }
 }
 
+void PyFuncNode::imposeResource(const std::string& resource_name,
+                                const std::string& container_name)
+{
+  if(!resource_name.empty() && !container_name.empty())
+  {
+    _imposedResource = resource_name;
+    _imposedContainer = container_name;
+  }
+}
+
+bool PyFuncNode::canAcceptImposedResource()
+{
+  return _container != nullptr && _container->canAcceptImposedResource();
+}
+
index def2b876fe218277dad5cd664e31fecd09ab6fd7..d90c17b29765b1427bdb844ce1bf2da654fd53fd 100644 (file)
@@ -55,6 +55,8 @@ namespace YACS
       PyObject *_pyfuncSer;
       PyObject *_pyfuncUnser;
       PyObject *_pyfuncSimpleSer;
+      std::string _imposedResource;
+      std::string _imposedContainer;
     public:
       static const char SCRIPT_FOR_SIMPLE_SERIALIZATION[];
     };
@@ -81,6 +83,9 @@ namespace YACS
       virtual void executeRemote();
       virtual void executeLocal();
       virtual void shutdown(int level);
+      void imposeResource(const std::string& resource_name,
+                          const std::string& container_name) override;
+      bool canAcceptImposedResource()override;
       std::string getContainerLog();
       PythonNode* cloneNode(const std::string& name);
       virtual std::string typeName() { return "YACS__ENGINE__PythonNode"; }
@@ -125,6 +130,9 @@ namespace YACS
       virtual void executeRemote();
       virtual void executeLocal();
       virtual void shutdown(int level);
+      void imposeResource(const std::string& resource_name,
+                          const std::string& container_name) override;
+      bool canAcceptImposedResource()override;
       std::string getContainerLog();
       PyFuncNode* cloneNode(const std::string& name);
       virtual std::string typeName() { return "YACS__ENGINE__PyFuncNode"; }
index 94b205986f021f812f3d97c16e1760d70b4bf427..5bcca56fc5ba21f0823ecacd1a54755bb838abeb 100644 (file)
@@ -465,22 +465,33 @@ std::vector< std::pair<std::string,int> > RuntimeSALOME::getCatalogOfComputeNode
   if(CORBA::is_nil(resManager))
     throw Exception("SalomeContainerToolsSpreadOverTheResDecorator::getParameters : Internal error ! The entry attached to the res manager in NS does not have right type !");
   std::vector< std::pair<std::string,int> > ret;
+  Engines::ResourceParameters params;
+  params.name = "";
+  params.hostname = "";
+  params.OS = "";
+  params.nb_proc = 0;
+  params.mem_mb = 0;
+  params.cpu_clock = 0;
+  params.nb_node = 0;
+  params.nb_proc_per_node = 0;
+  params.policy = "";
+  params.can_launch_batch_jobs = false;
+  params.can_run_containers = true;
+  params.componentList.length(0);
+  Engines::ResourceList_var resourceList;
+  resourceList = resManager->GetFittingResources(params);
+  ret.reserve(resourceList->length());
+  for(int i = 0; i<resourceList->length(); i++)
   {
-    Engines::ResourceList *rl(0);
-    Engines::IntegerList *il(0);
-    resManager->ListAllAvailableResources(rl,il);
-    int sz(rl->length());
-    if(il->length()!=sz)
-      throw Exception("SalomeContainerToolsSpreadOverTheResDecorator::getParameters : Internal error ! Invalid size !");
-    ret.resize(sz);
-    for(int i=0;i<sz;i++)
-      {
-        std::string s((*rl)[i]);
-        ret[i]=std::pair<std::string,int>(s,(*il)[i]);
-      }
-    delete rl;
-    delete il;
+    const char* resource_name = resourceList[i];
+    std::string std_resource_name = resource_name;
+    Engines::ResourceDefinition_var resource_definition
+                             = resManager->GetResourceDefinition(resource_name);
+    int nb_cores = resource_definition->nb_node *
+                   resource_definition->nb_proc_per_node;
+    ret.push_back(std::pair<std::string,int>(resource_name, nb_cores));
   }
+
   return ret;
 }
 
index 19714c9bf6ddff5f9b74beab4a3774b2d387b178..150b520482c765347bb6d936a77245ef70504f09 100644 (file)
@@ -221,11 +221,31 @@ Engines::Container_ptr SalomeContainer::getContainerPtr(const Task *askingNode)
 /*!
  * \param inst the component instance
  */
-void SalomeContainer::start(const Task *askingNode) 
+void SalomeContainer::start(const Task *askingNode)
 {
   SalomeContainerTools::Start(_componentNames,_launchModeType,_sct,_shutdownLevel,this,askingNode);
 }
 
+void SalomeContainer::start(const Task *askingNode,
+                            const std::string& resource_name,
+                            const std::string& container_name)
+{
+  if(canAcceptImposedResource())
+  {
+    SalomeContainerTools tempSct = _sct;
+    tempSct.setProperty("name", resource_name);
+    tempSct.setProperty("container_name", container_name);
+    SalomeContainerTools::Start(_componentNames,_launchModeType,tempSct,_shutdownLevel,this,askingNode);
+  }
+  else
+    start(askingNode);
+}
+
+bool SalomeContainer::canAcceptImposedResource()
+{
+  return _launchModeType->getType() == SalomeContainerMultiHelper::TYPE_NAME;
+}
+
 void SalomeContainer::shutdown(int level)
 {
   DEBTRACE("SalomeContainer::shutdown: " << _name << "," << level << "," << _shutdownLevel);
index 66814aa5d69726b23350a0192104c8d19f53fa8a..900d3e0f9f4ff22b98f410fc35c4962344f010bd 100644 (file)
@@ -51,7 +51,11 @@ namespace YACS
       std::string getKind() const;
       bool isAlreadyStarted(const Task *askingNode) const;
       Engines::Container_ptr getContainerPtr(const Task *askingNode) const;
-      void start(const Task *askingNode) ;
+      void start(const Task *askingNode) override;
+      void start(const Task *askingNode,
+                 const std::string& resource_name,
+                 const std::string& container_name) override;
+      bool canAcceptImposedResource() override;
       Container *clone() const;
       Container *cloneAlways() const;
       std::string getPlacementId(const Task *askingNode) const;
index 9724f342c01aab82b408a40486c3f8a54ce61d5f..87c467798f0deb69154f25de0329f512c265a350 100644 (file)
@@ -61,7 +61,7 @@ namespace WorkloadManager
   {
     ContainerType type;
     Resource resource;
-    unsigned int index; // worker index on the resource for this type
+    unsigned int index=0; // worker index on the resource for this type
   };
 
   /**
index 6ea538cb95af41dab12e144abe2625fba142baf3..ad0632238985c3ad857bda64eb8bdb72cea76777 100755 (executable)
@@ -1511,7 +1511,7 @@ dd=range(10)""")
     n1.edAddChild(n10)
     n10.setScript("""
 import time
-time.sleep(2)
+time.sleep(4)
 o2=2*i1
 """)
     i1=n10.edAddInputPort("i1",ti)
@@ -1542,7 +1542,7 @@ o2=2*i1
     myRun=threading.Thread(None, ex.RunW, None, (p,0))
     myRun.start()
     import time
-    time.sleep(5)
+    time.sleep(7)
     SALOMERuntime.schemaSaveState(p, ex, xmlStateFileName)
     a,b,c=n1.getPassedResults(ex)
     myRun.join()