Salome HOME
Direct manipulation of resource map singleton
[modules/kernel.git] / src / Launcher_SWIG / Launcher.i
1 // Copyright (C) 2019-2024  CEA, EDF
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License, or (at your option) any later version.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
16 //
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
18 //
19
20 %module pylauncher
21
22 %{
#include "Launcher.hxx"
#include "ResourcesManager.hxx"

#include <exception>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
27
// Plain description of one resource of the catalog, as exchanged with the
// target language. Numeric and boolean members carry default initializers so
// that a default-constructed instance never holds indeterminate values.
struct ResourceDefinition_cpp
{
  std::string name;
  std::string hostname;
  std::string type;
  std::string protocol;
  std::string username;
  std::string applipath;
  std::string OS;
  int  mem_mb = 0;
  int  cpu_clock = 0;
  int  nb_node = 0;
  int  nb_proc_per_node = 0;
  std::string batch;
  std::string mpiImpl;
  std::string iprotocol;
  bool can_launch_batch_jobs = false;
  bool can_run_containers = false;
  std::string working_directory;
};
49
50 std::shared_ptr<ResourcesManager_cpp> HandleToLocalInstance(const std::string& ptrInStringFrmt)
51 {
52   std::istringstream iss(ptrInStringFrmt);
53   void *zePtr(nullptr);
54   iss >> zePtr;
55   std::shared_ptr<ResourcesManager_cpp> *effPtr = reinterpret_cast<std::shared_ptr<ResourcesManager_cpp> *>(zePtr);
56   std::shared_ptr<ResourcesManager_cpp> ret(*effPtr);
57   delete effPtr;
58   return ret;
59 }
60 %}
61
62 %include "std_string.i"
63 %include "std_vector.i"
64 %include "std_list.i"
65 %include "std_map.i"
66
// Instantiate the STL containers that appear in the wrapped signatures and
// structures below, so that the target language gets a proxy type for each.
namespace std
{
  %template(list_int)   list<int>;
  %template(list_str)   list<string>;
  %template(vector_str) vector<string>;
  %template(map_ss)     map<string,string>;
}
73
// see ResourceParameters from SALOME_ResourcesManager.idl
// see resourceParams from ResourcesManager.hxx
// %naturalvar has to name the structure owning the member: componentList and
// resourceList are members of resourceParams (JobParameters_cpp has no such member).
%naturalvar resourceParams::componentList;
%naturalvar resourceParams::resourceList;
struct resourceParams
{
  resourceParams();

  std::string name;
  std::string hostname;
  bool can_launch_batch_jobs;
  bool can_run_containers;
  std::string OS;
  long nb_proc;
  long nb_node;
  long nb_proc_per_node;
  long cpu_clock;
  long mem_mb;
  std::vector<std::string> componentList;
  std::vector<std::string> resourceList;
};
95
// Wrapped view of the job description structure.
// see JobParameters from SALOME_Launcher.idl
// see JobParameters_cpp from Launcher.hxx
// The container members below are declared %naturalvar.
%naturalvar JobParameters_cpp::in_files;
%naturalvar JobParameters_cpp::out_files;
%naturalvar JobParameters_cpp::specific_parameters;
struct JobParameters_cpp
{
  // job identification and files
  std::string job_name;
  std::string job_type;
  std::string job_file;
  std::string pre_command;
  std::string env_file;
  std::list<std::string> in_files;
  std::list<std::string> out_files;

  // directories
  std::string work_directory;
  std::string local_directory;
  std::string result_directory;

  // resource request and scheduling related members
  std::string maximum_duration;
  resourceParams resource_required;
  std::string queue;
  std::string partition;
  bool exclusive;
  unsigned int mem_per_cpu;
  std::string wckey;
  std::string extra_params;

  // free-form key/value parameters
  std::map<std::string, std::string> specific_parameters;

  std::string launcher_file;
  std::string launcher_args;
};
126
// see ResourceDefinition from SALOME_ResourcesManager.idl
// No other C++ equivalent: instances are converted from/to ParserResourcesType
// by the ResourcesManager_cpp extensions declared further down in this file.
struct ResourceDefinition_cpp
{
  // identification and access
  std::string name;
  std::string hostname;
  std::string type;
  std::string protocol;
  std::string username;
  std::string applipath;
  std::string OS;

  // sizing
  int mem_mb;
  int cpu_clock;
  int nb_node;
  int nb_proc_per_node;

  // batch / MPI / internal protocol, given as strings
  std::string batch;
  std::string mpiImpl;
  std::string iprotocol;

  // capabilities
  bool can_launch_batch_jobs;
  bool can_run_containers;

  std::string working_directory;
};
150
// Exception handler applied to the wrapped functions declared from here up to
// the next %exception directive: every C++ exception is reported to the target
// language as a runtime error, with the most specific message available.
%exception
{
  try
  {
    $function
  }
  catch (const ResourcesException& e)
  {
    SWIG_exception_fail(SWIG_RuntimeError, e.msg.c_str());
  }
  catch (const std::exception& e)
  {
    // standard exceptions : forward what() rather than an anonymous message
    SWIG_exception_fail(SWIG_RuntimeError, e.what());
  }
  catch(...)
  {
    SWIG_exception_fail(SWIG_RuntimeError,"Unknown exception");
  }
}
166
167 %include <std_shared_ptr.i>
168 %shared_ptr(ResourcesManager_cpp)
169
%{
// Build the ParserResourcesType equivalent of a ResourceDefinition_cpp.
// Shared by the AddResourceInCatalog* extensions of ResourcesManager_cpp.
static ParserResourcesType ResourceDefinitionToParserResources(const ResourceDefinition_cpp& new_resource)
{
  ParserResourcesType new_resource_cpp;
  new_resource_cpp.Name = new_resource.name;
  new_resource_cpp.HostName = new_resource.hostname;
  new_resource_cpp.setResourceTypeStr( new_resource.type );
  new_resource_cpp.setAccessProtocolTypeStr( new_resource.protocol );
  new_resource_cpp.UserName = new_resource.username;
  new_resource_cpp.AppliPath = new_resource.applipath;
  new_resource_cpp.OS = new_resource.OS;
  new_resource_cpp.DataForSort._Name = new_resource.name;
  new_resource_cpp.DataForSort._memInMB = new_resource.mem_mb;
  new_resource_cpp.DataForSort._CPUFreqMHz = new_resource.cpu_clock;
  new_resource_cpp.DataForSort._nbOfNodes = new_resource.nb_node;
  new_resource_cpp.DataForSort._nbOfProcPerNode = new_resource.nb_proc_per_node;
  new_resource_cpp.setBatchTypeStr(new_resource.batch);
  new_resource_cpp.setMpiImplTypeStr(new_resource.mpiImpl);
  new_resource_cpp.setClusterInternalProtocolStr(new_resource.iprotocol);
  new_resource_cpp.can_launch_batch_jobs = new_resource.can_launch_batch_jobs;
  new_resource_cpp.can_run_containers = new_resource.can_run_containers;
  new_resource_cpp.working_directory = new_resource.working_directory;
  return new_resource_cpp;
}
%}

// Wrapped subset of ResourcesManager_cpp, completed by extensions that exchange
// ResourceDefinition_cpp instances instead of ParserResourcesType ones.
class ResourcesManager_cpp
{
public:
  ResourcesManager_cpp(const char *xmlFilePath);
  std::vector<std::string> GetFittingResources(const resourceParams& params);
  void WriteInXmlFile(std::string xml_file);
  void DeleteAllResourcesInCatalog();
%extend
{
  // Return the description of resource \a name, converted field by field from
  // the ParserResourcesType returned by GetResourcesDescr.
  ResourceDefinition_cpp GetResourceDefinition(const std::string& name)
  {
    ResourceDefinition_cpp swig_result;
    ParserResourcesType cpp_result = $self->GetResourcesDescr(name);

    swig_result.name = cpp_result.Name;
    swig_result.hostname = cpp_result.HostName;
    swig_result.type = cpp_result.getResourceTypeStr();
    swig_result.protocol = cpp_result.getAccessProtocolTypeStr();
    swig_result.username = cpp_result.UserName;
    swig_result.applipath = cpp_result.AppliPath;
    swig_result.OS = cpp_result.OS;
    swig_result.mem_mb = cpp_result.DataForSort._memInMB;
    swig_result.cpu_clock = cpp_result.DataForSort._CPUFreqMHz;
    swig_result.nb_node = cpp_result.DataForSort._nbOfNodes;
    swig_result.nb_proc_per_node = cpp_result.DataForSort._nbOfProcPerNode;
    swig_result.batch = cpp_result.getBatchTypeStr();
    swig_result.mpiImpl = cpp_result.getMpiImplTypeStr();
    swig_result.iprotocol = cpp_result.getClusterInternalProtocolStr();
    swig_result.can_launch_batch_jobs = cpp_result.can_launch_batch_jobs;
    swig_result.can_run_containers = cpp_result.can_run_containers;
    swig_result.working_directory = cpp_result.working_directory;

    return swig_result;
  }

  // std::string flavour forwarding to the C++ method through c_str().
  void DeleteResourceInCatalog(const std::string& name)
  {
    $self->DeleteResourceInCatalog(name.c_str());
  }

  // Convert \a new_resource and forward it to the C++ AddResourceInCatalog.
  void AddResourceInCatalog (const ResourceDefinition_cpp& new_resource)
  {
    $self->AddResourceInCatalog(ResourceDefinitionToParserResources(new_resource));
  }

  // Convert \a new_resource and forward it to the C++ AddResourceInCatalogNoQuestion.
  void AddResourceInCatalogNoQuestion (const ResourceDefinition_cpp& new_resource)
  {
    $self->AddResourceInCatalogNoQuestion(ResourceDefinitionToParserResources(new_resource));
  }

  void ParseXmlFiles()
  {
    $self->ParseXmlFiles();
  }

  // Return the keys of the map returned by GetList.
  std::vector<std::string> GetListOfEntries() const
  {
    const MapOfParserResourcesType& allRes = $self->GetList();
    std::vector<std::string> ret;
    ret.reserve(allRes.size());
    // iterate by const reference : only the key is needed, do not copy each ParserResourcesType
    for(const auto& it : allRes)
      ret.push_back(it.first);
    return ret;
  }
}
};
273
// Declare and wrap HandleToLocalInstance, whose definition sits in the
// verbatim C++ section at the top of this file.
%inline %{
  std::shared_ptr<ResourcesManager_cpp> HandleToLocalInstance(const std::string& ptrInStringFrmt);
%}
278
// Exception handler replacing the previous one for the wrapped functions
// declared below: every C++ exception is reported to the target language as a
// runtime error, with the most specific message available.
%exception
{
  try
  {
    $function
  }
  catch (const LauncherException& e)
  {
    SWIG_exception_fail(SWIG_RuntimeError, e.msg.c_str());
  }
  catch (const std::exception& e)
  {
    // standard exceptions : forward what() rather than an anonymous message
    SWIG_exception_fail(SWIG_RuntimeError, e.what());
  }
  catch(...)
  {
    SWIG_exception_fail(SWIG_RuntimeError,"Unknown exception");
  }
}
294
// Wrapped interface of Launcher_cpp (presumably declared in Launcher.hxx - to be confirmed).
// Jobs are designated by the integer identifier returned by createJob / restoreJob.
class Launcher_cpp
{
public:
  Launcher_cpp();
  virtual ~Launcher_cpp();

  // job creation and life cycle
  int createJob(const JobParameters_cpp& job_parameters);
  void launchJob(int job_id);
  std::string getJobState(int job_id);
  std::string getAssignedHostnames(int job_id); // Get names or ids of hosts assigned to the job
  void exportInputFiles(int job_id);
  void getJobResults(int job_id, std::string directory);
  void clearJobWorkingDir(int job_id);
  bool getJobDumpState(int job_id, std::string directory);
  bool getJobWorkFile(int job_id, std::string work_file, std::string directory);
  void stopJob(int job_id);
  void removeJob(int job_id);

  // dump / restore / persistence
  std::string dumpJob(int job_id);
  int restoreJob(const std::string& dumpedJob);
  JobParameters_cpp getJobParameters(int job_id);
  std::list<int> loadJobs(const char* jobs_file);
  void saveJobs(const char* jobs_file);

  long createJobWithFile(std::string xmlExecuteFile, std::string clusterName);
  void SetResourcesManager(std::shared_ptr<ResourcesManager_cpp>& rm );
};
319
320 %pythoncode %{
def CreateSSHContainerResource(hostname,applipath,nbOfNodes=1):
  """Shortcut for CreateContainerResource with "ssh" given as protocol."""
  return CreateContainerResource(hostname=hostname, applipath=applipath, protocol="ssh", nbOfNodes=nbOfNodes)
323
def CreateSRUNContainerResource(hostname,applipath,nbOfNodes=1):
  """Shortcut for CreateContainerResource with "srun" given as protocol."""
  return CreateContainerResource(hostname=hostname, applipath=applipath, protocol="srun", nbOfNodes=nbOfNodes)
326
def CreateContainerResource(hostname,applipath,protocol,nbOfNodes=1):
  """
  Build a ResourceDefinition_cpp describing a single machine able to run containers.

  The resource name is the part of hostname located before the first "." and the
  hostname attribute is read back from that name. The same protocol is used for
  both protocol and iprotocol. The username is the one returned by getpass.getuser().
  """
  import getpass
  resource = ResourceDefinition_cpp()
  # identification
  resource.name = hostname.split(".")[0]
  resource.hostname = resource.name
  # access
  resource.protocol = protocol
  resource.applipath = applipath
  # sizing
  resource.nb_node = nbOfNodes
  resource.nb_proc_per_node = 1
  # capabilities
  resource.can_run_containers = True
  resource.can_launch_batch_jobs = False
  resource.mpiImpl = "no mpi"
  resource.iprotocol = protocol
  resource.type = "single_machine"
  resource.username = getpass.getuser()
  return resource
343
def ResourceDefinition_cpp_repr(self):
  """
  Return a multi-line "label = value" description of the resource, one line per
  reported attribute. Only appliPath is written between double quotes.
  """
  plain = "{} = {}"
  quoted = "{} = \"{}\""
  # (displayed label, attribute read on self, pattern used for the line)
  fields = (
    ("name", "name", plain),
    ("hostname", "hostname", plain),
    ("type", "type", plain),
    ("protocol", "protocol", plain),
    ("userName", "username", plain),
    ("appliPath", "applipath", quoted),
    ("mpi", "mpiImpl", plain),
    ("nbOfNodes", "nb_node", plain),
    ("nbOfProcPerNode", "nb_proc_per_node", plain),
    ("canRunContainer", "can_run_containers", plain),
  )
  lines = []
  for label, attrName, pattern in fields:
    lines.append(pattern.format(label, getattr(self, attrName)))
  return "\n".join(lines)
360
def ResourcesManager_cpp_GetList(self):
  """Return a dict mapping each name of GetListOfEntries() to its GetResourceDefinition(name)."""
  definitions = {}
  for entry in self.GetListOfEntries():
    definitions[entry] = self.GetResourceDefinition(entry)
  return definitions
363
def ResourcesManager_cpp___getitem__(self,name):
  """Subscript access: rm[name] is rm.GetResourceDefinition(name)."""
  definition = self.GetResourceDefinition(name)
  return definition
366
def ResourcesManager_cpp___repr__(self):
  """Textual form of the manager: the str() of what GetList() returns."""
  content = self.GetList()
  return str(content)
369
def RetrieveRMCppSingleton():
  """
  Return the ResourcesManager_cpp instance built by HandleToLocalInstance from the
  handle given by KernelLauncher.RetrieveInternalInstanceOfLocalCppResourcesManager().
  """
  from KernelLauncher import RetrieveInternalInstanceOfLocalCppResourcesManager
  handle = RetrieveInternalInstanceOfLocalCppResourcesManager()
  return HandleToLocalInstance(handle)
373
def GetPlayGroundInsideASlurmJob():
  """
  Run "srun hostname" and count how many times each non-empty output line appears.

  Returns a defaultdict(int) whose keys are the lines and values their number of occurrences.
  """
  from collections import defaultdict
  import subprocess
  rawOutput = subprocess.check_output(["srun","hostname"])
  occurrences = defaultdict(int)
  for line in rawOutput.decode().split("\n"):
    if line == "":
      continue
    occurrences[line] += 1
  return occurrences
383
def BuildCatalogFromScratch(protocol):
  """
  Empty the catalog of the resources manager returned by RetrieveRMCppSingleton(),
  then add one container resource per entry of GetPlayGroundInsideASlurmJob().

  For each entry the key is used as hostname and the count as nbOfNodes. The
  applipath is read from the APPLI environment variable.
  """
  import os
  playGround = GetPlayGroundInsideASlurmJob()
  manager = RetrieveRMCppSingleton()
  manager.DeleteAllResourcesInCatalog()
  for node, count in playGround.items():
    resource = CreateContainerResource(hostname=node,
                                       applipath=os.environ["APPLI"],
                                       protocol=protocol,
                                       nbOfNodes=count)
    manager.AddResourceInCatalog(resource)
392
def GetRequestForGiveContainer(hostname, contName):
  """
  Return an Engines.ContainerParameters named contName, in "start" mode, whose
  resource parameters use hostname both as resource name and as hostname.

  The working directory is the expansion of "~". Every other value is a fixed
  constant of this function.
  """
  import Engines
  import os
  resourceArgs = dict(name=hostname,
                      hostname=hostname,
                      can_launch_batch_jobs=False,
                      can_run_containers=True,
                      OS="Linux",
                      componentList=[],
                      nb_proc=1,
                      mem_mb=1000,
                      cpu_clock=1000,
                      nb_node=1,
                      nb_proc_per_node=1,
                      policy="first",
                      resList=[])
  resourceRequest = Engines.ResourceParameters(**resourceArgs)

  containerArgs = dict(container_name=contName,
                       mode="start",
                       workingdir=os.path.expanduser("~"),
                       nb_proc=1,
                       isMPI=False,
                       parallelLib="",
                       resource_params=resourceRequest)
  return Engines.ContainerParameters(**containerArgs)
418
# Attach the pure python helpers defined above to the SWIG generated proxy classes.
for _attrName in ("repr", "__repr__"):
  setattr(ResourceDefinition_cpp, _attrName, ResourceDefinition_cpp_repr)
del _attrName

for _attrName, _helper in (("GetList", ResourcesManager_cpp_GetList),
                           ("__getitem__", ResourcesManager_cpp___getitem__),
                           ("__repr__", ResourcesManager_cpp___repr__)):
  setattr(ResourcesManager_cpp, _attrName, _helper)
del _attrName, _helper
424 %}