Salome HOME
[EDF28648] : On container launch time from ContainerManager it s possible to execute...
[modules/kernel.git] / src / Launcher_SWIG / Launcher.i
1 // Copyright (C) 2019-2023  CEA, EDF
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License, or (at your option) any later version.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
16 //
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
18 //
19
20 %module pylauncher
21
22 %{
23 #include "Launcher.hxx"
24 #include "ResourcesManager.hxx"
25
26 #include <sstream>
27
// C++ side definition of the flat resource description exchanged with Python.
// This file fills it field by field from a ParserResourcesType (see GetResourceDefinition)
// and copies it back into a ParserResourcesType (see AddResourceInCatalog).
// The same list of members is repeated further down, outside the %{ %} section, so that SWIG wraps it.
28 struct ResourceDefinition_cpp
29 {
30 public:
31   std::string name;
32   std::string hostname;
33   std::string type;
34   std::string protocol;
35   std::string username;
36   std::string applipath;
37   std::string OS;
38   int  mem_mb;
39   int  cpu_clock;
40   int  nb_node;
41   int  nb_proc_per_node;
42   std::string batch;
43   std::string mpiImpl;
44   std::string iprotocol;
45   bool can_launch_batch_jobs;
46   bool can_run_containers;
47   std::string working_directory;
48 };
49
50 std::shared_ptr<ResourcesManager_cpp> HandleToLocalInstance(const std::string& ptrInStringFrmt)
51 {
52   std::istringstream iss(ptrInStringFrmt);
53   void *zePtr(nullptr);
54   iss >> zePtr;
55   std::shared_ptr<ResourcesManager_cpp> *effPtr = reinterpret_cast<std::shared_ptr<ResourcesManager_cpp> *>(zePtr);
56   std::shared_ptr<ResourcesManager_cpp> ret(*effPtr);
57   delete effPtr;
58   return ret;
59 }
60 %}
61
62 %include "std_string.i"
63 %include "std_vector.i"
64 %include "std_list.i"
65 %include "std_map.i"
66
// Instantiate for SWIG the STL containers appearing in the declarations of this file
// (std::list<int>, std::list<std::string>, std::vector<std::string>, std::map<std::string,std::string>).
// The name given to %template is the name of the generated Python class.
67 namespace std {
68   %template(list_int) list<int>;
69   %template(list_str) list<string>;
70   %template(vector_str) vector<string>;
71   %template(map_ss) map<string,string>;
72 };
73
// Description of the resources requested by a job or by a container, as seen from Python.
74 // see ResourceParameters from SALOME_ResourcesManager.idl
75 // see resourceParams from ResourcesManager.hxx
// NOTE(review): the two %naturalvar directives below are qualified with JobParameters_cpp,
// whereas componentList and resourceList are members of resourceParams (JobParameters_cpp, as
// declared in this file, has no such members). They probably were meant to be
// resourceParams::componentList and resourceParams::resourceList - to be confirmed, since
// changing them may alter how these attributes are exposed to Python.
76 %naturalvar JobParameters_cpp::componentList;
77 %naturalvar JobParameters_cpp::resourceList;
78 struct resourceParams
79 {
80   resourceParams();
81
82   std::string name;
83   std::string hostname;
84   bool can_launch_batch_jobs;
85   bool can_run_containers;
86   std::string OS;
87   long nb_proc;
88   long nb_node;
89   long nb_proc_per_node;
90   long cpu_clock;
91   long mem_mb;
92   std::vector<std::string> componentList;
93   std::vector<std::string> resourceList;
94 };
95
// Parameters of a job, as given to Launcher_cpp::createJob and returned by Launcher_cpp::getJobParameters.
96 // see JobParameters from SALOME_Launcher.idl
97 // see JobParameters_cpp from Launcher.hxx
// The container members in_files, out_files and specific_parameters are flagged %naturalvar.
98 %naturalvar JobParameters_cpp::in_files;
99 %naturalvar JobParameters_cpp::out_files;
100 %naturalvar JobParameters_cpp::specific_parameters;
101 struct JobParameters_cpp
102 {
103 public:
104   std::string job_name;
105   std::string job_type;
106   std::string job_file;
107   std::string pre_command;
108   std::string env_file;
109   std::list<std::string> in_files;
110   std::list<std::string> out_files;
111   std::string work_directory;
112   std::string local_directory;
113   std::string result_directory;
114   std::string maximum_duration;
// resources requested for the job (see struct resourceParams)
115   resourceParams resource_required;
116   std::string queue;
117   std::string partition;
118   bool exclusive;
119   unsigned int mem_per_cpu;
120   std::string wckey;
121   std::string extra_params;
122   std::map<std::string, std::string> specific_parameters;
123   std::string launcher_file;
124   std::string launcher_args;
125 };
126
// SWIG declaration of the flat resource description. The C++ definition of this struct is
// written in the %{ %} section at the top of this file, with the same members.
127 // see ResourceDefinition from SALOME_ResourcesManager.idl
128 // no other c++ equivalent. Conversion from ParserResourcesType
129 struct ResourceDefinition_cpp
130 {
131 public:
132   std::string name;
133   std::string hostname;
134   std::string type;
135   std::string protocol;
136   std::string username;
137   std::string applipath;
138   std::string OS;
139   int  mem_mb;
140   int  cpu_clock;
141   int  nb_node;
142   int  nb_proc_per_node;
143   std::string batch;
144   std::string mpiImpl;
145   std::string iprotocol;
146   bool can_launch_batch_jobs;
147   bool can_run_containers;
148   std::string working_directory;
149 };
150
// Exception translation inserted by SWIG around the wrapped calls declared after this directive,
// until the next %exception directive of this file.
// A ResourcesException is reported through SWIG_RuntimeError with its msg attribute as message.
// Any other exception type is reported through SWIG_RuntimeError with the message "Unknown exception".
151 %exception
152 {
153   try
154   {
155     $function
156   }
157   catch (ResourcesException& e)
158   {
159     SWIG_exception_fail(SWIG_RuntimeError, e.msg.c_str());
160   }
161   catch(...)
162   {
163     SWIG_exception_fail(SWIG_RuntimeError,"Unknown exception");
164   }
165 }
166
167 %include <std_shared_ptr.i>
168 %shared_ptr(ResourcesManager_cpp)
169
// Part of the C++ class ResourcesManager_cpp exposed to Python.
// The methods declared directly in the class are wrapped as they are.
// The methods of the %extend section are defined by this interface file : they convert between
// ParserResourcesType and the flat ResourceDefinition_cpp struct, or forward to the C++ instance ($self).
class ResourcesManager_cpp
{
public:
  ResourcesManager_cpp(const char *xmlFilePath);
  std::vector<std::string> GetFittingResources(const resourceParams& params);
  void WriteInXmlFile(std::string xml_file);
  void DeleteAllResourcesInCatalog();
%extend
{
  // Return the description of resource \a name, copied member by member from the
  // ParserResourcesType returned by GetResourcesDescr.
  ResourceDefinition_cpp GetResourceDefinition(const std::string& name)
  {
    ResourceDefinition_cpp swig_result;
    ParserResourcesType cpp_result = $self->GetResourcesDescr(name);

    swig_result.name = cpp_result.Name;
    swig_result.hostname = cpp_result.HostName;
    swig_result.type = cpp_result.getResourceTypeStr();
    swig_result.protocol = cpp_result.getAccessProtocolTypeStr();
    swig_result.username = cpp_result.UserName;
    swig_result.applipath = cpp_result.AppliPath;
    swig_result.OS = cpp_result.OS;
    swig_result.mem_mb = cpp_result.DataForSort._memInMB;
    swig_result.cpu_clock = cpp_result.DataForSort._CPUFreqMHz;
    swig_result.nb_node = cpp_result.DataForSort._nbOfNodes;
    swig_result.nb_proc_per_node = cpp_result.DataForSort._nbOfProcPerNode;
    swig_result.batch = cpp_result.getBatchTypeStr();
    swig_result.mpiImpl = cpp_result.getMpiImplTypeStr();
    swig_result.iprotocol = cpp_result.getClusterInternalProtocolStr();
    swig_result.can_launch_batch_jobs = cpp_result.can_launch_batch_jobs;
    swig_result.can_run_containers = cpp_result.can_run_containers;
    swig_result.working_directory = cpp_result.working_directory;

    return swig_result;
  }

  // std::string flavour of DeleteResourceInCatalog : forwards name.c_str() to the C++ method.
  void DeleteResourceInCatalog(const std::string& name)
  {
    $self->DeleteResourceInCatalog(name.c_str());
  }

  // Build a ParserResourcesType from \a new_resource (reverse conversion of GetResourceDefinition)
  // and hand it to the C++ AddResourceInCatalog.
  void AddResourceInCatalog (const ResourceDefinition_cpp& new_resource)
  {
    ParserResourcesType new_resource_cpp;
    new_resource_cpp.Name = new_resource.name;
    new_resource_cpp.HostName = new_resource.hostname;
    new_resource_cpp.setResourceTypeStr( new_resource.type );
    new_resource_cpp.setAccessProtocolTypeStr( new_resource.protocol );
    new_resource_cpp.UserName = new_resource.username;
    new_resource_cpp.AppliPath = new_resource.applipath;
    new_resource_cpp.OS = new_resource.OS;
    new_resource_cpp.DataForSort._memInMB = new_resource.mem_mb;
    new_resource_cpp.DataForSort._CPUFreqMHz = new_resource.cpu_clock;
    new_resource_cpp.DataForSort._nbOfNodes = new_resource.nb_node;
    new_resource_cpp.DataForSort._nbOfProcPerNode = new_resource.nb_proc_per_node;
    new_resource_cpp.setBatchTypeStr(new_resource.batch);
    new_resource_cpp.setMpiImplTypeStr(new_resource.mpiImpl);
    new_resource_cpp.setClusterInternalProtocolStr(new_resource.iprotocol);
    new_resource_cpp.can_launch_batch_jobs = new_resource.can_launch_batch_jobs;
    new_resource_cpp.can_run_containers = new_resource.can_run_containers;
    new_resource_cpp.working_directory = new_resource.working_directory;
    $self->AddResourceInCatalog(new_resource_cpp);
  }

  // Forward to the C++ ParseXmlFiles, its return value (if any) is not exposed.
  void ParseXmlFiles()
  {
    $self->ParseXmlFiles();
  }

  // Return the keys of the container returned by GetList, in iteration order.
  std::vector<std::string> GetListOfEntries() const
  {
    const MapOfParserResourcesType& allRes = $self->GetList();
    std::vector<std::string> ret;
    // iterate by const reference : iterating by value would copy each entry only to read its key
    for(const auto& it : allRes)
      ret.push_back(it.first);
    return ret;
  }
}
};
248
// Declaration given to SWIG of the HandleToLocalInstance function defined in the %{ %} section
// at the top of this file. It is called by the Python helper RetrieveRMCppSingleton.
249 %inline
250 {
251   std::shared_ptr<ResourcesManager_cpp> HandleToLocalInstance(const std::string& ptrInStringFrmt);
252 }
253
// Exception translation inserted by SWIG around the wrapped calls declared after this directive.
// It replaces the translation based on ResourcesException set earlier in this file.
// A LauncherException is reported through SWIG_RuntimeError with its msg attribute as message.
// Any other exception type is reported through SWIG_RuntimeError with the message "Unknown exception".
254 %exception
255 {
256   try
257   {
258     $function
259   }
260   catch (LauncherException& e)
261   {
262     SWIG_exception_fail(SWIG_RuntimeError, e.msg.c_str());
263   }
264   catch(...)
265   {
266     SWIG_exception_fail(SWIG_RuntimeError,"Unknown exception");
267   }
268 }
269
// Part of the C++ class Launcher_cpp exposed to Python.
// Jobs are designated by the integer identifier returned by createJob or restoreJob.
// Job descriptions are exchanged through the JobParameters_cpp struct declared in this file.
270 class Launcher_cpp
271 {
272 public:
273   Launcher_cpp();
274   virtual ~Launcher_cpp();
275   int          createJob(const JobParameters_cpp& job_parameters);
276   void         launchJob(int job_id);
277   std::string  getJobState(int job_id);
278   std::string  getAssignedHostnames(int job_id); // Get names or ids of hosts assigned to the job
279   void         exportInputFiles(int job_id);
280   void         getJobResults(int job_id, std::string directory);
281   void         clearJobWorkingDir(int job_id);
282   bool         getJobDumpState(int job_id, std::string directory);
283   bool         getJobWorkFile(int job_id, std::string work_file, std::string directory);
284   void         stopJob(int job_id);
285   void         removeJob(int job_id);
// NOTE(review): dumpJob / restoreJob look like a string serialization round trip of a job - to be confirmed in Launcher.hxx
286   std::string  dumpJob(int job_id);
287   int restoreJob(const std::string& dumpedJob);
288   JobParameters_cpp getJobParameters(int job_id);
289   std::list<int> loadJobs(const char* jobs_file);
290   void saveJobs(const char* jobs_file);
291   long createJobWithFile(std::string xmlExecuteFile, std::string clusterName);
// takes the resources manager as a shared_ptr (see %shared_ptr(ResourcesManager_cpp) earlier in this file)
292   void SetResourcesManager(std::shared_ptr<ResourcesManager_cpp>& rm );
293 };
294
295 %pythoncode %{
def CreateSSHContainerResource(hostname,applipath,nbOfNodes=1):
  """Shortcut to CreateContainerResource with the protocol set to "ssh"."""
  return CreateContainerResource(hostname, applipath, protocol="ssh", nbOfNodes=nbOfNodes)
298
def CreateSRUNContainerResource(hostname,applipath,nbOfNodes=1):
  """Shortcut to CreateContainerResource with the protocol set to "srun"."""
  return CreateContainerResource(hostname, applipath, protocol="srun", nbOfNodes=nbOfNodes)
301
def CreateContainerResource(hostname,applipath,protocol,nbOfNodes=1):
  """Build a ResourceDefinition_cpp describing a machine able to run containers.

  hostname : name of the machine. Only the part before the first "." is kept, for both
             the name and the hostname of the resource.
  applipath : value stored in the applipath attribute.
  protocol : value stored in both the protocol and the iprotocol attributes.
  nbOfNodes : value stored in nb_node (nb_proc_per_node is always 1).

  The resource is of type "single_machine", without MPI, not usable for batch jobs,
  and its username is the current user as returned by getpass.getuser().
  """
  from getpass import getuser
  shortName = hostname.split(".")[0]
  res = ResourceDefinition_cpp()
  # identity of the machine
  res.name = shortName
  res.hostname = shortName
  res.type = "single_machine"
  res.username = getuser()
  res.applipath = applipath
  # way to reach it
  res.protocol = protocol
  res.iprotocol = protocol
  res.mpiImpl = "no mpi"
  # capacity and capabilities
  res.nb_node = nbOfNodes
  res.nb_proc_per_node = 1
  res.can_run_containers = True
  res.can_launch_batch_jobs = False
  return res
318
def ResourceDefinition_cpp_repr(self):
  """Return a multi-line text with one "label = value" line per displayed attribute.

  Only a subset of the attributes is displayed. The value of applipath is surrounded
  by double quotes, the other values are written as they are.
  """
  plain = "{} = {}"
  quoted = "{} = \"{}\""
  # (displayed label, attribute read on self, pattern used for the line)
  rows = (
    ("name", "name", plain),
    ("hostname", "hostname", plain),
    ("type", "type", plain),
    ("protocol", "protocol", plain),
    ("userName", "username", plain),
    ("appliPath", "applipath", quoted),
    ("mpi", "mpiImpl", plain),
    ("nbOfNodes", "nb_node", plain),
    ("nbOfProcPerNode", "nb_proc_per_node", plain),
    ("canRunContainer", "can_run_containers", plain),
  )
  lines = []
  for label, attrName, pattern in rows:
    lines.append(pattern.format(label, getattr(self, attrName)))
  return "\n".join(lines)
335
def ResourcesManager_cpp_GetList(self):
  """Return a dict associating each name of GetListOfEntries() to its GetResourceDefinition()."""
  catalog = {}
  for entryName in self.GetListOfEntries():
    catalog[entryName] = self.GetResourceDefinition(entryName)
  return catalog
338
def ResourcesManager_cpp___getitem__(self,name):
  """Support of manager[name] : return GetResourceDefinition(name)."""
  definition = self.GetResourceDefinition(name)
  return definition
341
def ResourcesManager_cpp___repr__(self):
  """Return the string form of the dict returned by GetList()."""
  catalog = self.GetList()
  return str(catalog)
344
def RetrieveRMCppSingleton():
  """Return the resources manager built by HandleToLocalInstance from the value given by
  KernelLauncher.RetrieveInternalInstanceOfLocalCppResourcesManager().
  """
  import KernelLauncher
  handle = KernelLauncher.RetrieveInternalInstanceOfLocalCppResourcesManager()
  return HandleToLocalInstance(handle)
348
def GetPlayGroundInsideASlurmJob():
  """Run "srun hostname" and count how many times each line appears in its output.

  Return a defaultdict(int) : non empty output line -> number of occurrences.
  """
  import subprocess as sp
  from collections import defaultdict
  rawOutput = sp.check_output(["srun","hostname"])
  occurrences = defaultdict(int)
  for hostName in rawOutput.decode().split("\n"):
    if hostName == "":
      continue
    occurrences[hostName] += 1
  return occurrences
358
def BuildCatalogFromScratch(protocol):
  """Replace the content of the resources catalog by the machines of the current Slurm job.

  The resources manager returned by RetrieveRMCppSingleton() is emptied, then one resource
  is added per host returned by GetPlayGroundInsideASlurmJob(). Each resource is built by
  CreateContainerResource with the APPLI environment variable as applipath, the given
  protocol, and the number of occurrences of the host as nbOfNodes.
  """
  import os
  playground = GetPlayGroundInsideASlurmJob()
  manager = RetrieveRMCppSingleton()
  manager.DeleteAllResourcesInCatalog()
  for hostName, occurrences in playground.items():
    manager.AddResourceInCatalog(
      CreateContainerResource(hostname=hostName,
                              applipath=os.environ["APPLI"],
                              protocol=protocol,
                              nbOfNodes=occurrences))
367
def GetRequestForGiveContainer(hostname, contName):
  """Return the Engines.ContainerParameters requesting container contName on machine hostname.

  The container is requested in "start" mode, non MPI, with one proc and the home directory
  of the current user as working directory. The associated Engines.ResourceParameters uses
  hostname for both name and hostname, and fixed values for the other fields.
  """
  import os
  import Engines
  resourceRequest = Engines.ResourceParameters(
    name=hostname,
    hostname=hostname,
    can_launch_batch_jobs=False,
    can_run_containers=True,
    OS="Linux",
    componentList=[],
    nb_proc=1,
    mem_mb=1000,
    cpu_clock=1000,
    nb_node=1,
    nb_proc_per_node=1,
    policy="first",
    resList=[])
  return Engines.ContainerParameters(
    container_name=contName,
    mode="start",
    workingdir=os.path.expanduser("~"),
    nb_proc=1,
    isMPI=False,
    parallelLib="",
    resource_params=resourceRequest)
393
# Attach the helper functions defined above to the SWIG generated classes.
# ResourceDefinition_cpp_repr is bound twice : as the regular method "repr" and as __repr__.
394 ResourceDefinition_cpp.repr = ResourceDefinition_cpp_repr
395 ResourceDefinition_cpp.__repr__ = ResourceDefinition_cpp_repr
# GetList is the method called by ResourcesManager_cpp___repr__
396 ResourcesManager_cpp.GetList = ResourcesManager_cpp_GetList
397 ResourcesManager_cpp.__getitem__ = ResourcesManager_cpp___getitem__
398 ResourcesManager_cpp.__repr__ = ResourcesManager_cpp___repr__
399 %}