1 // Copyright (C) 2019-2024 CEA, EDF
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License, or (at your option) any later version.
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
23 #include "Launcher.hxx"
24 #include "ResourcesManager.hxx"
28 struct ResourceDefinition_cpp
36 std::string applipath;
44 std::string iprotocol;
45 bool can_launch_batch_jobs;
46 bool can_run_containers;
47 std::string working_directory;
50 std::shared_ptr<ResourcesManager_cpp> HandleToLocalInstance(const std::string& ptrInStringFrmt)
52 std::istringstream iss(ptrInStringFrmt);
55 std::shared_ptr<ResourcesManager_cpp> *effPtr = reinterpret_cast<std::shared_ptr<ResourcesManager_cpp> *>(zePtr);
56 std::shared_ptr<ResourcesManager_cpp> ret(*effPtr);
62 %include "std_string.i"
63 %include "std_vector.i"
68 %template(list_int) list<int>;
69 %template(list_str) list<string>;
70 %template(vector_str) vector<string>;
71 %template(map_ss) map<string,string>;
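// see ResourceParameters from SALOME_ResourcesManager.idl
// see resourceParams from ResourcesManager.hxx
// NOTE(review): the two %naturalvar directives just below are scoped to
// JobParameters_cpp, whereas componentList / resourceList appear to be members of
// the struct declared right after them (resourceParams). They probably should be
// resourceParams::componentList / resourceParams::resourceList. Confirm before
// changing: enabling %naturalvar alters how the Python getter returns the member.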
76 %naturalvar JobParameters_cpp::componentList;
77 %naturalvar JobParameters_cpp::resourceList;
84 bool can_launch_batch_jobs;
85 bool can_run_containers;
89 long nb_proc_per_node;
92 std::vector<std::string> componentList;
93 std::vector<std::string> resourceList;
96 // see JobParameters from SALOME_Launcher.idl
97 // see JobParameters_cpp from Launcher.hxx
98 %naturalvar JobParameters_cpp::in_files;
99 %naturalvar JobParameters_cpp::out_files;
100 %naturalvar JobParameters_cpp::specific_parameters;
101 struct JobParameters_cpp
104 std::string job_name;
105 std::string job_type;
106 std::string job_file;
107 std::string pre_command;
108 std::string env_file;
109 std::list<std::string> in_files;
110 std::list<std::string> out_files;
111 std::string work_directory;
112 std::string local_directory;
113 std::string result_directory;
114 std::string maximum_duration;
115 resourceParams resource_required;
117 std::string partition;
119 unsigned int mem_per_cpu;
121 std::string extra_params;
122 std::map<std::string, std::string> specific_parameters;
123 std::string launcher_file;
124 std::string launcher_args;
127 // see ResourceDefinition from SALOME_ResourcesManager.idl
// no other C++ equivalent. Conversion from ParserResourcesType
129 struct ResourceDefinition_cpp
133 std::string hostname;
135 std::string protocol;
136 std::string username;
137 std::string applipath;
142 int nb_proc_per_node;
145 std::string iprotocol;
146 bool can_launch_batch_jobs;
147 bool can_run_containers;
148 std::string working_directory;
157 catch (ResourcesException& e)
159 SWIG_exception_fail(SWIG_RuntimeError, e.msg.c_str());
163 SWIG_exception_fail(SWIG_RuntimeError,"Unknown exception");
167 %include <std_shared_ptr.i>
168 %shared_ptr(ResourcesManager_cpp)
170 class ResourcesManager_cpp
173 ResourcesManager_cpp(const char *xmlFilePath);
174 std::vector<std::string> GetFittingResources(const resourceParams& params);
175 void WriteInXmlFile(std::string xml_file);
176 void DeleteAllResourcesInCatalog();
179 ResourceDefinition_cpp GetResourceDefinition(const std::string& name)
181 ResourceDefinition_cpp swig_result;
182 ParserResourcesType cpp_result = $self->GetResourcesDescr(name);
184 swig_result.name = cpp_result.Name;
185 swig_result.hostname = cpp_result.HostName;
186 swig_result.type = cpp_result.getResourceTypeStr();
187 swig_result.protocol = cpp_result.getAccessProtocolTypeStr();
188 swig_result.username = cpp_result.UserName;
189 swig_result.applipath = cpp_result.AppliPath;
190 swig_result.OS = cpp_result.OS;
191 swig_result.mem_mb = cpp_result.DataForSort._memInMB;
192 swig_result.cpu_clock = cpp_result.DataForSort._CPUFreqMHz;
193 swig_result.nb_node = cpp_result.DataForSort._nbOfNodes;
194 swig_result.nb_proc_per_node = cpp_result.DataForSort._nbOfProcPerNode;
195 swig_result.batch = cpp_result.getBatchTypeStr();
196 swig_result.mpiImpl = cpp_result.getMpiImplTypeStr();
197 swig_result.iprotocol = cpp_result.getClusterInternalProtocolStr();
198 swig_result.can_launch_batch_jobs = cpp_result.can_launch_batch_jobs;
199 swig_result.can_run_containers = cpp_result.can_run_containers;
200 swig_result.working_directory = cpp_result.working_directory;
205 void DeleteResourceInCatalog(const std::string& name)
207 $self->DeleteResourceInCatalog(name.c_str());
210 void AddResourceInCatalog (const ResourceDefinition_cpp& new_resource)
212 ParserResourcesType new_resource_cpp;
213 new_resource_cpp.Name = new_resource.name;
214 new_resource_cpp.HostName = new_resource.hostname;
215 new_resource_cpp.setResourceTypeStr( new_resource.type );
216 new_resource_cpp.setAccessProtocolTypeStr( new_resource.protocol );
217 new_resource_cpp.UserName = new_resource.username;
218 new_resource_cpp.AppliPath = new_resource.applipath;
219 new_resource_cpp.OS = new_resource.OS;
220 new_resource_cpp.DataForSort._Name = new_resource.name;
221 new_resource_cpp.DataForSort._memInMB = new_resource.mem_mb;
222 new_resource_cpp.DataForSort._CPUFreqMHz = new_resource.cpu_clock;
223 new_resource_cpp.DataForSort._nbOfNodes = new_resource.nb_node;
224 new_resource_cpp.DataForSort._nbOfProcPerNode = new_resource.nb_proc_per_node;
225 new_resource_cpp.setBatchTypeStr(new_resource.batch);
226 new_resource_cpp.setMpiImplTypeStr(new_resource.mpiImpl);
227 new_resource_cpp.setClusterInternalProtocolStr(new_resource.iprotocol);
228 new_resource_cpp.can_launch_batch_jobs = new_resource.can_launch_batch_jobs;
229 new_resource_cpp.can_run_containers = new_resource.can_run_containers;
230 new_resource_cpp.working_directory = new_resource.working_directory;
231 $self->AddResourceInCatalog(new_resource_cpp);
234 void AddResourceInCatalogNoQuestion (const ResourceDefinition_cpp& new_resource)
236 ParserResourcesType new_resource_cpp;
237 new_resource_cpp.Name = new_resource.name;
238 new_resource_cpp.HostName = new_resource.hostname;
239 new_resource_cpp.setResourceTypeStr( new_resource.type );
240 new_resource_cpp.setAccessProtocolTypeStr( new_resource.protocol );
241 new_resource_cpp.UserName = new_resource.username;
242 new_resource_cpp.AppliPath = new_resource.applipath;
243 new_resource_cpp.OS = new_resource.OS;
244 new_resource_cpp.DataForSort._Name = new_resource.name;
245 new_resource_cpp.DataForSort._memInMB = new_resource.mem_mb;
246 new_resource_cpp.DataForSort._CPUFreqMHz = new_resource.cpu_clock;
247 new_resource_cpp.DataForSort._nbOfNodes = new_resource.nb_node;
248 new_resource_cpp.DataForSort._nbOfProcPerNode = new_resource.nb_proc_per_node;
249 new_resource_cpp.setBatchTypeStr(new_resource.batch);
250 new_resource_cpp.setMpiImplTypeStr(new_resource.mpiImpl);
251 new_resource_cpp.setClusterInternalProtocolStr(new_resource.iprotocol);
252 new_resource_cpp.can_launch_batch_jobs = new_resource.can_launch_batch_jobs;
253 new_resource_cpp.can_run_containers = new_resource.can_run_containers;
254 new_resource_cpp.working_directory = new_resource.working_directory;
255 $self->AddResourceInCatalogNoQuestion(new_resource_cpp);
260 $self->ParseXmlFiles();
263 std::vector<std::string> GetListOfEntries() const
265 const MapOfParserResourcesType& allRes = $self->GetList();
266 std::vector<std::string> ret;
267 for(auto it : allRes)
268 ret.push_back(it.first);
276 std::shared_ptr<ResourcesManager_cpp> HandleToLocalInstance(const std::string& ptrInStringFrmt);
285 catch (LauncherException& e)
287 SWIG_exception_fail(SWIG_RuntimeError, e.msg.c_str());
291 SWIG_exception_fail(SWIG_RuntimeError,"Unknown exception");
299 virtual ~Launcher_cpp();
300 int createJob(const JobParameters_cpp& job_parameters);
301 void launchJob(int job_id);
302 std::string getJobState(int job_id);
303 std::string getAssignedHostnames(int job_id); // Get names or ids of hosts assigned to the job
304 void exportInputFiles(int job_id);
305 void getJobResults(int job_id, std::string directory);
306 void clearJobWorkingDir(int job_id);
307 bool getJobDumpState(int job_id, std::string directory);
308 bool getJobWorkFile(int job_id, std::string work_file, std::string directory);
309 void stopJob(int job_id);
310 void removeJob(int job_id);
311 std::string dumpJob(int job_id);
312 int restoreJob(const std::string& dumpedJob);
313 JobParameters_cpp getJobParameters(int job_id);
314 std::list<int> loadJobs(const char* jobs_file);
315 void saveJobs(const char* jobs_file);
316 long createJobWithFile(std::string xmlExecuteFile, std::string clusterName);
317 void SetResourcesManager(std::shared_ptr<ResourcesManager_cpp>& rm );
def CreateSSHContainerResource(hostname,applipath,nbOfNodes=1):
    """
    Shortcut for CreateContainerResource with the protocol fixed to "ssh".

    :param hostname: forwarded unchanged to CreateContainerResource.
    :param applipath: forwarded unchanged to CreateContainerResource.
    :param nbOfNodes: forwarded unchanged to CreateContainerResource (default 1).
    :return: whatever CreateContainerResource returns.
    """
    return CreateContainerResource(hostname=hostname,
                                   applipath=applipath,
                                   protocol="ssh",
                                   nbOfNodes=nbOfNodes)
def CreateSRUNContainerResource(hostname,applipath,nbOfNodes=1):
    """
    Shortcut for CreateContainerResource with the protocol fixed to "srun".

    :param hostname: forwarded unchanged to CreateContainerResource.
    :param applipath: forwarded unchanged to CreateContainerResource.
    :param nbOfNodes: forwarded unchanged to CreateContainerResource (default 1).
    :return: whatever CreateContainerResource returns.
    """
    return CreateContainerResource(hostname=hostname,
                                   applipath=applipath,
                                   protocol="srun",
                                   nbOfNodes=nbOfNodes)
def CreateContainerResource(hostname,applipath,protocol,nbOfNodes=1):
    """
    Build a ResourceDefinition_cpp describing a machine that can run containers
    but cannot launch batch jobs.

    :param hostname: machine name; only the part before the first "." is kept and
                     it is stored as both the resource name and its hostname.
    :param applipath: value stored in the applipath field.
    :param protocol: value stored in both the protocol and iprotocol fields.
    :param nbOfNodes: value stored in nb_node (default 1).
    :return: the populated ResourceDefinition_cpp instance.
    """
    # Function-scope import so the helper does not depend on a module-level import.
    import getpass
    ret = ResourceDefinition_cpp()
    # Short host name: everything before the first dot of the given hostname.
    ret.name = hostname.split(".")[0]
    ret.hostname = ret.name
    ret.protocol = protocol
    ret.applipath = applipath
    ret.nb_node = nbOfNodes
    ret.nb_proc_per_node = 1
    ret.can_run_containers = True
    ret.can_launch_batch_jobs = False
    ret.mpiImpl = "no mpi"
    ret.iprotocol = protocol
    ret.type = "single_machine"
    # The resource is registered under the account running this process.
    ret.username = getpass.getuser()
    # Callers (the SSH/SRUN shortcuts, BuildCatalogFromScratch) use the result.
    return ret
344 def ResourceDefinition_cpp_repr(self):
347 data = [("name","name",pat0),
348 ("hostname","hostname",pat0),
349 ("type","type",pat0),
350 ("protocol","protocol",pat0),
351 ("userName","username",pat0),
352 ("appliPath","applipath",pat1),
353 ("mpi","mpiImpl",pat0),
354 ("nbOfNodes","nb_node",pat0),
355 ("nbOfProcPerNode","nb_proc_per_node",pat0),
356 ("canRunContainer","can_run_containers",pat0)
358 ret = [c.format(a,getattr(self,b)) for a,b,c in data]
359 return "\n".join( ret )
def ResourcesManager_cpp_GetList(self):
    """
    Return a dict mapping every entry name reported by GetListOfEntries() to the
    object returned by GetResourceDefinition() for that name.
    """
    catalog = {}
    for entry in self.GetListOfEntries():
        catalog[entry] = self.GetResourceDefinition(entry)
    return catalog
def ResourcesManager_cpp___getitem__(self,name):
    """
    Subscript access: rm[name] yields the result of GetResourceDefinition(name).
    """
    definition = self.GetResourceDefinition(name)
    return definition
def ResourcesManager_cpp___repr__(self):
    """
    Textual form of the manager: str() applied to the value returned by GetList().
    """
    catalog = self.GetList()
    return str(catalog)
def RetrieveRMCppSingleton():
    """
    Ask KernelLauncher for the handle string of the local C++ resources manager
    and convert it with HandleToLocalInstance.

    :return: the value returned by HandleToLocalInstance for that handle.
    """
    # Imported at call time, exactly when the singleton is requested.
    import KernelLauncher
    handle = KernelLauncher.RetrieveInternalInstanceOfLocalCppResourcesManager()
    return HandleToLocalInstance( handle )
def GetPlayGroundInsideASlurmJob():
    """
    Run ``srun hostname`` and count how many times each host name is printed.

    :return: dict-like object (collections.defaultdict) mapping each non-empty
             output line to its number of occurrences.
    :raises subprocess.CalledProcessError: if ``srun`` exits with a non-zero status
             (raised by subprocess.check_output).
    """
    import subprocess as sp
    from collections import defaultdict
    cont = sp.check_output(["srun","hostname"])
    # Drop empty lines, e.g. the one produced by the trailing newline.
    nodesMul = [elt for elt in cont.decode().split("\n") if elt != ""]
    # BuildCatalogFromScratch consumes the result as {hostname: count}.
    d = defaultdict(int)
    for elt in nodesMul:
        d[elt] += 1
    return d
def BuildCatalogFromScratch(protocol):
    """
    Replace the content of the local resources catalog with one container
    resource per host returned by GetPlayGroundInsideASlurmJob().

    :param protocol: protocol forwarded to CreateContainerResource for every host.
    :raises KeyError: if the APPLI environment variable is not set while at least
                      one host has to be registered.
    """
    # Function-scope import so the helper does not depend on a module-level import.
    import os
    d = GetPlayGroundInsideASlurmJob()
    rmcpp = RetrieveRMCppSingleton()
    # Start from an empty catalog before registering the discovered hosts.
    rmcpp.DeleteAllResourcesInCatalog()
    for k,v in d.items():
        # k is the host name, v the node count passed as nbOfNodes.
        contRes = CreateContainerResource(hostname=k,applipath=os.environ["APPLI"],protocol=protocol,nbOfNodes=v)
        rmcpp.AddResourceInCatalog(contRes)
393 def GetRequestForGiveContainer(hostname, contName):
396 rp=Engines.ResourceParameters(name=hostname,
398 can_launch_batch_jobs=False,
399 can_run_containers=True,
410 cp=Engines.ContainerParameters(container_name=contName,
412 workingdir=os.path.expanduser("~"),
# Attach the pure-Python helpers defined above to the wrapped classes.

# ResourceDefinition_cpp: the same function backs both .repr() and repr(obj).
ResourceDefinition_cpp.repr = ResourceDefinition_cpp.__repr__ = ResourceDefinition_cpp_repr

# ResourcesManager_cpp: dict-style listing, rm[name] access and printable form.
ResourcesManager_cpp.GetList = ResourcesManager_cpp_GetList
ResourcesManager_cpp.__getitem__ = ResourcesManager_cpp___getitem__
ResourcesManager_cpp.__repr__ = ResourcesManager_cpp___repr__