1 // Copyright (C) 2007-2013 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
23 #include "SALOME_ResourcesManager.hxx"
24 #include "Utils_ExceptHandlers.hxx"
25 #include "Utils_CorbaException.hxx"
43 #include <sys/types.h>
45 #include "utilities.h"
// Size limit for host names (per the macro name; no use is visible in this
// excerpt). The replacement text intentionally has no trailing ';' so the
// macro can be used inside expressions and array bounds.
#define MAX_SIZE_FOR_HOSTNAME 256
// Naming-service path under which the servant is registered by the
// constructors and removed again by Shutdown().
51 const char *SALOME_ResourcesManager::_ResourcesManagerNameInNS = "/ResourcesManager";
53 //=============================================================================
57 //=============================================================================
// Constructor taking an explicit catalog path: xmlFilePath is forwarded to the
// wrapped C++ resources manager (_rm); the servant is then activated on the
// POA and its reference is registered in the naming service.
59 SALOME_ResourcesManager::
60 SALOME_ResourcesManager(CORBA::ORB_ptr orb,
61 PortableServer::POA_var poa,
62 SALOME_NamingService *ns,
63 const char *xmlFilePath) : _rm(xmlFilePath)
65 MESSAGE("SALOME_ResourcesManager constructor");
// NOTE(review): `ns` is not visibly stored in this excerpt although _NS is
// dereferenced below — confirm _NS is assigned before Register() is called.
// Keep duplicated references to the ORB and the POA.
67 _orb = CORBA::ORB::_duplicate(orb) ;
68 _poa = PortableServer::POA::_duplicate(poa) ;
// Activate this servant and obtain its narrowed CORBA object reference.
69 PortableServer::ObjectId_var id = _poa->activate_object(this);
70 CORBA::Object_var obj = _poa->id_to_reference(id);
71 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
// Publish the reference under _ResourcesManagerNameInNS.
72 _NS->Register(refContMan,_ResourcesManagerNameInNS);
73 MESSAGE("SALOME_ResourcesManager constructor end");
76 //=============================================================================
78 * Standard constructor, parse resource file.
79 * - if ${APPLI} exists in environment,
80 * look for ${HOME}/${APPLI}/CatalogResources.xml
81 * - else look for default:
82 * ${KERNEL_ROOT_DIR}/share/salome/resources/kernel/CatalogResources.xml
83 * - parse XML resource file.
85 //=============================================================================
// Constructor without an explicit catalog path: _rm is default-constructed,
// then the servant is activated on the POA and registered in the naming
// service exactly as in the constructor that takes xmlFilePath.
87 SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb,
88 PortableServer::POA_var poa,
89 SALOME_NamingService *ns) : _rm()
91 MESSAGE("SALOME_ResourcesManager constructor");
// NOTE(review): `ns` is not visibly stored in this excerpt although _NS is
// dereferenced below — confirm _NS is assigned before Register() is called.
// Keep duplicated references to the ORB and the POA.
93 _orb = CORBA::ORB::_duplicate(orb) ;
94 _poa = PortableServer::POA::_duplicate(poa) ;
// Activate this servant and obtain its narrowed CORBA object reference.
95 PortableServer::ObjectId_var id = _poa->activate_object(this);
96 CORBA::Object_var obj = _poa->id_to_reference(id);
97 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
// Publish the reference under _ResourcesManagerNameInNS.
98 _NS->Register(refContMan,_ResourcesManagerNameInNS);
100 MESSAGE("SALOME_ResourcesManager constructor end");
103 //=============================================================================
105 * Standard Destructor
107 //=============================================================================
// Destructor: in the lines visible here it only emits a trace message.
109 SALOME_ResourcesManager::~SALOME_ResourcesManager()
111 MESSAGE("SALOME_ResourcesManager destructor");
115 //=============================================================================
117 * Unregister the ResourcesManager from the naming service, then deactivate its servant
119 //=============================================================================
// Removes the "/ResourcesManager" entry from the naming service and then
// deactivates this servant in the POA it was activated on.
121 void SALOME_ResourcesManager::Shutdown()
124 _NS->Destroy_Name(_ResourcesManagerNameInNS);
// Look up this servant's object id so that it can be deactivated.
125 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
126 _poa->deactivate_object(oid);
129 //=============================================================================
130 //! get the name of resources fitting the specified constraints (params)
132 * If hostname specified, check it is local or known in resources catalog.
135 * - select first machines with corresponding OS (all machines if
136 * parameter OS empty),
137 * - then select the sublist of machines on which the component is known
138 * (if the result is empty, the inventory of components has probably not
139 * been done, so the complete list from the previous step is returned)
141 //=============================================================================
// CORBA entry point: converts the CORBA ResourceParameters into the local
// parameter object `p`, delegates the selection to the C++ resources manager
// (_rm) and copies the resulting resource names into a newly allocated
// Engines::ResourceList.
// A ResourcesException raised by _rm is logged and re-thrown as a SALOME
// CORBA exception with code SALOME::BAD_PARAM.
// NOTE(review): the declaration of `p` and the final return statement are not
// visible in this excerpt.
143 Engines::ResourceList *
144 SALOME_ResourcesManager::GetFittingResources(const Engines::ResourceParameters& params)
146 MESSAGE("ResourcesManager::GetFittingResources");
// NOTE(review): `ret` is heap-allocated before the call that may throw; no
// release of `ret` is visible on the exception path below — confirm whether
// it leaks when THROW_SALOME_CORBA_EXCEPTION fires.
147 Engines::ResourceList * ret = new Engines::ResourceList;
// Field-by-field copy of the CORBA parameters into `p`.
151 p.name = params.name;
152 p.hostname = params.hostname;
153 p.can_launch_batch_jobs = params.can_launch_batch_jobs;
154 p.can_run_containers = params.can_run_containers;
156 p.nb_proc = params.nb_proc;
157 p.nb_node = params.nb_node;
158 p.nb_proc_per_node = params.nb_proc_per_node;
159 p.cpu_clock = params.cpu_clock;
160 p.mem_mb = params.mem_mb;
// CORBA string sequences are converted element by element to std::string.
161 for(unsigned int i=0; i<params.componentList.length(); i++)
162 p.componentList.push_back(std::string(params.componentList[i]));
163 for(unsigned int i=0; i<params.resList.length(); i++)
164 p.resourceList.push_back(std::string(params.resList[i]));
168 // Call C++ ResourceManager
169 std::vector <std::string> vec = _rm.GetFittingResources(p);
// Size the CORBA sequence, then copy each resource name into it.
172 ret->length(vec.size());
173 for(unsigned int i=0;i<vec.size();i++)
174 (*ret)[i] = (vec[i]).c_str();
// Translate the C++ exception into a CORBA exception for remote callers.
176 catch(const ResourcesException &ex)
178 INFOS("Caught exception in GetFittingResources C++: " << ex.msg);
179 THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
185 //=============================================================================
187 * Dynamically selects one resource from the given list using the "first" policy
189 //=============================================================================
// Copies the CORBA resource list into a std::vector<std::string>, asks _rm to
// pick one entry with the "first" policy, and returns the result as a string
// duplicated with CORBA::string_dup.
// NOTE(review): the return-type line of this definition is not visible in this
// excerpt.
192 SALOME_ResourcesManager::FindFirst(const Engines::ResourceList& listOfResources)
195 std::vector<std::string> rl;
196 for(unsigned int i=0; i<listOfResources.length(); i++)
197 rl.push_back(std::string(listOfResources[i]));
// The temporary std::string returned by Find() is copied before it is destroyed.
199 return CORBA::string_dup(_rm.Find("first", rl).c_str());
// Same as FindFirst(), except that the selection policy name is supplied by
// the caller and forwarded unchanged to _rm.Find(). The result is returned as
// a string duplicated with CORBA::string_dup.
// NOTE(review): the return-type line of this definition is not visible in this
// excerpt.
203 SALOME_ResourcesManager::Find(const char* policy, const Engines::ResourceList& listOfResources)
206 std::vector<std::string> rl;
207 for(unsigned int i=0; i<listOfResources.length(); i++)
208 rl.push_back(std::string(listOfResources[i]));
// The temporary std::string returned by Find() is copied before it is destroyed.
210 return CORBA::string_dup(_rm.Find(policy, rl).c_str());
// Looks up the resource called `name` in _rm and converts its description
// into a newly allocated Engines::ResourceDefinition.
// An exception caught during the lookup or conversion is logged and re-thrown
// as a SALOME CORBA exception with code SALOME::BAD_PARAM.
// NOTE(review): the `try` opener and the return statement are not visible in
// this excerpt.
213 Engines::ResourceDefinition*
214 SALOME_ResourcesManager::GetResourceDefinition(const char * name)
216 Engines::ResourceDefinition * p_ptr = NULL;
// The lookup happens before the allocation below.
218 ParserResourcesType resource = _rm.GetResourcesDescr(name);
219 p_ptr = new Engines::ResourceDefinition;
// String fields are duplicated into CORBA-managed strings.
221 p_ptr->name = CORBA::string_dup(resource.Name.c_str());
222 p_ptr->hostname = CORBA::string_dup(resource.HostName.c_str());
223 p_ptr->type = CORBA::string_dup(resource.getResourceTypeStr().c_str());
224 p_ptr->protocol = CORBA::string_dup(resource.getAccessProtocolTypeStr().c_str());
225 p_ptr->iprotocol = CORBA::string_dup(resource.getClusterInternalProtocolStr().c_str());
226 p_ptr->username = CORBA::string_dup(resource.UserName.c_str());
227 p_ptr->applipath = CORBA::string_dup(resource.AppliPath.c_str());
// The component list is sized first, then filled element by element.
228 p_ptr->componentList.length(resource.ComponentsList.size());
229 for(unsigned int i=0;i<resource.ComponentsList.size();i++)
230 p_ptr->componentList[i] = CORBA::string_dup(resource.ComponentsList[i].c_str());
231 p_ptr->OS = CORBA::string_dup(resource.OS.c_str());
// Numeric characteristics come from the resource's DataForSort member.
232 p_ptr->mem_mb = resource.DataForSort._memInMB;
233 p_ptr->cpu_clock = resource.DataForSort._CPUFreqMHz;
234 p_ptr->nb_proc_per_node = resource.DataForSort._nbOfProcPerNode;
235 p_ptr->nb_node = resource.DataForSort._nbOfNodes;
236 p_ptr->can_launch_batch_jobs = resource.can_launch_batch_jobs;
237 p_ptr->can_run_containers = resource.can_run_containers;
238 p_ptr->working_directory = CORBA::string_dup(resource.working_directory.c_str());
239 p_ptr->mpiImpl = CORBA::string_dup(resource.getMpiImplTypeStr().c_str());
240 p_ptr->batch = CORBA::string_dup(resource.getBatchTypeStr().c_str());
// NOTE(review): `exception` is unqualified; presumably std::exception through
// a using-directive that is not visible in this excerpt — confirm.
241 } catch (const exception & ex) {
242 INFOS("Caught exception in GetResourceDefinition: " << ex.what());
243 THROW_SALOME_CORBA_EXCEPTION(ex.what(), SALOME::BAD_PARAM);
// Converts a CORBA ResourceDefinition into a ParserResourcesType, adds it to
// the catalog held by _rm, and writes the catalog to xml_file.
// A SALOME_Exception is logged and re-thrown as a SALOME CORBA exception with
// code SALOME::BAD_PARAM.
// NOTE(review): the return-type line, the `try` opener and whatever guards the
// WriteInXmlFile() call (presumably a test on `write`) are not visible in this
// excerpt — confirm.
250 SALOME_ResourcesManager::AddResource(const Engines::ResourceDefinition& new_resource,
251 CORBA::Boolean write,
252 const char * xml_file)
256 ParserResourcesType resource;
// Field-by-field conversion; enumerated fields go through their string setters.
257 resource.Name = new_resource.name.in();
258 resource.HostName = new_resource.hostname.in();
259 resource.setResourceTypeStr(new_resource.type.in());
260 resource.OS = new_resource.OS.in();
261 resource.AppliPath = new_resource.applipath.in();
262 resource.DataForSort._memInMB = new_resource.mem_mb;
263 resource.DataForSort._CPUFreqMHz = new_resource.cpu_clock;
264 resource.DataForSort._nbOfNodes = new_resource.nb_node;
265 resource.DataForSort._nbOfProcPerNode = new_resource.nb_proc_per_node;
266 resource.UserName = new_resource.username.in();
267 resource.can_launch_batch_jobs = new_resource.can_launch_batch_jobs;
268 resource.can_run_containers = new_resource.can_run_containers;
269 resource.working_directory = new_resource.working_directory.in();
270 resource.setBatchTypeStr(new_resource.batch.in());
271 resource.setMpiImplTypeStr(new_resource.mpiImpl.in());
272 resource.setAccessProtocolTypeStr(new_resource.protocol.in());
273 resource.setClusterInternalProtocolStr(new_resource.iprotocol.in());
274 for (CORBA::ULong i = 0; i < new_resource.componentList.length(); i++)
275 resource.ComponentsList.push_back(new_resource.componentList[i].in());
// Register the converted resource in the in-memory catalog.
277 _rm.AddResourceInCatalog(resource);
// Persist the catalog to the file named by xml_file.
281 _rm.WriteInXmlFile(std::string(xml_file));
// Translate the C++ exception into a CORBA exception for remote callers.
285 catch (const SALOME_Exception & e)
287 INFOS("Error in AddResourceInCatalog: " << e);
288 THROW_SALOME_CORBA_EXCEPTION(e.what(), SALOME::BAD_PARAM);
// Deletes the resource called resource_name from the catalog held by _rm and
// writes the catalog to xml_file.
// A SALOME_Exception raised by the deletion is logged and re-thrown as a
// SALOME CORBA exception with code SALOME::BAD_PARAM.
// NOTE(review): the return-type line, the `try` opener and whatever guards the
// WriteInXmlFile() call (presumably a test on `write`) are not visible in this
// excerpt — confirm.
293 SALOME_ResourcesManager::RemoveResource(const char * resource_name,
294 CORBA::Boolean write,
295 const char * xml_file)
299 _rm.DeleteResourceInCatalog(resource_name);
// Translate the C++ exception into a CORBA exception for remote callers.
301 catch (const SALOME_Exception & e)
303 INFOS("Error in DeleteResourceInCatalog: " << e);
304 THROW_SALOME_CORBA_EXCEPTION(e.what(), SALOME::BAD_PARAM);
// Persist the catalog; this call sits after the catch handler.
309 _rm.WriteInXmlFile(std::string(xml_file));
// Builds a machine file for the resource called resource_name and returns the
// name of the file that was written.
//  - parallelLib == "Dummy": one host name per line, nb_procs lines in total,
//    cycling through the cluster members' host names.
//  - parallelLib == "Mpi": one line per cluster member, in the syntax of the
//    resource's MPI implementation ("cpu=" for lam, "slots=" for openmpi/ompi).
// The returned string stays empty ("") on every path that does not create a
// file (unknown resource, unhandled MPI implementation or parallelLib).
// NOTE(review): the return-type line, several `else` lines and the loop
// increment statements are not visible in this excerpt.
315 SALOME_ResourcesManager::getMachineFile(std::string resource_name,
316 CORBA::Long nb_procs,
317 std::string parallelLib)
319 std::string machine_file_name("");
321 if (parallelLib == "Dummy")
323 MESSAGE("[getMachineFile] parallelLib is Dummy");
// Work on a local copy of the catalog map.
324 MapOfParserResourcesType resourcesList = _rm.GetList();
325 if (resourcesList.find(resource_name) != resourcesList.end())
// `resource` is a local copy; the push_front below changes only this copy.
327 ParserResourcesType resource = resourcesList[resource_name];
329 // Check if resource is cluster or not
330 if (resource.ClusterMembersList.empty())
332 //It is not a cluster so we create a cluster with one machine
333 ParserResourcesType fake_node;
334 fake_node.HostName = resource.HostName;
335 fake_node.Protocol = resource.Protocol;
336 fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
337 fake_node.UserName = resource.UserName;
338 fake_node.AppliPath = resource.AppliPath;
339 fake_node.DataForSort = resource.DataForSort;
341 resource.ClusterMembersList.push_front(fake_node);
344 // Creating list of machines for creating the machine file
345 std::list<std::string> list_of_machines;
346 std::list<ParserResourcesType>::iterator cluster_it =
347 resource.ClusterMembersList.begin();
348 while (cluster_it != resource.ClusterMembersList.end())
350 // For each cluster member, add its host name nbOfNodes * nbOfProcPerNode times to the list
351 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
352 (*cluster_it).DataForSort._nbOfProcPerNode;
353 for (unsigned int i = 0; i < number_of_proc; i++)
354 list_of_machines.push_back((*cluster_it).HostName);
358 // Creating machine file
// NOTE(review): tmpnam() only returns a name; the file itself is created by
// the ofstream below, which leaves a window for name collisions. Consider an
// atomic create-and-open API such as mkstemp().
359 machine_file_name = tmpnam(NULL);
360 std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
362 CORBA::Long machine_number = 0;
363 std::list<std::string>::iterator it = list_of_machines.begin();
// NOTE(review): the loop stops only when machine_number equals nb_procs
// exactly; a negative nb_procs, or an empty list_of_machines (dereferencing
// end()), looks unsafe — confirm against the increments not visible here.
364 while (machine_number != nb_procs)
366 // Adding a new node to the machine file
367 machine_file << *it << std::endl;
// Wrap around to the first machine once the end of the list is reached.
371 if (it == list_of_machines.end())
372 it = list_of_machines.begin();
377 INFOS("[getMachineFile] Error resource_name not found in resourcesList -> " << resource_name);
379 else if (parallelLib == "Mpi")
381 MESSAGE("[getMachineFile] parallelLib is Mpi");
// Work on a local copy of the catalog map.
383 MapOfParserResourcesType resourcesList = _rm.GetList();
384 if (resourcesList.find(resource_name) != resourcesList.end())
// `resource` is a local copy; the push_front below changes only this copy.
386 ParserResourcesType resource = resourcesList[resource_name];
387 // Check if resource is cluster or not
388 if (resource.ClusterMembersList.empty())
390 //It is not a cluster so we create a cluster with one machine
391 ParserResourcesType fake_node;
392 fake_node.HostName = resource.HostName;
393 fake_node.Protocol = resource.Protocol;
394 fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
395 fake_node.UserName = resource.UserName;
396 fake_node.AppliPath = resource.AppliPath;
397 fake_node.DataForSort = resource.DataForSort;
399 resource.ClusterMembersList.push_front(fake_node);
402 // Choose the MPI implementation -> each MPI implementation has its own machine file syntax
403 if (resource.mpi == lam)
405 // Creating machine file
// NOTE(review): same tmpnam() name-collision concern as in the "Dummy" branch.
406 machine_file_name = tmpnam(NULL);
407 std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
409 // We add all cluster machines to the file
410 std::list<ParserResourcesType>::iterator cluster_it =
411 resource.ClusterMembersList.begin();
412 while (cluster_it != resource.ClusterMembersList.end())
// The lam branch writes "<host> cpu=<nbOfNodes * nbOfProcPerNode>" per member.
414 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
415 (*cluster_it).DataForSort._nbOfProcPerNode;
416 machine_file << (*cluster_it).HostName << " cpu=" << number_of_proc << std::endl;
420 else if ((resource.mpi == openmpi) || (resource.mpi == ompi))
422 // Creating machine file
// NOTE(review): same tmpnam() name-collision concern as in the "Dummy" branch.
423 machine_file_name = tmpnam(NULL);
424 std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
426 // We add all cluster machines to the file
427 std::list<ParserResourcesType>::iterator cluster_it =
428 resource.ClusterMembersList.begin();
429 while (cluster_it != resource.ClusterMembersList.end())
// The openmpi/ompi branch writes "<host> slots=<nbOfNodes * nbOfProcPerNode>" per member.
431 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
432 (*cluster_it).DataForSort._nbOfProcPerNode;
433 machine_file << (*cluster_it).HostName << " slots=" << number_of_proc << std::endl;
437 else if (resource.mpi == nompi)
// NOTE(review): this branch handles `nompi`, yet the message reads "was
// defined"; a "not"/"no" may be missing — runtime string left untouched.
439 INFOS("[getMachineFile] Error resource_name MPI implementation was defined for " << resource_name);
442 INFOS("[getMachineFile] Error resource_name MPI implementation not currenly handled for " << resource_name);
445 INFOS("[getMachineFile] Error resource_name not found in resourcesList -> " << resource_name);
448 INFOS("[getMachineFile] Error parallelLib is not handled -> " << parallelLib);
// Empty string when no machine file was created.
450 return machine_file_name;