1 // Copyright (C) 2007-2024 CEA, EDF, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License, or (at your option) any later version.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
23 #include "SALOME_ResourcesManager.hxx"
24 #include "Utils_ExceptHandlers.hxx"
25 #include "Utils_CorbaException.hxx"
43 #include <sys/types.h>
45 #include "utilities.h"
// Maximum size for a host name, as the macro name states.
// The definition deliberately carries no trailing ';' so that it expands
// cleanly inside expressions and array bounds.
#define MAX_SIZE_FOR_HOSTNAME 256
51 const char *SALOME_ResourcesManager::_ResourcesManagerNameInNS = "/ResourcesManager";
53 //=============================================================================
57 //=============================================================================
59 SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb,
60 PortableServer::POA_var poa,
61 SALOME_NamingService_Abstract *ns,
62 const char *xmlFilePath)
63 : _rm(new ResourcesManager_cpp(xmlFilePath))
65 MESSAGE("SALOME_ResourcesManager constructor");
67 _orb = CORBA::ORB::_duplicate(orb) ;
69 PortableServer::POAManager_var pman = poa->the_POAManager();
70 CORBA::PolicyList policies;
72 PortableServer::ThreadPolicy_var threadPol(poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL));
73 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
74 _poa = poa->create_POA("SingleThreadPOA",pman,policies);
77 PortableServer::ObjectId_var id = _poa->activate_object(this);
78 CORBA::Object_var obj = _poa->id_to_reference(id);
79 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
80 _NS->Register(refContMan,_ResourcesManagerNameInNS);
81 MESSAGE("SALOME_ResourcesManager constructor end");
84 //=============================================================================
86 * Standard constructor, parse resource file.
87 * - if ${APPLI} exists in environment,
88 * look for ${HOME}/${APPLI}/CatalogResources.xml
89 * - else look for default:
90 * ${KERNEL_ROOT_DIR}/share/salome/resources/kernel/CatalogResources.xml
91 * - parse XML resource file.
93 //=============================================================================
95 SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb,
96 PortableServer::POA_var poa,
97 SALOME_NamingService_Abstract *ns) : _rm(new ResourcesManager_cpp())
100 _orb = CORBA::ORB::_duplicate(orb) ;
102 PortableServer::POAManager_var pman = poa->the_POAManager();
103 CORBA::PolicyList policies;
105 PortableServer::ThreadPolicy_var threadPol(poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL));
106 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
107 _poa = poa->create_POA("SingleThreadPOA",pman,policies);
108 threadPol->destroy();
110 PortableServer::ObjectId_var id = _poa->activate_object(this);
111 CORBA::Object_var obj = _poa->id_to_reference(id);
112 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
114 _NS->Register(refContMan,_ResourcesManagerNameInNS);
117 //=============================================================================
119 * Standard Destructor
121 //=============================================================================
123 SALOME_ResourcesManager::~SALOME_ResourcesManager()
125 MESSAGE("SALOME_ResourcesManager destructor");
129 //=============================================================================
131 * shutdown all the containers, then the ContainerManager servant
133 //=============================================================================
135 void SALOME_ResourcesManager::Shutdown()
140 _NS->Destroy_Name(_ResourcesManagerNameInNS);
141 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
142 _poa->deactivate_object(oid);
146 * Return list of resources available (regarding content of CatalogResources.xml) but select only those with canRunContainers attribute set to true.
147 * And for each resource the number of proc available of it.
149 * \sa SALOME_ResourcesManager::ListAllResourcesInCatalog, SALOME_ResourcesManager::ListAllResourceEntriesInCatalog
151 void SALOME_ResourcesManager::ListAllAvailableResources(Engines::ResourceList_out machines, Engines::IntegerList_out nbProcsOfMachines)
153 const MapOfParserResourcesType& zeList(_rm->GetList());
154 std::vector<std::string> ret0;
155 std::vector<int> ret1;
156 for(MapOfParserResourcesType::const_iterator it=zeList.begin();it!=zeList.end();it++)
158 const ParserResourcesType& elt((*it).second);
159 if(elt.can_run_containers)
161 ret0.push_back(elt.HostName);
162 ret1.push_back(elt.DataForSort._nbOfNodes*elt.DataForSort._nbOfProcPerNode);
165 machines=new Engines::ResourceList;
166 nbProcsOfMachines=new Engines::IntegerList;
167 std::size_t sz(ret0.size());
168 machines->length((CORBA::ULong)sz); nbProcsOfMachines->length((CORBA::ULong)sz);
169 for(std::size_t j=0;j<sz;j++)
171 (*machines)[(CORBA::ULong)j]=CORBA::string_dup(ret0[j].c_str());
172 (*nbProcsOfMachines)[(CORBA::ULong)j]=ret1[j];
177 * Return list of resources available (regarding content of CatalogResources.xml) whatever canRunContainers attribute value.
179 * \sa SALOME_ResourcesManager::ListAllAvailableResources, SALOME_ResourcesManager::ListAllResourceEntriesInCatalog
181 Engines::ResourceList *SALOME_ResourcesManager::ListAllResourcesInCatalog()
183 const MapOfParserResourcesType& zeList(_rm->GetList());
184 auto sz = zeList.size();
185 Engines::ResourceList *ret(new Engines::ResourceList);
188 for(auto it : zeList)
190 (*ret)[i++] = CORBA::string_dup( it.second.HostName.c_str() );
196 * Return list of resources entries available. Useful to scan remotely the content of the playground
198 Engines::ResourceList *SALOME_ResourcesManager::ListAllResourceEntriesInCatalog()
200 const MapOfParserResourcesType& zeList(_rm->GetList());
201 auto sz = zeList.size();
202 Engines::ResourceList *ret(new Engines::ResourceList);
205 for(auto it : zeList)
207 (*ret)[i++] = CORBA::string_dup( it.first.c_str() );
212 //=============================================================================
213 //! get the name of resources fitting the specified constraints (params)
215 * If hostname specified, check it is local or known in resources catalog.
218 * - select first machines with corresponding OS (all machines if
219 * parameter OS empty),
220 * - then select the sublist of machines on which the component is known
221 * (if the result is empty, that probably means that the inventory of
222 * components is probably not done, so give complete list from previous step)
224 //=============================================================================
226 Engines::ResourceList *
227 SALOME_ResourcesManager::GetFittingResources(const Engines::ResourceParameters& params)
229 //MESSAGE("ResourcesManager::GetFittingResources");
230 Engines::ResourceList_var ret;
233 resourceParams p = resourceParameters_CORBAtoCPP(params);
237 // Call C++ ResourceManager
238 std::vector <std::string> vec = _rm->GetFittingResources(p);
241 ret = resourceList_CPPtoCORBA(vec);
243 catch(const ResourcesException &ex)
245 INFOS("Caught exception in GetFittingResources C++: " << ex.msg);
246 THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
252 //=============================================================================
254 * dynamically obtains the first machines
256 //=============================================================================
259 SALOME_ResourcesManager::FindFirst(const Engines::ResourceList& listOfResources)
262 std::vector<std::string> rl = resourceList_CORBAtoCPP(listOfResources);
264 return CORBA::string_dup(_rm->Find("first", rl).c_str());
268 SALOME_ResourcesManager::Find(const char* policy, const Engines::ResourceList& listOfResources)
271 std::vector<std::string> rl = resourceList_CORBAtoCPP(listOfResources);
273 return CORBA::string_dup(_rm->Find(policy, rl).c_str());
276 Engines::ResourceDefinition*
277 SALOME_ResourcesManager::GetResourceDefinition(const char * name)
279 Engines::ResourceDefinition_var resDef;
281 ParserResourcesType resource = _rm->GetResourcesDescr(name);
282 resDef = resourceDefinition_CPPtoCORBA(resource);
283 } catch (const exception & ex) {
284 INFOS("Caught exception in GetResourceDefinition: " << ex.what());
285 THROW_SALOME_CORBA_EXCEPTION(ex.what(), SALOME::BAD_PARAM);
288 return resDef._retn();
292 SALOME_ResourcesManager::AddResource(const Engines::ResourceDefinition& new_resource,
293 CORBA::Boolean write,
294 const char * xml_file)
298 ParserResourcesType resource = resourceDefinition_CORBAtoCPP(new_resource);
299 _rm->AddResourceInCatalog(resource);
303 _rm->WriteInXmlFile(std::string(xml_file));
304 _rm->ParseXmlFiles();
307 catch (const SALOME_Exception & e)
309 INFOS("Error in AddResourceInCatalog: " << e);
310 THROW_SALOME_CORBA_EXCEPTION(e.what(), SALOME::BAD_PARAM);
312 catch (const ResourcesException & e)
314 INFOS("Error in AddResourceInCatalog: " << e.msg);
315 THROW_SALOME_CORBA_EXCEPTION(e.msg.c_str(), SALOME::BAD_PARAM);
320 SALOME_ResourcesManager::RemoveResource(const char * resource_name,
321 CORBA::Boolean write,
322 const char * xml_file)
326 _rm->DeleteResourceInCatalog(resource_name);
328 catch (const SALOME_Exception & e)
330 INFOS("Error in DeleteResourceInCatalog: " << e);
331 THROW_SALOME_CORBA_EXCEPTION(e.what(), SALOME::BAD_PARAM);
336 _rm->WriteInXmlFile(std::string(xml_file));
337 _rm->ParseXmlFiles();
342 SALOME_ResourcesManager::getMachineFile(const char * resource_name,
343 CORBA::Long nb_procs,
344 const char * parallelLib)
346 std::string machine_file_name("");
348 if (std::string(parallelLib) == "Dummy")
350 MESSAGE("[getMachineFile] parallelLib is Dummy");
351 MapOfParserResourcesType resourcesList = _rm->GetList();
352 if (resourcesList.find(std::string(resource_name)) != resourcesList.end())
354 ParserResourcesType resource = resourcesList[std::string(resource_name)];
356 // Check if resource is cluster or not
357 if (resource.ClusterMembersList.empty())
359 //It is not a cluster so we create a cluster with one machine
360 ParserResourcesType fake_node;
361 fake_node.HostName = resource.HostName;
362 fake_node.Protocol = resource.Protocol;
363 fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
364 fake_node.UserName = resource.UserName;
365 fake_node.AppliPath = resource.AppliPath;
366 fake_node.DataForSort = resource.DataForSort;
368 resource.ClusterMembersList.push_front(fake_node);
371 // Creating list of machines for creating the machine file
372 std::list<std::string> list_of_machines;
373 std::list<ParserResourcesType>::iterator cluster_it =
374 resource.ClusterMembersList.begin();
375 while (cluster_it != resource.ClusterMembersList.end())
377 // For each member of the cluster we add a nbOfNodes * nbOfProcPerNode in the list
378 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
379 (*cluster_it).DataForSort._nbOfProcPerNode;
380 for (unsigned int i = 0; i < number_of_proc; i++)
381 list_of_machines.push_back((*cluster_it).HostName);
385 // Creating machine file
386 machine_file_name = tmpnam(NULL);
387 std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
389 CORBA::Long machine_number = 0;
390 std::list<std::string>::iterator it = list_of_machines.begin();
391 while (machine_number != nb_procs)
393 // Adding a new node to the machine file
394 machine_file << *it << std::endl;
398 if (it == list_of_machines.end())
399 it = list_of_machines.begin();
404 INFOS("[getMachineFile] Error resource_name not found in resourcesList -> " << resource_name);
406 else if (std::string(parallelLib) == "Mpi")
408 MESSAGE("[getMachineFile] parallelLib is Mpi");
410 MapOfParserResourcesType resourcesList = _rm->GetList();
411 if (resourcesList.find(std::string(resource_name)) != resourcesList.end())
413 ParserResourcesType resource = resourcesList[std::string(resource_name)];
414 // Check if resource is cluster or not
415 if (resource.ClusterMembersList.empty())
417 //It is not a cluster so we create a cluster with one machine
418 ParserResourcesType fake_node;
419 fake_node.HostName = resource.HostName;
420 fake_node.Protocol = resource.Protocol;
421 fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
422 fake_node.UserName = resource.UserName;
423 fake_node.AppliPath = resource.AppliPath;
424 fake_node.DataForSort = resource.DataForSort;
426 resource.ClusterMembersList.push_front(fake_node);
429 // Choose mpi implementation -> each MPI implementation has is own machinefile...
430 if (resource.mpi == lam)
432 // Creating machine file
433 machine_file_name = tmpnam(NULL);
434 std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
436 // We add all cluster machines to the file
437 std::list<ParserResourcesType>::iterator cluster_it =
438 resource.ClusterMembersList.begin();
439 while (cluster_it != resource.ClusterMembersList.end())
441 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
442 (*cluster_it).DataForSort._nbOfProcPerNode;
443 machine_file << (*cluster_it).HostName << " cpu=" << number_of_proc << std::endl;
447 else if ((resource.mpi == openmpi) || (resource.mpi == ompi))
449 // Creating machine file
450 machine_file_name = tmpnam(NULL);
451 std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
453 // We add all cluster machines to the file
454 std::list<ParserResourcesType>::iterator cluster_it =
455 resource.ClusterMembersList.begin();
456 while (cluster_it != resource.ClusterMembersList.end())
458 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
459 (*cluster_it).DataForSort._nbOfProcPerNode;
460 machine_file << (*cluster_it).HostName << " slots=" << number_of_proc << std::endl;
464 else if (resource.mpi == nompi)
466 INFOS("[getMachineFile] Error resource_name MPI implementation was defined for " << resource_name);
469 INFOS("[getMachineFile] Error resource_name MPI implementation not currently handled for " << resource_name);
472 INFOS("[getMachineFile] Error resource_name not found in resourcesList -> " << resource_name);
475 INFOS("[getMachineFile] Error parallelLib is not handled -> " << parallelLib);
477 return CORBA::string_dup(machine_file_name.c_str());