1 // Copyright (C) 2007-2008 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
22 #include "SALOME_ResourcesManager.hxx"
23 #include "Utils_ExceptHandlers.hxx"
24 #include "Utils_CorbaException.hxx"
42 #include <sys/types.h>
44 #include "utilities.h"
// Size limit for host names (per the macro name; no use is visible in this excerpt).
// The macro body must be a bare integer with no trailing semicolon; otherwise any use
// inside an expression or an array bound (e.g. `char buf[MAX_SIZE_FOR_HOSTNAME]`)
// expands to invalid code.
#define MAX_SIZE_FOR_HOSTNAME 256
// Naming-service path under which the ResourcesManager servant is registered by the
// constructors and removed again by Shutdown().
50 const char *SALOME_ResourcesManager::_ResourcesManagerNameInNS = "/ResourcesManager";
52 //=============================================================================
56 //=============================================================================
// Constructor taking an explicit catalog path.
//   orb         - ORB reference; a duplicate is stored in _orb.
//   poa         - POA in which this servant is activated; a duplicate is stored in _poa.
//   ns          - naming service handle.
//   xmlFilePath - forwarded unchanged to the _rm member's constructor.
// After construction the servant is active in the POA and registered in the naming
// service under _ResourcesManagerNameInNS.
58 SALOME_ResourcesManager::
59 SALOME_ResourcesManager(CORBA::ORB_ptr orb,
60 PortableServer::POA_var poa,
61 SALOME_NamingService *ns,
62 const char *xmlFilePath) : _rm(xmlFilePath)
64 MESSAGE("SALOME_ResourcesManager constructor");
// Keep our own references to the ORB and the POA.
66 _orb = CORBA::ORB::_duplicate(orb) ;
67 _poa = PortableServer::POA::_duplicate(poa) ;
// Activate this object as a servant, then turn its object id into a CORBA reference.
68 PortableServer::ObjectId_var id = _poa->activate_object(this);
69 CORBA::Object_var obj = _poa->id_to_reference(id);
70 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
// Publish the reference in the naming service.
// NOTE(review): _NS is dereferenced here, but its assignment from `ns` is not visible
// in this excerpt - confirm it is set before this call.
71 _NS->Register(refContMan,_ResourcesManagerNameInNS);
72 MESSAGE("SALOME_ResourcesManager constructor end");
75 //=============================================================================
77 * Standard constructor, parse resource file.
78 * - if ${APPLI} exists in environment,
79 * look for ${HOME}/${APPLI}/CatalogResources.xml
80 * - else look for default:
81 * ${KERNEL_ROOT_DIR}/share/salome/resources/kernel/CatalogResources.xml
82 * - parse XML resource file.
84 //=============================================================================
// Constructor without an explicit catalog path: _rm is default-constructed.
//   orb - ORB reference; a duplicate is stored in _orb.
//   poa - POA in which this servant is activated; a duplicate is stored in _poa.
//   ns  - naming service handle.
// After construction the servant is active in the POA and registered in the naming
// service under _ResourcesManagerNameInNS.
86 SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb,
87 PortableServer::POA_var poa,
88 SALOME_NamingService *ns) : _rm()
90 MESSAGE("SALOME_ResourcesManager constructor");
// Keep our own references to the ORB and the POA.
92 _orb = CORBA::ORB::_duplicate(orb) ;
93 _poa = PortableServer::POA::_duplicate(poa) ;
// Activate this object as a servant, then turn its object id into a CORBA reference.
94 PortableServer::ObjectId_var id = _poa->activate_object(this);
95 CORBA::Object_var obj = _poa->id_to_reference(id);
96 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
// Publish the reference in the naming service.
// NOTE(review): _NS is dereferenced here, but its assignment from `ns` is not visible
// in this excerpt - confirm it is set before this call.
97 _NS->Register(refContMan,_ResourcesManagerNameInNS);
99 MESSAGE("SALOME_ResourcesManager constructor end");
102 //=============================================================================
104 * Standard Destructor
106 //=============================================================================
// Destructor. The only visible statement is a trace message; naming-service and POA
// cleanup is performed by Shutdown(), not here.
108 SALOME_ResourcesManager::~SALOME_ResourcesManager()
110 MESSAGE("SALOME_ResourcesManager destructor");
114 //=============================================================================
116 * remove the ResourcesManager entry from the naming service, then deactivate its servant in the POA
118 //=============================================================================
// Undoes what the constructors set up: removes the naming-service entry registered
// under _ResourcesManagerNameInNS and deactivates this servant in the POA.
120 void SALOME_ResourcesManager::Shutdown()
123 _NS->Destroy_Name(_ResourcesManagerNameInNS);
// Look up the object id under which this servant was activated, then deactivate it.
124 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
125 _poa->deactivate_object(oid);
128 //=============================================================================
129 //! get the name of resources fitting the specified constraints (params)
131 * If hostname specified, check it is local or known in resources catalog.
134 * - select first machines with corresponding OS (all machines if
135 * parameter OS empty),
136 * - then select the sublist of machines on which the component is known
137 * (if the result is empty, the inventory of components has probably
138 * not been done, so the complete list from the previous step is returned)
140 //=============================================================================
// CORBA entry point: converts the CORBA resource parameters to the C++ parameter
// structure, delegates the selection to _rm.GetFittingResources(), and converts the
// resulting vector of names back into a CORBA ResourceList.
//   params - constraints supplied by the caller.
// A ResourcesException raised by the C++ layer is logged and reported through
// THROW_SALOME_CORBA_EXCEPTION with SALOME::BAD_PARAM.
142 Engines::ResourceList *
143 SALOME_ResourcesManager::GetFittingResources(const Engines::ResourceParameters& params)
145 MESSAGE("ResourcesManager::GetFittingResources");
// Result sequence, allocated on the heap with a raw `new`.
// NOTE(review): if the catch branch below throws, this raw pointer does not appear to
// be released - confirm, and consider holding it in a ResourceList_var until return.
146 Engines::ResourceList * ret = new Engines::ResourceList;
// CORBA -> C++: copy each field into `p` (its declaration is not visible in this excerpt).
150 p.name = params.name;
151 p.hostname = params.hostname;
153 p.nb_proc = params.nb_proc;
154 p.nb_node = params.nb_node;
155 p.nb_proc_per_node = params.nb_proc_per_node;
156 p.cpu_clock = params.cpu_clock;
157 p.mem_mb = params.mem_mb;
// Each element of the CORBA string sequences is copied into a std::string.
158 for(unsigned int i=0; i<params.componentList.length(); i++)
159 p.componentList.push_back(string(params.componentList[i]));
160 for(unsigned int i=0; i<params.resList.length(); i++)
161 p.resourceList.push_back(string(params.resList[i]));
165 // Call C++ ResourceManager
166 vector <std::string> vec = _rm.GetFittingResources(p);
// C++ -> CORBA: size the sequence first, then fill it element by element.
169 ret->length(vec.size());
170 for(unsigned int i=0;i<vec.size();i++)
171 (*ret)[i] = (vec[i]).c_str();
// Translate the C++ exception into a CORBA one carrying the same message.
173 catch(const ResourcesException &ex)
175 INFOS("Caught exception in GetFittingResources C++: " << ex.msg);
176 THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
182 //=============================================================================
184 * dynamically selects one resource from the given list using the "first" policy
186 //=============================================================================
// Convenience form of Find() with the policy fixed to "first".
//   listOfResources - candidate resource names.
// Returns a newly duplicated CORBA string (CORBA::string_dup) holding the name chosen
// by _rm.Find().
189 SALOME_ResourcesManager::FindFirst(const Engines::ResourceList& listOfResources)
// Copy the CORBA sequence into `rl` (its declaration is not visible in this excerpt).
193 for(unsigned int i=0; i<listOfResources.length(); i++)
194 rl.push_back(string(listOfResources[i]));
196 return CORBA::string_dup(_rm.Find("first", rl).c_str());
// Selects one resource among the candidates according to the given policy.
//   policy          - policy name, passed unchanged to _rm.Find().
//   listOfResources - candidate resource names.
// Returns a newly duplicated CORBA string (CORBA::string_dup) holding the name chosen
// by _rm.Find().
200 SALOME_ResourcesManager::Find(const char* policy, const Engines::ResourceList& listOfResources)
// Copy the CORBA sequence into `rl` (its declaration is not visible in this excerpt).
204 for(unsigned int i=0; i<listOfResources.length(); i++)
205 rl.push_back(string(listOfResources[i]));
207 return CORBA::string_dup(_rm.Find(policy, rl).c_str());
// Builds a CORBA ResourceDefinition describing the resource called `name`.
//   name - resource name, looked up through _rm.GetResourcesDescr().
// The structure is allocated with `new` and filled field by field from the parsed
// resource description; enum-valued fields are converted to their string names.
210 Engines::ResourceDefinition*
211 SALOME_ResourcesManager::GetResourceDefinition(const char * name)
213 ParserResourcesType resource = _rm.GetResourcesDescr(name);
214 Engines::ResourceDefinition *p_ptr = new Engines::ResourceDefinition;
216 p_ptr->name = CORBA::string_dup(resource.Name.c_str());
217 p_ptr->hostname = CORBA::string_dup(resource.HostName.c_str());
// Access protocol: only rsh and ssh are mapped; for any other value the field is not
// assigned here.
218 if( resource.Protocol == rsh )
219 p_ptr->protocol = "rsh";
220 else if( resource.Protocol == ssh )
221 p_ptr->protocol = "ssh";
// Cluster-internal protocol: same mapping as above.
222 if( resource.ClusterInternalProtocol == rsh )
223 p_ptr->iprotocol = "rsh";
224 else if( resource.ClusterInternalProtocol == ssh )
225 p_ptr->iprotocol = "ssh";
226 p_ptr->username = CORBA::string_dup(resource.UserName.c_str());
227 p_ptr->applipath = CORBA::string_dup(resource.AppliPath.c_str());
// Size the CORBA sequence first, then copy every component name.
228 p_ptr->componentList.length(resource.ComponentsList.size());
229 for(unsigned int i=0;i<resource.ComponentsList.size();i++)
230 p_ptr->componentList[i] = CORBA::string_dup(resource.ComponentsList[i].c_str());
231 p_ptr->OS = CORBA::string_dup(resource.OS.c_str());
// Hardware figures taken from the DataForSort record of the resource.
232 p_ptr->mem_mb = resource.DataForSort._memInMB;
233 p_ptr->cpu_clock = resource.DataForSort._CPUFreqMHz;
234 p_ptr->nb_proc_per_node = resource.DataForSort._nbOfProcPerNode;
235 p_ptr->nb_node = resource.DataForSort._nbOfNodes;
// MPI implementation: enum value -> string name.
// NOTE(review): no fallback is visible for values outside this list - confirm that
// leaving mpiImpl unassigned is intended.
237 if( resource.mpi == lam )
238 p_ptr->mpiImpl = "lam";
239 else if( resource.mpi == mpich1 )
240 p_ptr->mpiImpl = "mpich1";
241 else if( resource.mpi == mpich2 )
242 p_ptr->mpiImpl = "mpich2";
243 else if( resource.mpi == openmpi )
244 p_ptr->mpiImpl = "openmpi";
245 else if( resource.mpi == slurm )
246 p_ptr->mpiImpl = "slurm";
247 else if( resource.mpi == prun )
248 p_ptr->mpiImpl = "prun";
// Batch system: enum value -> string name (pbs, lsf, sge).
250 if( resource.Batch == pbs )
251 p_ptr->batch = "pbs";
252 else if( resource.Batch == lsf )
253 p_ptr->batch = "lsf";
254 else if( resource.Batch == sge )
255 p_ptr->batch = "sge";
// Writes a temporary machine file for a parallel launch on `hostname` and returns its
// path.
//   hostname    - key looked up in the resource list returned by _rm.GetList().
//   nb_procs    - number of host entries written in the "Dummy" case.
//   parallelLib - "Dummy" or "Mpi"; selects the file format.
// The returned name starts out empty and is only set in the "Dummy" branch and in the
// "Mpi" branch for the lam implementation; the remaining paths only log an error.
261 SALOME_ResourcesManager::getMachineFile(std::string hostname,
262 CORBA::Long nb_procs,
263 std::string parallelLib)
265 std::string machine_file_name("");
267 if (parallelLib == "Dummy")
269 MESSAGE("[getMachineFile] parallelLib is Dummy");
270 MapOfParserResourcesType resourcesList = _rm.GetList();
271 if (resourcesList.find(hostname) != resourcesList.end())
// Work on a copy of the catalog entry, so the push_front below does not alter the
// local resourcesList map.
273 ParserResourcesType resource = resourcesList[hostname];
275 // Check if resource is cluster or not
276 if (resource.ClusterMembersList.empty())
278 //It is not a cluster so we create a cluster with one machine
279 ParserResourcesClusterMembersType fake_node;
280 fake_node.HostName = resource.HostName;
281 fake_node.Protocol = resource.Protocol;
282 fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
283 fake_node.UserName = resource.UserName;
284 fake_node.AppliPath = resource.AppliPath;
285 fake_node.DataForSort = resource.DataForSort;
287 resource.ClusterMembersList.push_front(fake_node);
290 // Creating list of machines for creating the machine file
291 std::list<std::string> list_of_machines;
292 std::list<ParserResourcesClusterMembersType>::iterator cluster_it =
293 resource.ClusterMembersList.begin();
// NOTE(review): the iterator advance for this loop is not visible in this excerpt.
294 while (cluster_it != resource.ClusterMembersList.end())
296 // For each member of the cluster we add a nbOfNodes * nbOfProcPerNode in the list
297 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
298 (*cluster_it).DataForSort._nbOfProcPerNode;
299 for (unsigned int i = 0; i < number_of_proc; i++)
300 list_of_machines.push_back((*cluster_it).HostName);
304 // Creating machine file
// NOTE(review): tmpnam() only generates a name; another process can create that file
// before it is opened here. Consider mkstemp() or an equivalent.
305 machine_file_name = tmpnam(NULL);
306 std::ofstream machine_file(machine_file_name.c_str(), ios_base::out);
308 CORBA::Long machine_number = 0;
309 std::list<std::string>::iterator it = list_of_machines.begin();
// The host list is walked cyclically (see the wrap-around test below) until nb_procs
// entries have been written; the counter/iterator increments are not visible in this
// excerpt.
// NOTE(review): if list_of_machines is empty (every member has zero procs), `*it`
// dereferences end(); the `!=` test also looks like it never becomes false for a
// negative nb_procs - confirm callers exclude both cases.
310 while (machine_number != nb_procs)
312 // Adding a new node to the machine file
313 machine_file << *it << endl;
317 if (it == list_of_machines.end())
318 it = list_of_machines.begin();
323 INFOS("[getMachineFile] Error hostname not found in resourcesList -> " << hostname);
325 else if (parallelLib == "Mpi")
327 MESSAGE("[getMachineFile] parallelLib is Mpi");
329 MapOfParserResourcesType resourcesList = _rm.GetList();
330 if (resourcesList.find(hostname) != resourcesList.end())
332 ParserResourcesType resource = resourcesList[hostname];
333 // Check if resource is cluster or not
334 if (resource.ClusterMembersList.empty())
336 //It is not a cluster so we create a cluster with one machine
337 ParserResourcesClusterMembersType fake_node;
338 fake_node.HostName = resource.HostName;
339 fake_node.Protocol = resource.Protocol;
340 fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
341 fake_node.UserName = resource.UserName;
342 fake_node.AppliPath = resource.AppliPath;
343 fake_node.DataForSort = resource.DataForSort;
345 resource.ClusterMembersList.push_front(fake_node);
348 // Choose mpi implementation -> each MPI implementation has its own machinefile...
349 if (resource.mpi == lam)
351 // Creating machine file
// NOTE(review): same tmpnam() race as in the "Dummy" branch.
352 machine_file_name = tmpnam(NULL);
353 std::ofstream machine_file(machine_file_name.c_str(), ios_base::out);
355 // We add all cluster machines to the file
356 std::list<ParserResourcesClusterMembersType>::iterator cluster_it =
357 resource.ClusterMembersList.begin();
// One "<host> cpu=<n>" line per cluster member, n = nbOfNodes * nbOfProcPerNode.
// NOTE(review): the iterator advance for this loop is not visible in this excerpt.
358 while (cluster_it != resource.ClusterMembersList.end())
360 unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
361 (*cluster_it).DataForSort._nbOfProcPerNode;
362 machine_file << (*cluster_it).HostName << " cpu=" << number_of_proc << endl;
366 else if (resource.mpi == nompi)
// NOTE(review): this is the `nompi` branch, so the message probably means
// "no MPI implementation was defined" - confirm the wording.
368 INFOS("[getMachineFile] Error hostname MPI implementation was defined for " << hostname);
// NOTE(review): "currenly" in the message below is a typo for "currently".
371 INFOS("[getMachineFile] Error hostname MPI implementation not currenly handled for " << hostname);
374 INFOS("[getMachineFile] Error hostname not found in resourcesList -> " << hostname);
377 INFOS("[getMachineFile] Error parallelLib is not handled -> " << parallelLib);
// Still the empty string unless one of the two file-creating paths above was taken.
379 return machine_file_name;