1 // Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
2 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License.
9 // This library is distributed in the hope that it will be useful
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "SALOME_ContainerManager.hxx"
21 #include "SALOME_NamingService.hxx"
23 #include <sys/types.h>
28 #include "Utils_CorbaException.hxx"
30 #ifdef WITH_PACO_PARALLEL
// Initial value of the `count` variable that bounds the loops in this file
// which wait for a freshly launched container to become resolvable in the
// naming service.
34 #define TIME_OUT_TO_LAUNCH_CONT 21
// Naming-service name under which the constructor registers this servant.
// NOTE(review): the initializer is on a line that is not part of this listing.
38 const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
41 //=============================================================================
45 * Defines a CORBA single-thread policy for the server, which avoids having to
46 * deal with non-thread-safe usage such as Change_Directory in the SALOME naming service
48 //=============================================================================
// Constructs the container manager servant.
// Visible steps: create the SALOME_NamingService and SALOME_ResourcesManager
// helpers from orb, build a policy list holding a SINGLE_THREAD_MODEL thread
// policy, create a POA named "SThreadPOA" from the root POA with that list,
// activate this servant and register its reference in the naming service
// under _ContainerManagerNameInNS.
// \param orb ORB passed to the naming-service and resources-manager helpers.
// NOTE(review): the embedded line numbers skip values (51, 55, 59, 61, 65, 66,
// ...), so braces and possibly statements are missing from this listing; the
// comments describe only what is visible.
50 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb)
52 MESSAGE("constructor");
// Both helpers are allocated with new.
53 _NS = new SALOME_NamingService(orb);
54 _ResManager = new SALOME_ResourcesManager(orb);
56 PortableServer::POA_var root_poa = PortableServer::POA::_the_root_poa();
57 PortableServer::POAManager_var pman = root_poa->the_POAManager();
58 PortableServer::POA_var my_poa;
60 CORBA::PolicyList policies;
// NOTE(review): policies[0] is written below; the statement that sizes the
// sequence is presumably on the omitted line 61 - confirm.
62 PortableServer::ThreadPolicy_var threadPol =
63 root_poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
64 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
// The new POA is created with the root POA's manager (pman). The left-hand
// side of this call is not visible; presumably the result is stored in
// my_poa - confirm.
67 root_poa->create_POA("SThreadPOA",pman,policies);
// Activate this servant in my_poa and obtain the matching object reference.
69 PortableServer::ObjectId_var id = my_poa->activate_object(this);
70 CORBA::Object_var obj = my_poa->id_to_reference(id);
71 Engines::ContainerManager_var refContMan =
72 Engines::ContainerManager::_narrow(obj);
// Publish the reference in the naming service.
74 _NS->Register(refContMan,_ContainerManagerNameInNS);
75 MESSAGE("constructor end");
78 //=============================================================================
82 //=============================================================================
// Destructor. The only visible statement logs "destructor".
// NOTE(review): _NS and _ResManager are allocated with new in the constructor
// and no release is visible here; the embedded line numbers skip 87-90, so
// confirm against the full file.
84 SALOME_ContainerManager::~SALOME_ContainerManager()
86 MESSAGE("destructor");
91 //=============================================================================
93 * shutdown all the containers, then the ContainerManager servant
95 //=============================================================================
// Shuts down the containers through ShutdownContainers(), then deactivates
// this servant in the POA returned by _default_POA().
// NOTE(review): the constructor activates the servant in the POA it creates as
// "SThreadPOA"; confirm that _default_POA() returns that same POA.
97 void SALOME_ContainerManager::Shutdown()
100 ShutdownContainers();
// Look up this servant's ObjectId so that it can be deactivated.
101 PortableServer::ObjectId_var oid = _default_POA()->servant_to_id(this);
102 _default_POA()->deactivate_object(oid);
107 //=============================================================================
109 * Loops over all the containers listed in the naming service and asks each one to shut down
111 //=============================================================================
// Walks the "/Containers" directory of the naming service and processes every
// entry that narrows to an Engines::Container.
// NOTE(review): the embedded line numbers skip values, so loop bodies are
// incomplete in this listing; in particular the statement that actually asks a
// container to shut down is presumably on the omitted lines 142-143.
113 void SALOME_ContainerManager::ShutdownContainers()
115 MESSAGE("ShutdownContainers");
116 _NS->Change_Directory("/Containers");
117 vector<string> vec = _NS->list_directory_recurs();
118 list<string> lstCont;
// Pass 1: keep the names that resolve to a non-nil Engines::Container.
119 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++)
122 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
123 Engines::Container_var cont=Engines::Container::_narrow(obj);
124 if(!CORBA::is_nil(cont))
126 lstCont.push_back((*iter));
129 MESSAGE("Container list: ");
// Pass 2: iterates over the collected names; its body is not visible here.
130 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++)
// Pass 3: resolve every collected name again and handle the container.
134 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++)
137 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
138 Engines::Container_var cont=Engines::Container::_narrow(obj);
139 if(!CORBA::is_nil(cont))
141 MESSAGE("ShutdownContainers: " << (*iter));
// The name no longer resolves to a container: only log it.
144 else MESSAGE("ShutdownContainers: no container ref for " << (*iter));
148 //=============================================================================
150 * Find a suitable Container in a list of machines, or start one
151 * \param params Machine Parameters required for the container
152 * \param possibleComputers list of machines usable for find or start
154 //=============================================================================
// Looks for an existing container with FindContainer(); when none is found,
// picks a machine with _ResManager->FindBest(), builds a launch command
// (local or remote), runs it with system() and then polls the naming service
// until the container reference resolves or the `count` budget is used up.
// \param params            machine parameters required for the container
// \param possibleComputers machines on which the container may be found or started
// \return a nil reference on the visible failure paths (no machine, system()
//         status -1 or 217); the success return is on lines omitted from this
//         listing.
// NOTE(review): the embedded line numbers skip values, so braces, the
// declarations of `id` and `command`, several conditions and the loop's
// wait/decrement statements are not visible here.
156 Engines::Container_ptr
157 SALOME_ContainerManager::
158 FindOrStartContainer(const Engines::MachineParameters& params,
159 const Engines::MachineList& possibleComputers)
162 string containerNameInNS;
// Receives the decimal text of `id` written by sprintf further down.
163 char idc[3*sizeof(long)];
165 Engines::Container_ptr ret = FindContainer(params,possibleComputers);
// A container was found; presumably it is returned on the omitted line 167.
166 if(!CORBA::is_nil(ret))
168 MESSAGE("Container doesn't exist try to launch it ...");
169 MESSAGE("SALOME_ContainerManager::FindOrStartContainer " <<
170 possibleComputers.length());
171 //vector<string> vector;
172 string theMachine=_ResManager->FindBest(possibleComputers);
173 MESSAGE("try to launch it on " << theMachine);
175 // Get Id for container: a parallel container registers in the Naming Service
176 // on the machine that runs process 0. ContainerManager doesn't know the name
177 // of this machine before the launch of the parallel container. So to get
178 // the IOR of the parallel container in the Naming Service, ContainerManager
179 // hands out a unique Id. The parallel container registers its name under
180 // the /ContainerManager/Id directory in the Naming Service
182 id = GetIdForContainer();
// Failure path: no usable machine (the guarding condition is not visible).
187 MESSAGE("SALOME_ContainerManager::FindOrStartContainer : " <<
188 "no possible computer");
189 return Engines::Container::_nil();
// Local launch when the chosen machine is this host, remote launch otherwise.
191 else if(theMachine==GetHostname())
193 command=_ResManager->BuildCommandToLaunchLocalContainer(params,id);
197 _ResManager->BuildCommandToLaunchRemoteContainer(theMachine,params,id);
199 _ResManager->RmTmpFile();
// Run the launch command through the shell.
200 int status=system(command.c_str());
// NOTE(review): the log texts below name SALOME_LifeCycleCORBA::StartOrFindContainer
// although they are emitted from SALOME_ContainerManager::FindOrStartContainer.
203 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
204 "(system command status -1)");
205 return Engines::Container::_nil();
// NOTE(review): nothing in this file explains the meaning of status 217.
207 else if (status == 217)
209 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
210 "(system command status 217)");
211 return Engines::Container::_nil();
// Poll the naming service while the reference is nil and count is non-zero.
215 int count=TIME_OUT_TO_LAUNCH_CONT;
216 while ( CORBA::is_nil(ret) && count )
225 MESSAGE( count << ". Waiting for FactoryServer on " << theMachine);
// Two naming schemes are visible: "/ContainerManager/id<id>" and the name
// built by BuildContainerNameForNS(); the condition that selects between them
// is on omitted lines.
228 containerNameInNS = "/ContainerManager/id";
229 sprintf(idc,"%ld",id);
230 containerNameInNS += idc;
234 _NS->BuildContainerNameForNS(params,theMachine.c_str());
235 SCRUTE(containerNameInNS);
236 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
237 ret=Engines::Container::_narrow(obj);
// Still nil after the loop: the launch is reported as failed.
239 if ( CORBA::is_nil(ret) )
241 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed");
247 #ifdef WITH_PACO_PARALLEL
248 //=============================================================================
250 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
251 * \param params Machine Parameters required for the container
252 * \param possibleComputers list of machines usable for find or start
254 * \return CORBA container reference.
256 //=============================================================================
// PaCO++ build of FindOrStartParallelContainer (this definition follows
// "#ifdef WITH_PACO_PARALLEL").
// Tries FindContainer() first; when nothing is found it chooses a machine and,
// only if that machine is the local host, launches the container proxy, then
// the container nodes, then resolves every node in the naming service to
// connect it to the proxy.
// \param params_const      machine parameters required for the container
//                          (copied into a local, modifiable `params`)
// \param possibleComputers machines usable to find or start the container
// NOTE(review): the embedded line numbers skip values, so braces, the
// declarations of `command` and `buffer`, the `try` keyword, the deployment
// call and the return statement are not visible in this listing.
257 Engines::Container_ptr
258 SALOME_ContainerManager::
259 FindOrStartParallelContainer(const Engines::MachineParameters& params_const,
260 const Engines::MachineList& possibleComputers)
262 CORBA::Object_var obj;
263 Engines::Container_ptr ret = Engines::Container::_nil();
264 Engines::MachineParameters params(params_const);
266 // Step 1 : Try to find a suitable container
267 // Currently not as good as it could be, since
268 // we would have to check the number of nodes of the container
269 // when the user specifies one.
270 ret = FindContainer(params, possibleComputers);
272 if(CORBA::is_nil(ret)) {
273 // Step 2 : Starting a new parallel container
274 INFOS("[FindOrStartParallelContainer] Starting a parallel container");
276 // Step 2.1 : Choose a computer
277 string theMachine = _ResManager->FindBest(possibleComputers);
278 if(theMachine == "") {
279 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
280 INFOS("[FindOrStartParallelContainer] No possible computer found");
281 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
284 INFOS("[FindOrStartParallelContainer] on machine : " << theMachine);
286 if(theMachine == GetHostname()) {
287 // Step 3 : starting parallel container proxy
288 params.hostname = CORBA::string_dup(theMachine.c_str());
289 Engines::MachineParameters params_proxy(params);
290 command = _ResManager->BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerProxy", params_proxy, "xterm");
291 // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes
292 params_proxy.nb_component_nodes = 0;
293 obj = LaunchParallelContainer(command, params_proxy, _NS->ContainerName(params));
294 ret = Engines::Container::_narrow(obj);
296 // Step 4 : starting parallel container nodes
297 command = _ResManager->BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerNode", params, "xterm");
298 string name = _NS->ContainerName(params) + "Node";
// NOTE(review): the value returned by this call is discarded; if it is an
// owned object reference it is never released - confirm.
299 LaunchParallelContainer(command, params, name);
301 // Step 5 : connecting nodes and the proxy to actually create a parallel container
303 for (int i = 0; i < params.nb_component_nodes; i++) {
// Node names are "<name><i>"; snprintf is limited to 5 bytes here, i.e. at
// most 4 digits plus the terminator.
306 snprintf(buffer,5,"%d",i);
307 string name_cont = name + string(buffer);
// NOTE(review): CORBA::string_dup returns a newly allocated copy which the
// std::string constructor copies again; nothing visible frees the duplicate,
// which looks like a leak - confirm.
309 string theNodeMachine(CORBA::string_dup(params.hostname));
310 string containerNameInNS = _NS->BuildContainerNameForNS(name_cont.c_str(),theNodeMachine.c_str());
// Resolve the node, retrying while the reference is nil and count is non-zero.
311 int count = TIME_OUT_TO_LAUNCH_CONT;
312 obj = _NS->Resolve(containerNameInNS.c_str());
313 while (CORBA::is_nil(obj) && count) {
314 INFOS("[FindOrStartParallelContainer] CONNECTION FAILED !!!!!!!!!!!!!!!!!!!!!!!!");
321 obj = _NS->Resolve(containerNameInNS.c_str());
324 PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
// Note: this message prints `name` (the common prefix), not `name_cont`.
325 MESSAGE("[FindOrStartParallelContainer] Deploying node : " << name);
// The handlers below only log the exception; no rethrow is visible.
329 catch(CORBA::SystemException& e)
331 INFOS("Caught CORBA::SystemException. : " << e);
333 catch(PortableServer::POA::ServantAlreadyActive&)
335 INFOS("Caught CORBA::ServantAlreadyActiveException");
337 catch(CORBA::Exception&)
339 INFOS("Caught CORBA::Exception.");
341 catch(std::exception& exc)
343 INFOS("Caught std::exception - "<<exc.what());
347 INFOS("Caught unknown exception.");
349 INFOS("[FindOrStartParallelContainer] node " << name << " deployed");
// Branch taken when the chosen machine is not the local host.
353 INFOS("[FindOrStartParallelContainer] Currently parallel containers are launched only on the local host");
360 //=============================================================================
362 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
363 * \param params Machine Parameters required for the container
364 * \param possibleComputers list of machines usable for find or start
366 * \return CORBA container reference.
368 //=============================================================================
// Variant of FindOrStartParallelContainer that does no work: it initialises
// `ret` to a nil reference and logs that the parallel extension is disabled.
// Neither parameter is used in the visible lines.
// NOTE(review): presumably this is the branch compiled when
// WITH_PACO_PARALLEL is not defined, and `ret` is returned on an omitted line;
// the preprocessor directive and the return statement are not visible in this
// listing.
369 Engines::Container_ptr
370 SALOME_ContainerManager::
371 FindOrStartParallelContainer(const Engines::MachineParameters& params,
372 const Engines::MachineList& possibleComputers)
374 Engines::Container_ptr ret = Engines::Container::_nil();
375 INFOS("[FindOrStartParallelContainer] is disabled !");
376 INFOS("[FindOrStartParallelContainer] recompile SALOME Kernel to enable parallel extension");
381 //=============================================================================
385 //=============================================================================
// Asks the resources manager for the machines that fit `params` and
// `componentName`, and copies the resulting names into a newly allocated
// Engines::MachineList.
// \param params        machine parameters forwarded to the resources manager
// \param componentName component name forwarded to the resources manager
// A SALOME_Exception from the resources manager is converted with
// THROW_SALOME_CORBA_EXCEPTION(..., SALOME::BAD_PARAM).
// NOTE(review): the declaration of `vec`, the `try` keyword and the return
// statement are on lines omitted from this listing.
387 Engines::MachineList *
388 SALOME_ContainerManager::
389 GetFittingResources(const Engines::MachineParameters& params,
390 const char *componentName)
392 MESSAGE("SALOME_ContainerManager::GetFittingResources");
// NOTE(review): `ret` is allocated before the call that may throw; if the
// macro in the handler throws, as its name suggests, `ret` is not released on
// that path - confirm.
393 Engines::MachineList *ret=new Engines::MachineList;
397 vec = _ResManager->GetFittingResources(params,componentName);
399 catch(const SALOME_Exception &ex)
401 INFOS("Caught exception.");
402 THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM);
406 // MESSAGE("Machine list length "<<vec.size());
// Size the CORBA sequence, then fill it element by element from `vec`.
407 ret->length(vec.size());
408 for(unsigned int i=0;i<vec.size();i++)
410 (*ret)[i]=(vec[i]).c_str();
415 //=============================================================================
419 //=============================================================================
// Delegates the choice of a machine to _ResManager->FindBest() and returns a
// copy of the chosen name allocated with CORBA::string_dup.
// \param possibleComputers candidate machines
// NOTE(review): the line holding the return type is omitted from this listing.
422 SALOME_ContainerManager::
423 FindBest(const Engines::MachineList& possibleComputers)
425 string theMachine=_ResManager->FindBest(possibleComputers);
426 return CORBA::string_dup(theMachine.c_str());
429 //=============================================================================
433 //=============================================================================
// Looks up one container in the naming service.
// The name is built with _NS->BuildContainerNameForNS(params, theMachine).
// \param params     machine parameters used to build the container name
// \param theMachine machine name used to build the container name
// \return the resolved object narrowed to Engines::Container when the name
//         resolves to a non-nil object, otherwise a nil reference
435 Engines::Container_ptr
436 SALOME_ContainerManager::
437 FindContainer(const Engines::MachineParameters& params,
438 const char *theMachine)
440 string containerNameInNS(_NS->BuildContainerNameForNS(params,theMachine));
441 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
442 if( !CORBA::is_nil(obj) )
443 return Engines::Container::_narrow(obj);
// Reached when the name did not resolve (presumably via an omitted `else`).
445 return Engines::Container::_nil();
448 //=============================================================================
452 //=============================================================================
// Searches the machines of `possibleComputers` in order, calling the
// single-machine FindContainer() overload for each entry.
// \param params            machine parameters used to build the container name
// \param possibleComputers machines to try
// \return a nil reference when the loop finds nothing; the statement executed
//         for a non-nil `cont` (presumably `return cont;`) is on an omitted line
454 Engines::Container_ptr
455 SALOME_ContainerManager::
456 FindContainer(const Engines::MachineParameters& params,
457 const Engines::MachineList& possibleComputers)
459 MESSAGE("FindContainer "<<possibleComputers.length());
460 for(unsigned int i=0;i<possibleComputers.length();i++)
462 MESSAGE("FindContainer possible " << possibleComputers[i]);
463 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
464 if( !CORBA::is_nil(cont) )
467 MESSAGE("FindContainer: not found");
468 return Engines::Container::_nil();
471 //=============================================================================
472 /*! This method launches the parallel container.
473 * It may eventually be moved into the resources manager.
475 * \param command to launch
476 * \param container's parameters
477 * \param name of the container
479 * \return CORBA container reference
481 //=============================================================================
// Runs `command` with system() and then waits for the launched parallel
// container parts to appear in the naming service.
// When params.nb_component_nodes is 0 the proxy is launched and a single name
// is awaited; otherwise the nodes are launched and the names "<name><i>" are
// awaited for i in [0, nb_component_nodes).
// \param command shell command to run
// \param params  container parameters (nb_component_nodes and hostname are read)
// \param name    container name used to build the naming-service entry
// NOTE(review): the line holding the return type, the declaration of `buffer`,
// the conditions testing status == -1, the wait/decrement statements of the
// loops and the return statement are omitted from this listing.
483 SALOME_ContainerManager::LaunchParallelContainer(const std::string& command,
484 const Engines::MachineParameters& params,
485 const std::string& name)
487 CORBA::Object_ptr obj = CORBA::Object::_nil();
488 string containerNameInNS;
// Proxy case.
490 if (params.nb_component_nodes == 0) {
491 INFOS("[LaunchParallelContainer] launching the proxy of the parallel container");
492 int status = system(command.c_str());
494 INFOS("[LaunchParallelContainer] failed : system command status -1");
// NOTE(review): nothing in this file explains the meaning of status 217.
496 else if (status == 217) {
497 INFOS("[LaunchParallelContainer] failed : system command status 217");
500 int count = TIME_OUT_TO_LAUNCH_CONT;
// NOTE(review): the copy made by CORBA::string_dup is copied again by the
// std::string constructor and nothing visible frees it - looks like a leak.
501 string theMachine(CORBA::string_dup(params.hostname));
502 containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(),theMachine.c_str());
504 INFOS("[LaunchContainer] Waiting for Parallel Container proxy on " << theMachine);
// Poll the naming service while the reference is nil and count is non-zero.
505 while (CORBA::is_nil(obj) && count) {
512 obj = _NS->Resolve(containerNameInNS.c_str());
// Nodes case.
516 INFOS("[LaunchParallelContainer] launching the nodes of the parallel container");
517 int status = system(command.c_str());
519 INFOS("[LaunchParallelContainer] failed : system command status -1");
521 else if (status == 217) {
522 INFOS("[LaunchParallelContainer] failed : system command status 217");
524 // Wait for all the nodes
525 for (int i = 0; i < params.nb_component_nodes; i++) {
// Each node gets a fresh nil reference and a fresh retry budget.
526 obj = CORBA::Object::_nil();
527 int count = TIME_OUT_TO_LAUNCH_CONT;
// Node names are "<name><i>"; snprintf is limited to 5 bytes here, i.e. at
// most 4 digits plus the terminator.
531 snprintf(buffer,5,"%d",i);
532 string name_cont = name + string(buffer);
534 // I don't like this...
535 string theMachine(CORBA::string_dup(params.hostname));
536 containerNameInNS = _NS->BuildContainerNameForNS((char*) name_cont.c_str(),theMachine.c_str());
537 cerr << "[LaunchContainer] Waiting for Parllel Container node " << containerNameInNS << " on " << theMachine << endl;
538 while (CORBA::is_nil(obj) && count) {
545 obj = _NS->Resolve(containerNameInNS.c_str());
// Final check on the last reference that was resolved.
550 if ( CORBA::is_nil(obj) ) {
551 INFOS("[LaunchParallelContainer] failed");
556 //=============================================================================
558 * Get Id for container: a parallel container registers in the Naming Service
559 * on the machine that runs process 0. ContainerManager doesn't know the name
560 * of this machine before the launch of the parallel container. So to get
561 * the IOR of the parallel container in the Naming Service, ContainerManager
562 * hands out a unique Id. The parallel container registers its name under
563 * the /ContainerManager/Id directory in the Naming Service
565 //=============================================================================
568 long SALOME_ContainerManager::GetIdForContainer(void)