1 // Copyright (C) 2007-2008 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
22 #include "SALOME_ContainerManager.hxx"
23 #include "SALOME_NamingService.hxx"
24 #include "SALOME_ModuleCatalog.hh"
25 #include "Basics_Utils.hxx"
26 #include "Basics_DirUtils.hxx"
27 #include <sys/types.h>
33 #include "Utils_CorbaException.hxx"
34 #include "Batch_Date.hxx"
36 #ifdef WITH_PACO_PARALLEL
// Initial value of the retry counters used by the loops that wait for a newly
// launched container to appear in the naming service.
40 #define TIME_OUT_TO_LAUNCH_CONT 61
// Static list of container references used in batch mode; it is rebuilt by
// fillBatchLaunchedContainers().
44 vector<Engines::Container_ptr> SALOME_ContainerManager::_batchLaunchedContainers;
// Static cursor into _batchLaunchedContainers; GiveContainer() advances it and
// resets it to begin() when it reaches end().
46 vector<Engines::Container_ptr>::iterator SALOME_ContainerManager::_batchLaunchedContainersIter;
// Name under which the manager is registered in (and later removed from) the
// naming service. NOTE(review): the initializer is not visible in this listing.
48 const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
51 //=============================================================================
55 * Defines a CORBA single-thread policy for the server, which avoids having to deal
56 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
58 //=============================================================================
// Constructor: sets up the ContainerManager CORBA servant.
// Creates a child POA named "SThreadPOA" that uses the SINGLE_THREAD_MODEL
// thread policy, activates this object in it, and registers the resulting
// reference in the naming service under _ContainerManagerNameInNS.
// \param orb ORB reference; a duplicate is kept in _orb
// \param poa parent POA; its POAManager is reused for the new child POA
// \param rm  resources manager (NOTE(review): where it is stored is not visible in this listing)
// \param ns  naming service wrapper (NOTE(review): where it is stored is not visible in this listing)
60 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns)
62 MESSAGE("constructor");
67 PortableServer::POAManager_var pman = poa->the_POAManager();
68 _orb = CORBA::ORB::_duplicate(orb) ;
69 CORBA::PolicyList policies;
// NOTE(review): no call sizing the policy sequence is visible before element 0
// is assigned below — confirm it exists in the full source.
71 PortableServer::ThreadPolicy_var threadPol =
72 poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
73 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
75 _poa = poa->create_POA("SThreadPOA",pman,policies);
// Activate this servant in the new POA and publish its reference.
77 PortableServer::ObjectId_var id = _poa->activate_object(this);
78 CORBA::Object_var obj = _poa->id_to_reference(id);
79 Engines::ContainerManager_var refContMan =
80 Engines::ContainerManager::_narrow(obj);
82 _NS->Register(refContMan,_ContainerManagerNameInNS);
// Remember whether the APPLI environment variable is set; this flag selects
// how BuildCommandToLaunchRemoteContainer() builds its command.
84 _isAppliSalomeDefined = (getenv("APPLI") != 0);
85 MESSAGE("constructor end");
88 //=============================================================================
92 //=============================================================================
// Destructor: in the lines visible here it only emits a trace message.
94 SALOME_ContainerManager::~SALOME_ContainerManager()
96 MESSAGE("destructor");
99 //=============================================================================
101 * shutdown all the containers, then the ContainerManager servant
103 //=============================================================================
// Shuts down every container, then withdraws the manager itself:
// its name is removed from the naming service and the servant is deactivated
// in its POA.
105 void SALOME_ContainerManager::Shutdown()
108 ShutdownContainers();
109 _NS->Destroy_Name(_ContainerManagerNameInNS);
// Look up this servant's object id so that it can be deactivated.
110 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
111 _poa->deactivate_object(oid);
112 //_remove_ref() has already been done at creation
116 //=============================================================================
118 * Loop on all the containers listed in naming service, ask shutdown on each
120 //=============================================================================
// Walks the "/Containers" directory of the naming service in two passes:
//  1. collect the names of all entries that narrow to a non-nil
//     Engines::Container (entries raising a CORBA exception are skipped);
//  2. resolve each collected name again and process the container.
// CORBA and unknown exceptions raised in the second pass are logged and ignored.
// NOTE(review): the call performed on each container in pass 2, and the
// try/else keywords, are not visible in this listing.
122 void SALOME_ContainerManager::ShutdownContainers()
124 MESSAGE("ShutdownContainers");
126 isOK = _NS->Change_Directory("/Containers");
128 vector<string> vec = _NS->list_directory_recurs();
129 list<string> lstCont;
// Pass 1: keep only the names that resolve to a valid container reference.
130 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++)
133 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
136 Engines::Container_var cont=Engines::Container::_narrow(obj);
137 if(!CORBA::is_nil(cont))
138 lstCont.push_back((*iter));
140 catch(const CORBA::Exception& e)
142 // ignore this entry and continue
145 MESSAGE("Container list: ");
146 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
// Pass 2: resolve each retained name again and act on the container.
149 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
151 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
152 Engines::Container_var cont=Engines::Container::_narrow(obj);
153 if(!CORBA::is_nil(cont))
155 MESSAGE("ShutdownContainers: " << (*iter));
160 catch(CORBA::SystemException& e)
162 INFOS("CORBA::SystemException ignored : " << e);
164 catch(CORBA::Exception&)
166 INFOS("CORBA::Exception ignored.");
170 INFOS("Unknown exception ignored.");
174 MESSAGE("ShutdownContainers: no container ref for " << (*iter));
179 //=============================================================================
181 * Find a suitable Container in a list of machines, or start one
182 * \param params Machine Parameters required for the container
183 * \param possibleComputers list of machines usable for find or start
185 //=============================================================================
// Looks for an existing container matching params on one of possibleComputers
// and, when none is found, starts one using the Engines::P_FIRST policy.
// \param params            machine parameters required for the container
// \param possibleComputers machines on which the container may be found or started
// \return container reference (the early return for the "found" case is not
//         visible in this listing)
187 Engines::Container_ptr
188 SALOME_ContainerManager::
189 FindOrStartContainer(const Engines::MachineParameters& params,
190 const Engines::MachineList& possibleComputers)
192 Engines::Container_ptr ret = FindContainer(params,possibleComputers);
193 if(!CORBA::is_nil(ret))
195 MESSAGE("Container doesn't exist try to launch it ...");
197 return StartContainer(params,possibleComputers,Engines::P_FIRST);
201 //=============================================================================
203 * Start a suitable Container in a list of machines
204 * \param params Machine Parameters required for the container
205 * \param possibleComputers list of machines usable for start
207 //=============================================================================
// Starts a container on one machine chosen from possibleComputers.
// Steps visible in this listing:
//  - with WITH_PACO_PARALLEL and a non-empty params.parallelLib, delegate to
//    FindOrStartParallelContainer();
//  - choose a machine through the resources manager according to policy;
//  - build a local or remote launch command;
//  - inspect any entry already registered under the container's name;
//  - run the command with system(), redirecting output to a log file in /tmp;
//  - poll the naming service until the container appears or the counter
//    initialised from TIME_OUT_TO_LAUNCH_CONT reaches zero.
// \param params            machine parameters required for the container
// \param possibleComputers candidate machines
// \param policy            resource selection policy (P_FIRST, P_CYCL or P_BEST)
// \param container_exe     container executable name passed to the command builders
// \return the container reference, or a nil reference on the failure paths shown below
// NOTE(review): several declarations (lm, theMachine, command, id) and
// control-flow keywords are not visible in this listing.
209 Engines::Container_ptr
210 SALOME_ContainerManager::
211 StartContainer(const Engines::MachineParameters& params,
212 const Engines::MachineList& possibleComputers,
213 Engines::ResPolicy policy,const std::string& container_exe)
215 #ifdef WITH_PACO_PARALLEL
216 std::string parallelLib(params.parallelLib);
217 if (parallelLib != "")
218 return FindOrStartParallelContainer(params, possibleComputers);
221 string containerNameInNS;
// Buffer receiving the decimal text of the container id (see sprintf below).
222 char idc[3*sizeof(long)];
223 Engines::Container_ptr ret = Engines::Container::_nil();
225 MESSAGE("SALOME_ContainerManager::StartContainer " <<
226 possibleComputers.length());
// Copy the CORBA machine list into lm for the resources manager calls below.
229 for(unsigned int i=0;i<possibleComputers.length();i++)
230 lm.push_back(string(possibleComputers[i]));
// Machine selection: one finder per resource policy.
235 case Engines::P_FIRST:
236 theMachine=_ResManager->GetImpl()->FindFirst(lm);
238 case Engines::P_CYCL:
239 theMachine=_ResManager->GetImpl()->FindNext(lm);
241 case Engines::P_BEST:
242 theMachine=_ResManager->GetImpl()->FindBest(lm);
246 catch( const SALOME_Exception &ex ){
248 return Engines::Container::_nil();
251 //If the machine name is localhost use the real name
252 if(theMachine == "localhost")
253 theMachine=Kernel_Utils::GetHostname();
255 MESSAGE("try to launch it on " << theMachine);
257 // Get Id for container: a parallel container registers in Naming Service
258 // on the machine where process 0 runs. ContainerManager doesn't know the name
259 // of this machine before the launch of the parallel container. So to get
260 // the IOR of the parallel container in Naming Service, ContainerManager
261 // gives a unique Id. The parallel container registers its name under
262 // /ContainerManager/Id directory in NamingService
264 id = GetIdForContainer();
268 MESSAGE("SALOME_ContainerManager::StartContainer : " <<
269 "no possible computer");
270 return Engines::Container::_nil();
// Local host: build a local launch command; any other machine: a remote one.
272 else if(theMachine==Kernel_Utils::GetHostname())
273 command = BuildCommandToLaunchLocalContainer(params,id,container_exe);
275 command = BuildCommandToLaunchRemoteContainer(theMachine,params,id,container_exe);
277 // RmTmpFile(); Too early! May be this function has not been used for a long time...
279 //check if an entry exists in Naming service
// Two naming schemes appear below: "/ContainerManager/id<id>" or the name
// built from params and the machine. NOTE(review): the condition choosing
// between them is not visible in this listing.
282 containerNameInNS = "/ContainerManager/id";
283 sprintf(idc,"%ld",id);
284 containerNameInNS += idc;
287 containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str());
289 SCRUTE(containerNameInNS);
290 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
291 if ( !CORBA::is_nil(obj) )
295 // shutdown the registered container if it exists
296 Engines::Container_var cont=Engines::Container::_narrow(obj);
297 if(!CORBA::is_nil(cont))
300 catch(CORBA::Exception&)
302 INFOS("CORBA::Exception ignored.");
306 //redirect stdout and stderr in a file
// NOTE(review): getenv("USER") is concatenated without a null check; an unset
// USER variable would make this expression invalid — confirm and guard.
307 string logFilename="/tmp/"+_NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ;
308 command += " > " + logFilename + " 2>&1 &";
310 // launch container with a system call
311 int status=system(command.c_str());
315 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
316 "(system command status -1)");
317 RmTmpFile(); // command file can be removed here
318 return Engines::Container::_nil();
320 else if (status == 217){
321 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
322 "(system command status 217)");
323 RmTmpFile(); // command file can be removed here
324 return Engines::Container::_nil();
// Poll the naming service until the container is registered or the counter
// runs out. NOTE(review): the delay and the decrement are not visible here.
327 int count=TIME_OUT_TO_LAUNCH_CONT;
328 MESSAGE("count = "<<count);
329 while ( CORBA::is_nil(ret) && count ){
337 MESSAGE( count << ". Waiting for container on " << theMachine);
339 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
340 ret=Engines::Container::_narrow(obj);
343 if ( CORBA::is_nil(ret) )
345 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed");
// Prefix the log path so that it reads "<USER>@<hostname>:<path>" and pass it
// to the container.
349 logFilename=":"+logFilename;
350 logFilename="@"+Kernel_Utils::GetHostname()+logFilename;
351 logFilename=getenv( "USER" )+logFilename;
352 ret->logfilename(logFilename.c_str());
355 RmTmpFile(); // command file can be removed here
360 //=============================================================================
362 * Start a suitable Container in a list of machines
363 * \param params Machine Parameters required for the container
364 * \param possibleComputers list of machines usable for start
366 //=============================================================================
// Starts a container able to host the components of componentList.
// The candidate machines come from _ResManager->GetFittingResources(); the
// module catalog ("/Kernel/ModulCatalog") is then queried for every component
// to find out whether a specific container executable (implementation type
// CEXE) has to be launched.
// \param params        machine parameters required for the container
// \param policy        resource selection policy forwarded to the other overload
// \param componentList names of the components the container must host
// \return the started container, or a nil reference if the catalog cannot be
//         reached, an exception is caught, or two CEXE components are requested
// NOTE(review): the conditions around the two final return statements and the
// "found" bookkeeping are not visible in this listing.
368 Engines::Container_ptr
369 SALOME_ContainerManager::
370 StartContainer(const Engines::MachineParameters& params,
371 Engines::ResPolicy policy,
372 const Engines::CompoList& componentList)
374 Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params,componentList);
376 // Look into ModulCatalog if a specific container must be launched
377 CORBA::String_var container_exe;
381 CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog");
382 SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ;
383 if (CORBA::is_nil (Catalog))
384 return Engines::Container::_nil();
385 // Loop through component list
386 for(unsigned int i=0;i<componentList.length();i++)
388 const char* compoi = componentList[i];
389 SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi);
390 if (CORBA::is_nil (compoInfo))
394 SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type();
395 container_exe=compoInfo->implementation_name();
396 if(impl==SALOME_ModuleCatalog::CEXE)
400 INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" );
401 return Engines::Container::_nil();
407 catch (ServiceUnreachable&)
409 INFOS("Caught exception: Naming Service Unreachable");
410 return Engines::Container::_nil();
414 INFOS("Caught unknown exception.");
415 return Engines::Container::_nil();
// Either launch with the specific executable found in the catalog, or fall
// back to the three-argument call.
419 return StartContainer(params,possibleComputers,policy,container_exe.in());
421 return StartContainer(params,possibleComputers,policy);
424 #ifdef WITH_PACO_PARALLEL
425 //=============================================================================
427 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
428 * \param params Machine Parameters required for the container
429 * \param possibleComputers list of machines usable for find or start
431 * \return CORBA container reference.
433 //=============================================================================
// PaCO++ build of FindOrStartParallelContainer (inside #ifdef WITH_PACO_PARALLEL).
// First tries FindContainer(); when nothing is found, picks a machine and —
// only when that machine is the local host — launches the container proxy,
// then the nodes, and finally resolves every node in the naming service to
// connect it to the proxy.
// \param params_const      machine parameters; copied into a local, modifiable params
// \param possibleComputers machines usable for find or start
// \return CORBA container reference (nil when the launch command cannot be built)
// NOTE(review): declarations of command/buffer, the deploy calls on the nodes
// and several control-flow keywords are not visible in this listing.
434 Engines::Container_ptr
435 SALOME_ContainerManager::
436 FindOrStartParallelContainer(const Engines::MachineParameters& params_const,
437 const Engines::MachineList& possibleComputers)
439 CORBA::Object_var obj;
440 PaCO::InterfaceManager_var proxy;
441 Engines::Container_ptr ret = Engines::Container::_nil();
442 Engines::MachineParameters params(params_const);
444 // Step 1 : Try to find a suitable container
445 // Currently not as good as it could be, since
446 // we still have to verify the container's number of nodes
447 // when the user specifies one.
448 ret = FindContainer(params, possibleComputers);
450 if(CORBA::is_nil(ret)) {
451 // Step 2 : Starting a new parallel container
452 INFOS("[FindOrStartParallelContainer] Starting a parallel container");
454 // Step 2.1 : Choose a computer
// NOTE(review): the other StartContainer code calls
// _ResManager->GetImpl()->FindFirst() with a list of strings; here FindFirst
// is called directly on _ResManager with the CORBA list — confirm this overload exists.
455 string theMachine = _ResManager->FindFirst(possibleComputers);
456 if(theMachine == "") {
457 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
458 INFOS("[FindOrStartParallelContainer] No possible computer found");
459 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
462 INFOS("[FindOrStartParallelContainer] on machine : " << theMachine);
464 if(theMachine == Kernel_Utils::GetHostname()) {
465 // Step 3 : starting parallel container proxy
466 params.hostname = CORBA::string_dup(theMachine.c_str());
467 Engines::MachineParameters params_proxy(params);
469 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerProxy", params_proxy, "xterm");
471 catch(const SALOME_Exception & ex){
473 return Engines::Container::_nil();
475 // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes
476 params_proxy.nb_component_nodes = 0;
477 obj = LaunchParallelContainer(command, params_proxy, _NS->ContainerName(params));
478 ret = Engines::Container::_narrow(obj);
479 proxy = PaCO::InterfaceManager::_narrow(obj);
481 // Step 4 : starting parallel container nodes
482 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerNode", params, "xterm");
483 string name = _NS->ContainerName(params) + "Node";
484 LaunchParallelContainer(command, params, name);
485 // Step 5 : connecting nodes and the proxy to actually create a parallel container
487 for (int i = 0; i < params.nb_component_nodes; i++) {
// Node names are the base name followed by the node index; the two calls below
// are presumably the POSIX and Windows variants selected by a preprocessor
// test not visible here.
491 snprintf(buffer,5,"%d",i);
493 _snprintf(buffer,5,"%d",i);
495 string name_cont = name + string(buffer);
// NOTE(review): CORBA::string_dup allocates a copy that std::string copies
// again and nobody frees — looks like a leak; confirm and drop the string_dup.
497 string theNodeMachine(CORBA::string_dup(params.hostname));
498 string containerNameInNS = _NS->BuildContainerNameForNS(name_cont.c_str(),theNodeMachine.c_str());
499 int count = TIME_OUT_TO_LAUNCH_CONT;
500 obj = _NS->Resolve(containerNameInNS.c_str());
// Retry the lookup while the node is not yet registered and count is non-zero.
501 while (CORBA::is_nil(obj) && count) {
502 INFOS("[FindOrStartParallelContainer] CONNECTION FAILED !!!!!!!!!!!!!!!!!!!!!!!!");
509 obj = _NS->Resolve(containerNameInNS.c_str());
512 PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
513 MESSAGE("[FindOrStartParallelContainer] Deploying node : " << name);
518 catch(CORBA::SystemException& e)
520 INFOS("Caught CORBA::SystemException. : " << e);
522 catch(PortableServer::POA::ServantAlreadyActive&)
524 INFOS("Caught CORBA::ServantAlreadyActiveException");
526 catch(CORBA::Exception&)
528 INFOS("Caught CORBA::Exception.");
530 catch(std::exception& exc)
532 INFOS("Caught std::exception - "<<exc.what());
536 INFOS("Caught unknown exception.");
538 INFOS("[FindOrStartParallelContainer] node " << name << " deployed");
541 INFOS("[FindOrStartParallelContainer] Currently parallel containers are launched only on the local host");
548 //=============================================================================
550 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
551 * \param params Machine Parameters required for the container
552 * \param possibleComputers list of machines usable for find or start
554 * \return CORBA container reference.
556 //=============================================================================
// Variant of FindOrStartParallelContainer for builds without the parallel
// extension: it only logs that the feature is disabled. The local ret is
// initialised to a nil reference (the return statement is not visible in this
// listing).
// \param params            unused in the lines visible here
// \param possibleComputers unused in the lines visible here
557 Engines::Container_ptr
558 SALOME_ContainerManager::
559 FindOrStartParallelContainer(const Engines::MachineParameters& params,
560 const Engines::MachineList& possibleComputers)
562 Engines::Container_ptr ret = Engines::Container::_nil();
563 INFOS("[FindOrStartParallelContainer] is disabled !");
564 INFOS("[FindOrStartParallelContainer] recompile SALOME Kernel to enable parallel extension");
569 //=============================================================================
571 * Give a suitable Container in a list of machines
572 * \param params Machine Parameters required for the container
573 * \param possibleComputers list of machines usable for start
575 //=============================================================================
// Returns a container for the request.
// When the SALOME_BATCH environment variable equals "1", containers already
// registered in the naming service are handed out in turn (the cursor goes
// back to the first one after the last); otherwise a new container is started
// through StartContainer(params, policy, componentList).
// \param params        machine parameters required for the container
// \param policy        resource selection policy (used when starting a container)
// \param componentList components the container must host
577 Engines::Container_ptr
578 SALOME_ContainerManager::
579 GiveContainer(const Engines::MachineParameters& params,
580 Engines::ResPolicy policy,
581 const Engines::CompoList& componentList)
583 char *valenv=getenv("SALOME_BATCH");
// NOTE(review): no null check on valenv is visible before strcmp — confirm one
// exists in the full source, since getenv returns NULL for an unset variable.
585 if (strcmp(valenv,"1")==0)
// Fill the static container list on first use.
587 if(_batchLaunchedContainers.empty())
588 fillBatchLaunchedContainers();
// Go back to the first container once the end has been reached.
590 if (_batchLaunchedContainersIter == _batchLaunchedContainers.end())
591 _batchLaunchedContainersIter = _batchLaunchedContainers.begin();
// NOTE(review): if the list is still empty here, begin() == end() and the
// dereference below is invalid — confirm a guard exists.
593 Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter);
594 _batchLaunchedContainersIter++;
597 return StartContainer(params,policy,componentList);
600 //=============================================================================
604 //=============================================================================
// Looks up a container on one machine.
// Builds the naming-service name from params and theMachine, resolves it and
// narrows the result to Engines::Container.
// \param params     machine parameters identifying the container
// \param theMachine host name used to build the naming-service entry
// \return the narrowed container reference, or a nil reference when nothing is
//         registered under that name
606 Engines::Container_ptr
607 SALOME_ContainerManager::
608 FindContainer(const Engines::MachineParameters& params,
609 const char *theMachine)
611 string containerNameInNS(_NS->BuildContainerNameForNS(params,theMachine));
612 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
613 if( !CORBA::is_nil(obj) )
614 return Engines::Container::_narrow(obj);
616 return Engines::Container::_nil();
619 //=============================================================================
623 //=============================================================================
// Looks up a container on each machine of possibleComputers in order, using
// the single-machine FindContainer() overload.
// \param params            machine parameters identifying the container
// \param possibleComputers machines to search
// \return a nil reference when no machine hosts a matching container (the
//         return for a successful match is not visible in this listing)
625 Engines::Container_ptr
626 SALOME_ContainerManager::
627 FindContainer(const Engines::MachineParameters& params,
628 const Engines::MachineList& possibleComputers)
630 MESSAGE("FindContainer "<<possibleComputers.length());
631 for(unsigned int i=0;i<possibleComputers.length();i++)
633 MESSAGE("FindContainer possible " << possibleComputers[i]);
634 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
635 if( !CORBA::is_nil(cont) )
638 MESSAGE("FindContainer: not found");
639 return Engines::Container::_nil();
642 //=============================================================================
643 /*! This method launches the parallel container.
644 * It may eventually be placed in the resources manager.
646 * \param command to launch
647 * \param container's parameters
648 * \param name of the container
650 * \return CORBA container reference
652 //=============================================================================
// Runs the given launch command with system() and waits for the launched
// processes to register in the naming service.
// params.nb_component_nodes selects the mode: 0 means the command launches the
// container proxy (registered under name); any other value means it launches
// the nodes, registered as name followed by the node index 0..nb_component_nodes-1.
// \param command shell command to execute
// \param params  container parameters (hostname and nb_component_nodes are read)
// \param name    base name of the container in the naming service
// NOTE(review): the return type and return statement, the status == -1 tests,
// the waits inside the polling loops and the buffer declaration are not
// visible in this listing; the returned object is presumably obj.
654 SALOME_ContainerManager::LaunchParallelContainer(const std::string& command,
655 const Engines::MachineParameters& params,
656 const std::string& name)
658 CORBA::Object_ptr obj = CORBA::Object::_nil();
659 string containerNameInNS;
660 MESSAGE("[LaunchParallelContainer] : command to launch...");
662 if (params.nb_component_nodes == 0) {
663 INFOS("[LaunchParallelContainer] launching the proxy of the parallel container");
664 int status = system(command.c_str());
666 INFOS("[LaunchParallelContainer] failed : system command status -1");
668 else if (status == 217) {
669 INFOS("[LaunchParallelContainer] failed : system command status 217");
672 int count = TIME_OUT_TO_LAUNCH_CONT;
// NOTE(review): the buffer returned by CORBA::string_dup is copied by
// std::string and never freed — looks like a leak; confirm.
673 string theMachine(CORBA::string_dup(params.hostname));
674 containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(),theMachine.c_str());
676 INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy on " << theMachine);
// Poll the naming service until the proxy is registered or count is exhausted.
677 while (CORBA::is_nil(obj) && count) {
684 obj = _NS->Resolve(containerNameInNS.c_str());
688 INFOS("[LaunchParallelContainer] launching the nodes of the parallel container");
689 int status = system(command.c_str());
691 INFOS("[LaunchParallelContainer] failed : system command status -1");
693 else if (status == 217) {
694 INFOS("[LaunchParallelContainer] failed : system command status 217");
696 // We are waiting all the nodes
697 for (int i = 0; i < params.nb_component_nodes; i++) {
698 obj = CORBA::Object::_nil();
699 int count = TIME_OUT_TO_LAUNCH_CONT;
// Format the node index; the two calls are presumably the POSIX and Windows
// variants selected by a preprocessor test not visible here.
704 snprintf(buffer,5,"%d",i);
706 _snprintf(buffer,5,"%d",i);
709 string name_cont = name + string(buffer);
711 // I don't like this...
712 string theMachine(CORBA::string_dup(params.hostname));
713 containerNameInNS = _NS->BuildContainerNameForNS((char*) name_cont.c_str(),theMachine.c_str());
714 cerr << "[LaunchContainer] Waiting for Parllel Container node " << containerNameInNS << " on " << theMachine << endl;
715 while (CORBA::is_nil(obj) && count) {
722 obj = _NS->Resolve(containerNameInNS.c_str());
727 if ( CORBA::is_nil(obj) ) {
728 INFOS("[LaunchParallelContainer] failed");
733 //=============================================================================
735 * Get Id for container: a parallel container registers in Naming Service
736 * on the machine where process 0 runs. ContainerManager doesn't know the name
737 * of this machine before the launch of the parallel container. So to get
738 * the IOR of the parallel container in Naming Service, ContainerManager
739 * gives a unique Id. The parallel container registers its name under
740 * /ContainerManager/Id directory in NamingService
742 //=============================================================================
// Returns the id that StartContainer() passes to the launch-command builders
// and appends to "/ContainerManager/id" to build a naming-service entry.
// NOTE(review): the body is not visible in this listing.
745 long SALOME_ContainerManager::GetIdForContainer(void)
// Rebuilds the static list of batch containers: clears it, lists the
// "/Containers" directory of the naming service recursively, and stores every
// entry that narrows to a non-nil Engines::Container. The static cursor is
// then reset to the first element.
751 void SALOME_ContainerManager::fillBatchLaunchedContainers()
753 _batchLaunchedContainers.clear();
754 _NS->Change_Directory("/Containers");
755 vector<string> vec = _NS->list_directory_recurs();
756 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
757 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
// Plain _ptr (not _var): the narrowed reference is stored in the static vector.
758 Engines::Container_ptr cont=Engines::Container::_narrow(obj);
759 if(!CORBA::is_nil(cont)){
760 _batchLaunchedContainers.push_back(cont);
763 _batchLaunchedContainersIter=_batchLaunchedContainers.begin();
766 //=============================================================================
768 * This is no longer valid (C++ container are also python containers)
770 //=============================================================================
// Tests whether a container name ends with the two characters "Py".
// \param ContainerName NUL-terminated container name
// NOTE(review): the length guard and the return statements are not visible in
// this listing; names shorter than two characters need such a guard.
772 bool isPythonContainer(const char* ContainerName)
775 int len = strlen(ContainerName);
// Compare the last two characters of the name with "Py".
778 if (strcmp(ContainerName + len - 2, "Py") == 0)
784 //=============================================================================
786 * Builds the script to be launched
788 * If SALOME Application not defined ($APPLI),
789 * see BuildTempFileToLaunchRemoteContainer()
791 * Else rely on distant configuration. Command is under the form (example):
792 * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
793 * SALOME_Container containerName &"
795 * - where user is omitted if not specified in CatalogResources,
796 * - where distant path is always relative to user@machine $HOME, and
797 * equal to $APPLI if not specified in CatalogResources,
798 * - where hostNS is the hostname of CORBA naming server (set by scripts to
799 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
800 * - where portNS is the port used by CORBA naming server (set by scripts to
801 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
802 * - where workingdir is the requested working directory for the container.
803 * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME
805 //=============================================================================
// Builds the shell command that launches a container on a remote machine.
// Without a SALOME application ($APPLI not set when the manager was created)
// the work is delegated to BuildTempFileToLaunchRemoteContainer(). Otherwise
// the command is assembled from the machine's resource description: remote
// shell protocol, optional user name, application path followed by
// "/runRemote.sh", naming-service host and port, optional working directory,
// optional mpirun prefix, container executable, container name, id and
// naming-service options.
// \param machine       target machine name, used to look up its resource entry
// \param params        container parameters (nb_node, nb_proc_per_node, workingdir are read)
// \param id            container id formatted into the command
// \param container_exe container executable name
// Throws SALOME_Exception("Unknown protocol") for a protocol other than rsh or ssh.
// NOTE(review): the return type, the declarations of command/nbproc, the
// literal fragments appended between the visible lines and the MPI test are
// not visible in this listing.
808 SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
809 (const string& machine,
810 const Engines::MachineParameters& params, const long id,const std::string& container_exe)
// Buffer receiving the decimal text of id (see sprintf below).
814 char idc[3*sizeof(long)];
816 if ( ! _isAppliSalomeDefined )
817 command = BuildTempFileToLaunchRemoteContainer(machine, params);
821 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
// Process count: whichever of nb_node / nb_proc_per_node is non-zero, or their
// product when both are set.
825 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
827 else if ( params.nb_node == 0 )
828 nbproc = params.nb_proc_per_node;
829 else if ( params.nb_proc_per_node == 0 )
830 nbproc = params.nb_node;
832 nbproc = params.nb_node * params.nb_proc_per_node;
835 // "ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
836 // SALOME_Container containerName &"
838 if (resInfo.Protocol == rsh)
840 else if (resInfo.Protocol == ssh)
843 throw SALOME_Exception("Unknown protocol");
845 if (resInfo.UserName != "")
847 command += resInfo.UserName;
854 if (resInfo.AppliPath != "")
855 command += resInfo.AppliPath; // path relative to user@machine $HOME
858 ASSERT(getenv("APPLI"));
859 command += getenv("APPLI"); // path relative to user@machine $HOME
862 command += "/runRemote.sh ";
864 ASSERT(getenv("NSHOST"));
865 command += getenv("NSHOST"); // hostname of CORBA name server
868 ASSERT(getenv("NSPORT"));
869 command += getenv("NSPORT"); // port of CORBA name server
871 std::string wdir=params.workingdir.in();
874 command += " WORKINGDIR ";
876 if(wdir == "$TEMPDIR")
878 command += wdir; // requested working directory
884 command += " mpirun -np ";
885 std::ostringstream o;
889 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
891 command += " SALOME_MPIContainer ";
894 command += " " +container_exe+ " ";
896 command += _NS->ContainerName(params);
898 sprintf(idc,"%ld",id);
901 AddOmninamesParams(command);
903 MESSAGE("command =" << command);
909 //=============================================================================
911 * builds the command to be launched.
913 //=============================================================================
// Builds the command used to launch a container on the local host.
// The launch line is written into a temporary script (_TmpFileName), which is
// made executable; the command handed back to the caller is the script path.
// The script optionally starts with an mpirun prefix, changes to the requested
// working directory (creating it, or using a fresh temporary directory for
// "$TEMPDIR"), then runs the container executable with the container name,
// "-id <id>" and the naming-service options.
// \param params        container parameters (nb_node, nb_proc_per_node,
//                      container_name, workingdir are read)
// \param id            container id written after "-id"
// \param container_exe container executable name
// NOTE(review): the return type, the declarations of command/nbproc and the
// preprocessor/if/else lines selecting between the alternatives below are not
// visible in this listing.
916 SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
917 (const Engines::MachineParameters& params, const long id,const std::string& container_exe)
919 _TmpFileName = BuildTemporaryFileName();
922 //char idc[3*sizeof(long)];
924 ofstream command_file( _TmpFileName.c_str() );
928 //command = "mpirun -np ";
929 command_file << "mpirun -np ";
// Process count: whichever of nb_node / nb_proc_per_node is non-zero, or their
// product when both are set.
931 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
933 else if ( params.nb_node == 0 )
934 nbproc = params.nb_proc_per_node;
935 else if ( params.nb_proc_per_node == 0 )
936 nbproc = params.nb_node;
938 nbproc = params.nb_node * params.nb_proc_per_node;
940 //std::ostringstream o;
942 //o << nbproc << " ";
943 command_file << nbproc << " ";
945 //command += o.str();
947 //command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
948 command_file << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
951 if (isPythonContainer(params.container_name))
952 //command += "pyMPI SALOME_ContainerPy.py ";
953 command_file << "pyMPI SALOME_ContainerPy.py ";
955 //command += "SALOME_MPIContainer ";
956 command_file << "SALOME_MPIContainer ";
962 std::string wdir=params.workingdir.in();
965 // a working directory is requested
966 if(wdir == "$TEMPDIR")
968 // a new temporary directory is requested
969 string dir = Kernel_Utils::GetTmpDir();
971 //command += "cd /d "+ dir +";";
972 command_file << "cd /d " << dir << endl;
974 //command = "cd "+ dir +";";
975 command_file << "cd " << dir << ";";
981 // a permanent directory is requested use it or create it
983 //command="mkdir " + wdir;
984 command_file << "mkdir " + wdir << endl;
985 command_file << "cd /D " + wdir << endl;
987 //command="mkdir -p " + wdir + " && cd " + wdir + ";";
988 command_file << "mkdir -p " << wdir << " && cd " << wdir + ";";
992 if (isPythonContainer(params.container_name))
993 //command += "SALOME_ContainerPy.py ";
994 command_file << "SALOME_ContainerPy.py ";
996 //command += container_exe + " ";
997 command_file << container_exe + " ";
1002 /*command += _NS->ContainerName(params);
1004 sprintf(idc,"%ld",id);
1007 AddOmninamesParams(command);*/
1009 command_file << _NS->ContainerName(params);
1010 command_file << " -id " << id << " -";
1011 AddOmninamesParams(command_file);
1012 command_file.close();
// 0x1ED == 0755 octal: rwxr-xr-x, so that the script can be executed.
1015 chmod(_TmpFileName.c_str(), 0x1ED);
1017 command = _TmpFileName;
1019 MESSAGE("Command is file ... " << command);
1024 //=============================================================================
1026 * removes the generated temporary file in case of a remote launch.
1028 //=============================================================================
// Deletes the temporary launch script named by _TmpFileName through a shell
// command, then removes its directory when that directory has become empty.
// NOTE(review): the two declarations of command ("del /F " vs "rm ") and the
// two ways of appending the file name are presumably platform alternatives
// selected by preprocessor lines not visible in this listing.
1030 void SALOME_ContainerManager::RmTmpFile()
1032 int lenght = _TmpFileName.size();
1036 string command = "del /F ";
1038 string command = "rm ";
// One variant drops the last three characters of the file name, the other
// uses the name unchanged.
1041 command += _TmpFileName.substr(0, lenght - 3 );
1043 command += _TmpFileName;
1045 system(command.c_str());
1046 //if dir is empty - remove it
1047 string tmp_dir = Kernel_Utils::GetDirByPath( _TmpFileName );
1048 if ( Kernel_Utils::IsEmptyDir( tmp_dir ) )
1051 command = "del /F " + tmp_dir;
1053 command = "rmdir " + tmp_dir;
1055 system(command.c_str());
1060 //=============================================================================
1062 * add to command all options relative to naming service.
1064 //=============================================================================
// Appends the naming-service option text to a command string.
// \param command command string extended in place with "ORBInitRef NameService="
// NOTE(review): the line appending the IOR address itself (iorstr) is not
// visible in this listing.
1066 void SALOME_ContainerManager::AddOmninamesParams(string& command) const
1068 CORBA::String_var iorstr = _NS->getIORaddr();
1069 command += "ORBInitRef NameService=";
1074 //=============================================================================
1076 * add to command all options relative to naming service.
1078 //=============================================================================
// Stream overload: writes "ORBInitRef NameService=" followed by the address
// returned by _NS->getIORaddr() to the given file stream.
// \param fileStream output stream receiving the option text
1080 void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const
1082 CORBA::String_var iorstr = _NS->getIORaddr();
1083 fileStream << "ORBInitRef NameService=";
1084 fileStream << iorstr;
1087 //=============================================================================
1089 * generate a file name in /tmp directory
1091 //=============================================================================
// Produces the name of the temporary script file, starting from
// Kernel_Utils::GetTmpFileName().
// NOTE(review): the ".bat" suffix is presumably added only on Windows and the
// return statement follows; neither the preprocessor test nor the return is
// visible in this listing.
1093 string SALOME_ContainerManager::BuildTemporaryFileName() const
1095 //build more complex file name to support multiple salome session
1096 string aFileName = Kernel_Utils::GetTmpFileName();
1100 aFileName += ".bat";
1106 //=============================================================================
1108 * Builds in a temporary file the script to be launched.
1110 * Used if SALOME Application ($APPLI) is not defined.
1111 * The command is build with data from CatalogResources, in which every path
1112 * used on remote computer must be defined.
1114 //=============================================================================
// Writes a /bin/sh launch script into a temporary file, copies it to the
// remote machine with rcp or scp according to the machine's protocol, and
// builds the command that runs it remotely.
// The script exports SALOME_trace=local, optionally prefixes the launch with
// mpirun, and starts the container from $KERNEL_ROOT_DIR/bin/salome/ in the
// background with the container name and the naming-service options.
// \param machine target machine name, used to look up its resource entry
// \param params  container parameters (nb_node, nb_proc_per_node, container_name are read)
// Throws SALOME_Exception("Unknown protocol") for a protocol other than rsh or
// ssh, and SALOME_Exception("Error of connection on remote host") on the copy
// failure path.
// Side effects: sets _TmpFileName and _CommandForRemAccess.
// NOTE(review): the return type, the declarations of command/status/nbproc,
// the MPI test and the status check are not visible in this listing.
1117 SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
1118 (const string& machine,
// NOTE(review): dynamic exception specifications are deprecated since C++11
// and removed in C++17.
1119 const Engines::MachineParameters& params) throw(SALOME_Exception)
1123 _TmpFileName = BuildTemporaryFileName();
1124 ofstream tempOutputFile;
1125 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
1126 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
1127 tempOutputFile << "#! /bin/sh" << endl;
1131 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
1132 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
1138 tempOutputFile << "mpirun -np ";
// Process count: whichever of nb_node / nb_proc_per_node is non-zero, or their
// product when both are set.
1141 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
1143 else if ( params.nb_node == 0 )
1144 nbproc = params.nb_proc_per_node;
1145 else if ( params.nb_proc_per_node == 0 )
1146 nbproc = params.nb_node;
1148 nbproc = params.nb_node * params.nb_proc_per_node;
1150 std::ostringstream o;
1152 tempOutputFile << nbproc << " ";
1154 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// NOTE(review): getenv("KERNEL_ROOT_DIR") is streamed without a null check —
// confirm the variable is guaranteed to be set.
1158 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
1162 if (isPythonContainer(params.container_name))
1163 tempOutputFile << "pyMPI SALOME_ContainerPy.py ";
1165 tempOutputFile << "SALOME_MPIContainer ";
1170 if (isPythonContainer(params.container_name))
1171 tempOutputFile << "SALOME_ContainerPy.py ";
1173 tempOutputFile << "SALOME_Container ";
1176 tempOutputFile << _NS->ContainerName(params) << " -";
1177 AddOmninamesParams(tempOutputFile);
1178 tempOutputFile << " &" << endl;
1179 tempOutputFile.flush();
1180 tempOutputFile.close();
// 0x1ED == 0755 octal: rwxr-xr-x, so that the script can be executed.
1182 chmod(_TmpFileName.c_str(), 0x1ED);
1185 // --- Build command
// Copy the script to the remote machine with the tool matching the protocol.
1189 if (resInfo.Protocol == rsh)
1192 string commandRcp = "rcp ";
1193 commandRcp += _TmpFileName;
1195 commandRcp += machine;
1197 commandRcp += _TmpFileName;
1198 status = system(commandRcp.c_str());
1201 else if (resInfo.Protocol == ssh)
1204 string commandRcp = "scp ";
1205 commandRcp += _TmpFileName;
1207 commandRcp += machine;
1209 commandRcp += _TmpFileName;
1210 status = system(commandRcp.c_str());
1213 throw SALOME_Exception("Unknown protocol");
1216 throw SALOME_Exception("Error of connection on remote host");
// Keep the remote-access prefix, then append the script path to obtain the
// full command.
1219 _CommandForRemAccess = command;
1221 command += _TmpFileName;
1229 //=============================================================================
1230 /*! Creates a command line that the container manager uses to launch
1231 * a parallel container.
1233 //=============================================================================
// Builds the shell command line used to start a parallel container process
// (proxy or node) on the local machine.
//
// Parameters:
//   exe_name - base name of the executable; the parallel library name is
//              appended to it to obtain the real executable name, and the
//              position of the substring "Proxy" is computed from it.
//   params   - requested container parameters; parallelLib, hostname and
//              nb_component_nodes are read directly, and several other fields
//              are copied into a new MachineParameters used for naming.
//   log      - log mode; "default" redirects the output to a file under /tmp.
// Throws: SALOME_Exception with the text "Unknown parallelLib" followed by
//         the library name (the two are concatenated without a separator).
1235 SALOME_ContainerManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1236 const Engines::MachineParameters& params,
1237 const std::string& log)
1239 // This method knows the differences between the proxy and the nodes.
1240 // nb_component_nodes is not used in the same way if it is a proxy or
// a node: one "mpiexec" layout below passes it to "-np", the other uses
// "-np 1" and passes it as a program argument.
// NOTE(review): CORBA::string_dup() allocates a copy that std::string copies
// again; nothing here releases the duplicate, so it looks like a leak -
// confirm and consider constructing the strings from the fields directly.
1244 string parallelLib(CORBA::string_dup(params.parallelLib));
1245 string hostname(CORBA::string_dup(params.hostname));
// find() yields std::string::npos when "Proxy" is absent; stored in an int
// this is presumably a negative value - TODO confirm how `par` is tested.
1246 int par = exe_name.find("Proxy");
1247 int nbproc = params.nb_component_nodes;
// Decimal text of nbproc, reused when assembling the mpiexec commands.
// NOTE(review): verify that `buffer` is large enough for any int value.
1249 sprintf(buffer,"%d",nbproc);
// Copy of the request used only to compute the container name through
// _NS->ContainerName(). The fields copied are container_name, hostname, OS,
// mem_mb, cpu_clock, nb_proc_per_node, nb_node and isMPI.
1251 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1252 rtn->container_name = params.container_name;
1253 rtn->hostname = params.hostname;
1254 rtn->OS = params.OS;
1255 rtn->mem_mb = params.mem_mb;
1256 rtn->cpu_clock = params.cpu_clock;
1257 rtn->nb_proc_per_node = params.nb_proc_per_node;
1258 rtn->nb_node = params.nb_node;
1259 rtn->isMPI = params.isMPI;
// The executable actually started is "<exe_name><parallelLib>".
1261 string real_exe_name = exe_name + parallelLib;
// "Dummy" library: the executable is started directly, followed by the
// container name, the library name, the host name and the naming-service
// options.
1263 if (parallelLib == "Dummy")
1265 //command = "gdb --args ";
1266 //command = "valgrind --tool=memcheck --log-file=val_log ";
1267 //command += real_exe_name;
1269 command = real_exe_name;
1271 command += " " + _NS->ContainerName(rtn);
1272 command += " " + parallelLib;
1273 command += " " + hostname;
1275 AddOmninamesParams(command);
// "Mpi" library: the executable is started through mpiexec.
1278 else if (parallelLib == "Mpi")
1280 // Step 1 : check if MPI is started
1281 if (_MpiStarted == false)
// Layout 1: nbproc processes are requested from mpiexec
// (presumably the node case - TODO confirm).
1290 command = "mpiexec -np " + string(buffer) + " ";
1291 // command += "gdb --args ";
1292 command += real_exe_name;
1293 command += " " + _NS->ContainerName(rtn);
1294 command += " " + parallelLib;
1295 command += " " + hostname;
1297 AddOmninamesParams(command);
// Layout 2: a single process is started and nbproc is handed over as an
// argument after the container name (presumably the proxy case - TODO
// confirm).
1302 command = "mpiexec -np 1 ";
1303 command += real_exe_name;
1304 command += " " + _NS->ContainerName(rtn);
1305 command += " " + string(buffer);
1306 command += " " + parallelLib;
1307 command += " " + hostname;
1309 AddOmninamesParams(command);
// Any other library name is rejected.
1314 std::string message("Unknown parallelLib" + parallelLib);
1315 throw SALOME_Exception(message.c_str());
// "default" log mode: run in the background with stdout and stderr redirected
// to a file under /tmp whose name is built from the container name, the local
// host name and the USER environment variable.
1319 if (log == "default")
1321 command += " > /tmp/";
1322 command += _NS->ContainerName(rtn);
1324 command += Kernel_Utils::GetHostname();
// NOTE(review): getenv() returns a null pointer when USER is unset, and
// appending a null pointer to a std::string is not valid - confirm USER is
// always defined or add a fallback.
1326 command += getenv( "USER" ) ;
1327 command += ".log 2>&1 &" ;
// Wraps the command in an xterm that re-exports LD_LIBRARY_PATH and PATH;
// presumably selected when log == "xterm" - TODO confirm. The xterm path is
// hard-coded.
1331 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1332 + command + " \" &";
1333 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
// The remainder is disabled code kept from earlier experiments.
1337 /* if (log == "xterm")
1339 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1342 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1343 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1344 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1345 command += "ssh cn01 sh " + fichier_commande + " &";
1346 cerr << "La commande : " << command << endl;
1350 void SALOME_ContainerManager::startMPI()
1352 cerr << "----------------------------------------------" << endl;
1353 cerr << "----------------------------------------------" << endl;
1354 cerr << "----------------------------------------------" << endl;
1355 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1356 cerr << "----------------------------------------------" << endl;
1357 cerr << "----------------------------------------------" << endl;
1358 cerr << "----------------------------------------------" << endl;
1360 int status = system("lamboot");
1363 INFOS("lamboot failed : system command status -1");
1365 else if (status == 217)
1367 INFOS("lamboot failed : system command status 217");