1 // Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
2 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License.
9 // This library is distributed in the hope that it will be useful
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "SALOME_ContainerManager.hxx"
21 #include "SALOME_NamingService.hxx"
22 #include "SALOME_ModuleCatalog.hh"
24 #include <sys/types.h>
30 #include "Utils_CorbaException.hxx"
31 #include "Batch_Date.hxx"
33 #ifdef WITH_PACO_PARALLEL
// Initial value of the retry counter used by the loops that poll the naming
// service while waiting for a freshly launched container to register.
37 #define TIME_OUT_TO_LAUNCH_CONT 21
// Containers gathered by fillBatchLaunchedContainers(); static, so shared by
// every SALOME_ContainerManager instance.
41 vector<Engines::Container_ptr> SALOME_ContainerManager::_batchLaunchedContainers;
// Cursor into _batchLaunchedContainers: GiveContainer() hands out the element
// it designates and then advances it.
43 vector<Engines::Container_ptr>::iterator SALOME_ContainerManager::_batchLaunchedContainersIter;
// Naming-service entry used by the constructor (Register) and by Shutdown()
// (Destroy_Name). The initializer continues on the following source line.
45 const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
48 //=============================================================================
52 * Define a CORBA single-thread policy for the server, which avoids dealing
53 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
55 //=============================================================================
// Constructs the container manager servant. It creates a child POA named
// SThreadPOA whose policy list carries a SINGLE_THREAD_MODEL thread policy,
// activates this servant in that POA and registers the resulting
// Engines::ContainerManager reference in the naming service.
//   orb - ORB reference; a duplicate is kept in _orb
//   poa - parent POA: supplies the POA manager and creates the child POA
//   rm  - resources manager
//   ns  - naming service
// NOTE(review): rm and ns are presumably stored in _ResManager and _NS by
// lines that are not visible here - confirm.
57 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns)
59 MESSAGE("constructor");
64 PortableServer::POAManager_var pman = poa->the_POAManager();
65 _orb = CORBA::ORB::_duplicate(orb) ;
66 CORBA::PolicyList policies;
// NOTE(review): policies[0] below requires the sequence length to be at least
// 1; confirm the length is set (for instance policies.length(1)) beforehand.
68 PortableServer::ThreadPolicy_var threadPol =
69 poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
70 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
// The child POA is created with the POA manager obtained from the parent POA.
72 _poa = poa->create_POA("SThreadPOA",pman,policies);
// Activate this servant in the child POA and turn its object id into a
// typed ContainerManager reference.
74 PortableServer::ObjectId_var id = _poa->activate_object(this);
75 CORBA::Object_var obj = _poa->id_to_reference(id);
76 Engines::ContainerManager_var refContMan =
77 Engines::ContainerManager::_narrow(obj);
// Publish the reference under _ContainerManagerNameInNS.
79 _NS->Register(refContMan,_ContainerManagerNameInNS);
// Remember whether the APPLI environment variable is set;
// BuildCommandToLaunchRemoteContainer() branches on this flag.
81 _isAppliSalomeDefined = (getenv("APPLI") != 0);
82 MESSAGE("constructor end");
85 //=============================================================================
89 //=============================================================================
// Destructor. The only statement visible here emits a trace message.
91 SALOME_ContainerManager::~SALOME_ContainerManager()
93 MESSAGE("destructor");
96 //=============================================================================
98 * shutdown all the containers, then the ContainerManager servant
100 //=============================================================================
// Shuts down every container, removes the manager entry from the naming
// service and deactivates this servant in its POA.
102 void SALOME_ContainerManager::Shutdown()
// Containers are stopped first, while the manager is still registered.
105 ShutdownContainers();
106 _NS->Destroy_Name(_ContainerManagerNameInNS);
// Look up the object id of this servant and deactivate it in the child POA
// created by the constructor.
107 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
108 _poa->deactivate_object(oid);
109 //_remove_ref() has already been done at creation
113 //=============================================================================
115 * Loop on all the containers listed in naming service, ask shutdown on each
117 //=============================================================================
// Walks the /Containers directory of the naming service in three passes:
//   1. collect the names that resolve to an Engines::Container,
//   2. log the collected list,
//   3. resolve each name again and process the container; CORBA and unknown
//      exceptions raised while doing so are logged and ignored.
// NOTE(review): the statement that actually asks a container to shut down is
// not visible here - presumably it sits inside the try block of pass 3.
119 void SALOME_ContainerManager::ShutdownContainers()
121 MESSAGE("ShutdownContainers");
// isOK is declared on a line not shown here; the lines depending on it are
// not visible either.
123 isOK = _NS->Change_Directory("/Containers");
125 vector<string> vec = _NS->list_directory_recurs();
126 list<string> lstCont;
// Pass 1: keep only the entries that narrow to a non-nil container.
127 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
129 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
130 Engines::Container_var cont=Engines::Container::_narrow(obj);
131 if(!CORBA::is_nil(cont)){
132 lstCont.push_back((*iter));
// Pass 2: trace the list of container names.
135 MESSAGE("Container list: ");
136 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
// Pass 3: resolve again, since an entry may have disappeared in the meantime
// (see the no container ref message at the end).
139 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
141 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
142 Engines::Container_var cont=Engines::Container::_narrow(obj);
143 if(!CORBA::is_nil(cont))
145 MESSAGE("ShutdownContainers: " << (*iter));
// Best effort: failures on one container are reported and do not abort.
150 catch(CORBA::SystemException& e)
152 INFOS("CORBA::SystemException ignored : " << e);
154 catch(CORBA::Exception&)
156 INFOS("CORBA::Exception ignored.");
160 INFOS("Unknown exception ignored.");
164 MESSAGE("ShutdownContainers: no container ref for " << (*iter));
169 //=============================================================================
171 * Find a suitable Container in a list of machines, or start one
172 * \param params Machine Parameters required for the container
173 * \param possibleComputers list of machines usable for find or start
175 //=============================================================================
// Looks for an existing container matching params on one of the given
// machines and, failing that, starts one with the P_FIRST resource policy.
//   params            - machine parameters required for the container
//   possibleComputers - machines usable for the lookup or the launch
// Returns the result of StartContainer() when nothing was found.
// NOTE(review): the statement executed when the lookup succeeds is not visible
// here - presumably it returns ret.
177 Engines::Container_ptr
178 SALOME_ContainerManager::
179 FindOrStartContainer(const Engines::MachineParameters& params,
180 const Engines::MachineList& possibleComputers)
182 Engines::Container_ptr ret = FindContainer(params,possibleComputers);
183 if(!CORBA::is_nil(ret))
185 MESSAGE("Container doesn't exist try to launch it ...");
187 return StartContainer(params,possibleComputers,Engines::P_FIRST);
191 //=============================================================================
193 * Start a suitable Container in a list of machines
194 * \param params Machine Parameters required for the container
195 * \param possibleComputers list of machines usable for start
197 //=============================================================================
// Starts a container on one machine chosen among possibleComputers.
//   params            - machine parameters required for the container
//   possibleComputers - candidate machines
//   policy            - selection policy: P_FIRST, P_CYCL or P_BEST
//   container_exe     - executable name forwarded to the command builders
// Steps: choose the machine, build the launch command (local or remote),
// look at what is already registered under the target naming-service name,
// run the command through system() with output redirected to a log file,
// then poll the naming service until the container shows up or the retry
// counter reaches zero.
// Returns a nil reference when no machine can be selected or when the
// system() call reports one of the failure statuses tested below.
199 Engines::Container_ptr
200 SALOME_ContainerManager::
201 StartContainer(const Engines::MachineParameters& params,
202 const Engines::MachineList& possibleComputers,
203 Engines::ResPolicy policy,const std::string& container_exe)
// PaCO++ builds: a non-empty parallelLib delegates to the parallel launcher.
205 #ifdef WITH_PACO_PARALLEL
206 std::string parallelLib(params.parallelLib);
207 if (parallelLib != "")
208 return FindOrStartParallelContainer(params, possibleComputers);
211 string containerNameInNS;
// Receives the decimal text of id, written by sprintf with %ld further down.
212 char idc[3*sizeof(long)];
213 Engines::Container_ptr ret = Engines::Container::_nil();
215 MESSAGE("SALOME_ContainerManager::StartContainer " <<
216 possibleComputers.length());
// Copy the CORBA machine list into lm, the list handed to the resources
// manager implementation.
219 for(unsigned int i=0;i<possibleComputers.length();i++)
220 lm.push_back(string(possibleComputers[i]));
// One lookup method per resource policy.
225 case Engines::P_FIRST:
226 theMachine=_ResManager->GetImpl()->FindFirst(lm);
228 case Engines::P_CYCL:
229 theMachine=_ResManager->GetImpl()->FindNext(lm);
231 case Engines::P_BEST:
232 theMachine=_ResManager->GetImpl()->FindBest(lm);
// A SALOME_Exception raised during the selection ends the launch attempt.
236 catch( const SALOME_Exception &ex ){
238 return Engines::Container::_nil();
241 //If the machine name is localhost use the real name
242 if(theMachine == "localhost")
243 theMachine=GetHostname();
245 MESSAGE("try to launch it on " << theMachine);
247 // Get Id for container: a parallel container registers in Naming Service
248 // on the machine where process 0 runs. ContainerManager does not know the name
249 // of this machine before the launch of the parallel container. So to get
250 // the IOR of the parallel container in Naming Service, ContainerManager
251 // gives a unique Id. The parallel container registers its name under
252 // /ContainerManager/Id directory in NamingService
254 id = GetIdForContainer();
258 MESSAGE("SALOME_ContainerManager::StartContainer : " <<
259 "no possible computer");
260 return Engines::Container::_nil();
// The local host gets a locally built command; any other machine goes
// through the remote command builder.
262 else if(theMachine==GetHostname())
263 command = BuildCommandToLaunchLocalContainer(params,id,container_exe);
265 command = BuildCommandToLaunchRemoteContainer(theMachine,params,id,container_exe);
269 //check if an entry exists in Naming service
// Two naming schemes: /ContainerManager/id followed by the id, or the name
// computed by BuildContainerNameForNS. The condition choosing between them is
// not visible here.
272 containerNameInNS = "/ContainerManager/id";
273 sprintf(idc,"%ld",id);
274 containerNameInNS += idc;
277 containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str());
279 SCRUTE(containerNameInNS);
280 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
281 if ( !CORBA::is_nil(obj) )
283 // shutdown the registered container if it exists
284 Engines::Container_var cont=Engines::Container::_narrow(obj);
285 if(!CORBA::is_nil(cont))
291 catch(CORBA::Exception&)
293 INFOS("CORBA::Exception ignored.");
298 //redirect stdout and stderr in a file
// NOTE(review): getenv returns a null pointer when USER is unset, and
// appending a null char pointer to a std::string is undefined behaviour;
// consider guarding this call (same remark for the getenv call further down).
299 string logFilename="/tmp/"+_NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ;
// The trailing ampersand runs the launch command in the background.
300 command += " > " + logFilename + " 2>&1 &";
302 // launch container with a system call
303 int status=system(command.c_str());
// Failure statuses: -1 according to the first message (its test is on a line
// not shown) and 217.
305 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
306 "(system command status -1)");
307 return Engines::Container::_nil();
309 else if (status == 217){
310 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
311 "(system command status 217)");
312 return Engines::Container::_nil();
// Poll the naming service until the container is registered or count is 0.
// NOTE(review): the wait and the decrement of count are presumably on lines
// not visible here.
315 int count=TIME_OUT_TO_LAUNCH_CONT;
316 MESSAGE("count = "<<count);
317 while ( CORBA::is_nil(ret) && count ){
325 MESSAGE( count << ". Waiting for container on " << theMachine);
327 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
328 ret=Engines::Container::_narrow(obj);
331 if ( CORBA::is_nil(ret) )
333 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed");
// Prefix the log path so that it reads USER@hostname:path, then store it on
// the container.
337 logFilename=":"+logFilename;
338 logFilename="@"+GetHostname()+logFilename;
339 logFilename=getenv( "USER" )+logFilename;
340 ret->logfilename(logFilename.c_str());
347 //=============================================================================
349 * Start a suitable Container in a list of machines
350 * \param params Machine Parameters required for the container
351 * \param possibleComputers list of machines usable for start
353 //=============================================================================
// Starts a container able to host the listed components.
//   params        - machine parameters required for the container
//   policy        - resource selection policy forwarded to the other overload
//   componentList - components the container must be able to load
// The candidate machines come from the resources manager. The module catalog
// is then queried for every component so that a dedicated container
// executable (CEXE implementation type) can be used.
// Returns a nil reference when the catalog cannot be narrowed, when two CEXE
// components are requested together, or when an exception is caught.
355 Engines::Container_ptr
356 SALOME_ContainerManager::
357 StartContainer(const Engines::MachineParameters& params,
358 Engines::ResPolicy policy,
359 const Engines::CompoList& componentList)
361 Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params,componentList);
363 // Look into ModulCatalog if a specific container must be launched
364 CORBA::String_var container_exe;
368 CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog");
369 SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ;
370 if (CORBA::is_nil (Catalog))
371 return Engines::Container::_nil();
372 // Loop through component list
373 for(unsigned int i=0;i<componentList.length();i++)
375 const char* compoi = componentList[i];
376 SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi);
// The handling of a component unknown to the catalog is not visible here.
377 if (CORBA::is_nil (compoInfo))
381 SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type();
// NOTE(review): container_exe is overwritten for every component, before the
// CEXE test; confirm that a later non-CEXE component cannot replace the
// executable name recorded for an earlier CEXE one.
382 container_exe=compoInfo->implementation_name();
383 if(impl==SALOME_ModuleCatalog::CEXE)
387 INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" );
388 return Engines::Container::_nil();
394 catch (ServiceUnreachable&)
396 INFOS("Caught exception: Naming Service Unreachable");
397 return Engines::Container::_nil();
401 INFOS("Caught unknown exception.");
402 return Engines::Container::_nil();
// Two exits: with the dedicated executable, or with the default value of the
// container_exe parameter. The condition selecting one is not visible here.
406 return StartContainer(params,possibleComputers,policy,container_exe.in());
408 return StartContainer(params,possibleComputers,policy);
411 #ifdef WITH_PACO_PARALLEL
412 //=============================================================================
414 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
415 * \param params Machine Parameters required for the container
416 * \param possibleComputers list of machines usable for find or start
418 * \return CORBA container reference.
420 //=============================================================================
// PaCO++ build of FindOrStartParallelContainer.
//   params_const      - machine parameters; copied into params because the
//                       hostname field is modified below
//   possibleComputers - machines usable for the lookup or the launch
// An existing container is searched first. When none is found and the chosen
// machine is the local host, the proxy process is launched, then the node
// processes, and each node is resolved in the naming service and narrowed to
// a PaCO::InterfaceParallel. Machines other than the local host are only
// reported as unsupported.
421 Engines::Container_ptr
422 SALOME_ContainerManager::
423 FindOrStartParallelContainer(const Engines::MachineParameters& params_const,
424 const Engines::MachineList& possibleComputers)
426 CORBA::Object_var obj;
427 PaCO::InterfaceManager_var proxy;
428 Engines::Container_ptr ret = Engines::Container::_nil();
429 Engines::MachineParameters params(params_const);
431 // Step 1 : Try to find a suitable container
432 // Currently not as good as it could be, since
433 // we would have to verify the number of nodes of the container
434 // when the user specifies it.
435 ret = FindContainer(params, possibleComputers);
437 if(CORBA::is_nil(ret)) {
438 // Step 2 : Starting a new parallel container
439 INFOS("[FindOrStartParallelContainer] Starting a parallel container");
441 // Step 2.1 : Choose a computer
// NOTE(review): StartContainer() reaches FindFirst through
// _ResManager->GetImpl() with a vector of strings, whereas this call passes
// the CORBA MachineList straight to _ResManager; confirm that this still
// compiles when WITH_PACO_PARALLEL is defined.
442 string theMachine = _ResManager->FindFirst(possibleComputers);
443 if(theMachine == "") {
444 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
445 INFOS("[FindOrStartParallelContainer] No possible computer found");
446 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
449 INFOS("[FindOrStartParallelContainer] on machine : " << theMachine);
451 if(theMachine == GetHostname()) {
452 // Step 3 : starting parallel container proxy
453 params.hostname = CORBA::string_dup(theMachine.c_str());
454 Engines::MachineParameters params_proxy(params);
456 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerProxy", params_proxy, "xterm");
458 catch(const SALOME_Exception & ex){
460 return Engines::Container::_nil();
462 // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes
463 params_proxy.nb_component_nodes = 0;
464 obj = LaunchParallelContainer(command, params_proxy, _NS->ContainerName(params));
// The same object is narrowed twice: as the container handed back to the
// caller and as the PaCO interface manager.
465 ret = Engines::Container::_narrow(obj);
466 proxy = PaCO::InterfaceManager::_narrow(obj);
468 // Step 4 : starting parallel container nodes
469 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerNode", params, "xterm");
// Node containers are named after the container name followed by Node and
// the node index.
470 string name = _NS->ContainerName(params) + "Node";
471 LaunchParallelContainer(command, params, name);
472 // Step 5 : connecting nodes and the proxy to actually create a parallel container
474 for (int i = 0; i < params.nb_component_nodes; i++) {
// Two variants of the same formatting call, presumably selected by a
// preprocessor test on the platform that is not visible here.
478 snprintf(buffer,5,"%d",i);
480 _snprintf(buffer,5,"%d",i);
482 string name_cont = name + string(buffer);
// NOTE(review): the copy made by string_dup is never freed once the
// std::string has been built from it; passing params.hostname directly would
// avoid the leak - confirm.
484 string theNodeMachine(CORBA::string_dup(params.hostname));
485 string containerNameInNS = _NS->BuildContainerNameForNS(name_cont.c_str(),theNodeMachine.c_str());
// Retry the resolution until the node is registered or count reaches zero.
486 int count = TIME_OUT_TO_LAUNCH_CONT;
487 obj = _NS->Resolve(containerNameInNS.c_str());
488 while (CORBA::is_nil(obj) && count) {
489 INFOS("[FindOrStartParallelContainer] CONNECTION FAILED !!!!!!!!!!!!!!!!!!!!!!!!");
496 obj = _NS->Resolve(containerNameInNS.c_str());
499 PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
500 MESSAGE("[FindOrStartParallelContainer] Deploying node : " << name);
// Every exception raised by the deployment calls (on lines not shown here) is
// logged; no rethrow is visible.
505 catch(CORBA::SystemException& e)
507 INFOS("Caught CORBA::SystemException. : " << e);
509 catch(PortableServer::POA::ServantAlreadyActive&)
511 INFOS("Caught CORBA::ServantAlreadyActiveException");
513 catch(CORBA::Exception&)
515 INFOS("Caught CORBA::Exception.");
517 catch(std::exception& exc)
519 INFOS("Caught std::exception - "<<exc.what());
523 INFOS("Caught unknown exception.");
525 INFOS("[FindOrStartParallelContainer] node " << name << " deployed");
528 INFOS("[FindOrStartParallelContainer] Currently parallel containers are launched only on the local host");
535 //=============================================================================
537 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
538 * \param params Machine Parameters required for the container
539 * \param possibleComputers list of machines usable for find or start
541 * \return CORBA container reference.
543 //=============================================================================
// Variant of FindOrStartParallelContainer whose visible statements only
// create a nil reference and log that the parallel extension is disabled and
// that SALOME Kernel must be recompiled to enable it.
// NOTE(review): presumably this is the branch compiled when
// WITH_PACO_PARALLEL is not defined and it returns ret - confirm.
544 Engines::Container_ptr
545 SALOME_ContainerManager::
546 FindOrStartParallelContainer(const Engines::MachineParameters& params,
547 const Engines::MachineList& possibleComputers)
549 Engines::Container_ptr ret = Engines::Container::_nil();
550 INFOS("[FindOrStartParallelContainer] is disabled !");
551 INFOS("[FindOrStartParallelContainer] recompile SALOME Kernel to enable parallel extension");
556 //=============================================================================
558 * Give a suitable Container in a list of machines
559 * \param params Machine Parameters required for the container
560 * \param possibleComputers list of machines usable for start
562 //=============================================================================
// Gives a container for the requested components.
//   params        - machine parameters required for the container
//   policy        - resource selection policy
//   componentList - components the container must be able to load
// When the SALOME_BATCH environment variable equals 1, the containers already
// registered in the naming service are handed out one after the other,
// wrapping around at the end of the list. Otherwise a container is started
// through StartContainer().
564 Engines::Container_ptr
565 SALOME_ContainerManager::
566 GiveContainer(const Engines::MachineParameters& params,
567 Engines::ResPolicy policy,
568 const Engines::CompoList& componentList)
570 char *valenv=getenv("SALOME_BATCH");
// NOTE(review): strcmp needs a non-null pointer and getenv returns null when
// the variable is unset; confirm a null test guards this line.
572 if (strcmp(valenv,"1")==0)
// The list is filled on first use.
574 if(_batchLaunchedContainers.empty())
575 fillBatchLaunchedContainers();
// Wrap around once the cursor has reached the end.
577 if (_batchLaunchedContainersIter == _batchLaunchedContainers.end())
578 _batchLaunchedContainersIter = _batchLaunchedContainers.begin();
// NOTE(review): if the list is still empty after the fill, begin() equals
// end() and the dereference below is invalid - confirm this cannot happen.
580 Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter);
581 _batchLaunchedContainersIter++;
584 return StartContainer(params,policy,componentList);
587 //=============================================================================
591 //=============================================================================
// Looks up, on a single machine, the container matching params.
//   params     - machine parameters; used with theMachine to build the
//                naming-service name
//   theMachine - host name
// Returns the narrowed container when the name resolves to a non-nil object,
// a nil reference otherwise.
593 Engines::Container_ptr
594 SALOME_ContainerManager::
595 FindContainer(const Engines::MachineParameters& params,
596 const char *theMachine)
598 string containerNameInNS(_NS->BuildContainerNameForNS(params,theMachine));
599 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
// _narrow can still yield nil if the object is not an Engines::Container.
600 if( !CORBA::is_nil(obj) )
601 return Engines::Container::_narrow(obj);
603 return Engines::Container::_nil();
606 //=============================================================================
610 //=============================================================================
// Looks up the container matching params on each machine of
// possibleComputers, in list order, through the single-machine overload.
//   params            - machine parameters required for the container
//   possibleComputers - machines to inspect
// Returns a nil reference when the loop ends without a match.
// NOTE(review): the statement executed on a match is not visible here -
// presumably it returns cont.
612 Engines::Container_ptr
613 SALOME_ContainerManager::
614 FindContainer(const Engines::MachineParameters& params,
615 const Engines::MachineList& possibleComputers)
617 MESSAGE("FindContainer "<<possibleComputers.length());
618 for(unsigned int i=0;i<possibleComputers.length();i++)
620 MESSAGE("FindContainer possible " << possibleComputers[i]);
621 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
622 if( !CORBA::is_nil(cont) )
625 MESSAGE("FindContainer: not found");
626 return Engines::Container::_nil();
629 //=============================================================================
630 /*! This method launches the parallel container.
631 * It may be placed in the resources manager.
633 * \param command to launch
634 * \param container's parameters
635 * \param name of the container
637 * \return CORBA container reference
639 //=============================================================================
// Runs the launch command of a parallel container and waits for the launched
// processes to register in the naming service.
//   command - shell command executed through system()
//   params  - container parameters; nb_component_nodes equal to 0 means that
//             the proxy is being launched, any other value that the nodes are
//   name    - container name; for nodes the node index is appended to it
// In proxy mode one name is polled; in node mode one name per node is polled.
// A failure message is logged when the last resolved object is still nil.
641 SALOME_ContainerManager::LaunchParallelContainer(const std::string& command,
642 const Engines::MachineParameters& params,
643 const std::string& name)
645 CORBA::Object_ptr obj = CORBA::Object::_nil();
646 string containerNameInNS;
647 MESSAGE("[LaunchParallelContainer] : command to launch...");
649 if (params.nb_component_nodes == 0) {
650 INFOS("[LaunchParallelContainer] launching the proxy of the parallel container");
651 int status = system(command.c_str());
// Failure statuses: -1 according to the first message (its test is on a line
// not shown) and 217.
653 INFOS("[LaunchParallelContainer] failed : system command status -1");
655 else if (status == 217) {
656 INFOS("[LaunchParallelContainer] failed : system command status 217");
659 int count = TIME_OUT_TO_LAUNCH_CONT;
// NOTE(review): the string_dup copy is never freed once the std::string has
// been built from it - confirm and consider passing params.hostname directly.
660 string theMachine(CORBA::string_dup(params.hostname));
661 containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(),theMachine.c_str());
663 INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy on " << theMachine);
// Poll until the proxy is registered or count reaches zero.
664 while (CORBA::is_nil(obj) && count) {
671 obj = _NS->Resolve(containerNameInNS.c_str());
675 INFOS("[LaunchParallelContainer] launching the nodes of the parallel container");
676 int status = system(command.c_str());
678 INFOS("[LaunchParallelContainer] failed : system command status -1");
680 else if (status == 217) {
681 INFOS("[LaunchParallelContainer] failed : system command status 217");
683 // We are waiting all the nodes
684 for (int i = 0; i < params.nb_component_nodes; i++) {
// obj and count are reset for every node, so only the reference of the last
// node is left in obj after the loop.
685 obj = CORBA::Object::_nil();
686 int count = TIME_OUT_TO_LAUNCH_CONT;
// Two variants of the same formatting call, presumably selected by a
// preprocessor test on the platform that is not visible here.
691 snprintf(buffer,5,"%d",i);
693 _snprintf(buffer,5,"%d",i);
696 string name_cont = name + string(buffer);
698 // I don't like this...
699 string theMachine(CORBA::string_dup(params.hostname));
700 containerNameInNS = _NS->BuildContainerNameForNS((char*) name_cont.c_str(),theMachine.c_str());
701 cerr << "[LaunchContainer] Waiting for Parllel Container node " << containerNameInNS << " on " << theMachine << endl;
702 while (CORBA::is_nil(obj) && count) {
709 obj = _NS->Resolve(containerNameInNS.c_str());
714 if ( CORBA::is_nil(obj) ) {
715 INFOS("[LaunchParallelContainer] failed");
720 //=============================================================================
722 * Get Id for container: a parallel container registers in Naming Service
723 * on the machine where process 0 runs. ContainerManager doesn't know the name
724 * of this machine before the launch of the parallel container. So to get
725 * the IOR of the parallel container in Naming Service, ContainerManager
726 * gives a unique Id. The parallel container registers its name under
727 * /ContainerManager/Id directory in NamingService
729 //=============================================================================
// Returns the id that StartContainer() forwards to the launch-command
// builders and appends to the /ContainerManager/id naming-service name.
// The body of this function is not visible here.
732 long SALOME_ContainerManager::GetIdForContainer(void)
// Rebuilds _batchLaunchedContainers from the entries found under /Containers
// in the naming service and resets the cursor to the first element.
738 void SALOME_ContainerManager::fillBatchLaunchedContainers()
// NOTE(review): the vector stores plain _ptr references, so clear() does not
// release those obtained by a previous fill - confirm whether they should be
// released first.
740 _batchLaunchedContainers.clear();
741 _NS->Change_Directory("/Containers");
742 vector<string> vec = _NS->list_directory_recurs();
743 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
744 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
// Kept as a _ptr on purpose: the reference is handed over to the vector.
745 Engines::Container_ptr cont=Engines::Container::_narrow(obj);
// Entries that are not containers are skipped.
746 if(!CORBA::is_nil(cont)){
747 _batchLaunchedContainers.push_back(cont);
750 _batchLaunchedContainersIter=_batchLaunchedContainers.begin();
753 //=============================================================================
755 * This is no longer valid (C++ containers are also Python containers)
757 //=============================================================================
// Tells whether a container name designates a Python container, that is
// whether its last two characters are Py.
//   ContainerName - null-terminated container name
// NOTE(review): for a name shorter than two characters the pointer computed
// below would precede the start of the string; presumably a length test on a
// line not visible here guards the comparison - confirm.
759 bool isPythonContainer(const char* ContainerName)
762 int len = strlen(ContainerName);
// Compare the two-character suffix with Py.
765 if (strcmp(ContainerName + len - 2, "Py") == 0)
771 //=============================================================================
773 * Builds the script to be launched
775 * If SALOME Application not defined ($APPLI),
776 * see BuildTempFileToLaunchRemoteContainer()
778 * Else rely on distant configuration. Command is under the form (example):
779 * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
780 * SALOME_Container containerName &"
782 * - where user is omitted if not specified in CatalogResources,
783 * - where distant path is always relative to user@machine $HOME, and
784 * equal to $APPLI if not specified in CatalogResources,
785 * - where hostNS is the hostname of CORBA naming server (set by scripts to
786 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
787 * - where portNS is the port used by CORBA naming server (set by scripts to
788 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
789 * - where workingdir is the requested working directory for the container.
790 * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME
792 //=============================================================================
// Builds the shell command that launches a container on a remote machine.
//   machine       - target machine, key of the resources catalog lookup
//   params        - machine parameters (node and processor counts, working
//                   directory, container name)
//   id            - container id, formatted into idc with %ld
//   container_exe - container executable name
// Without a SALOME application (flag _isAppliSalomeDefined unset) the work is
// delegated to BuildTempFileToLaunchRemoteContainer(). Otherwise the command
// is assembled from the remote access protocol, the optional user name, the
// application path followed by /runRemote.sh, the naming-service host and
// port, the optional working directory, the container executable and the
// naming-service options.
// Throws SALOME_Exception when the protocol is neither rsh nor ssh.
795 SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
796 (const string& machine,
797 const Engines::MachineParameters& params, const long id,const std::string& container_exe)
// Receives the decimal text of id.
801 char idc[3*sizeof(long)];
803 if ( ! _isAppliSalomeDefined )
804 command = BuildTempFileToLaunchRemoteContainer(machine, params);
808 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
// Number of processes: a count equal to zero is ignored, otherwise the number
// of nodes is multiplied by the number of processes per node. The value used
// when both counts are zero or negative is set on a line not visible here.
812 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
814 else if ( params.nb_node == 0 )
815 nbproc = params.nb_proc_per_node;
816 else if ( params.nb_proc_per_node == 0 )
817 nbproc = params.nb_node;
819 nbproc = params.nb_node * params.nb_proc_per_node;
822 // "ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
823 // SALOME_Container containerName &"
825 if (resInfo.Protocol == rsh)
827 else if (resInfo.Protocol == ssh)
830 throw SALOME_Exception("Unknown protocol");
// The user name is only added when the catalog provides one.
832 if (resInfo.UserName != "")
834 command += resInfo.UserName;
// Application path: catalog value when present, local APPLI value otherwise.
841 if (resInfo.AppliPath != "")
842 command += resInfo.AppliPath; // path relative to user@machine $HOME
845 ASSERT(getenv("APPLI"));
846 command += getenv("APPLI"); // path relative to user@machine $HOME
849 command += "/runRemote.sh ";
851 ASSERT(getenv("NSHOST"));
852 command += getenv("NSHOST"); // hostname of CORBA name server
855 ASSERT(getenv("NSPORT"));
856 command += getenv("NSPORT"); // port of CORBA name server
858 std::string wdir=params.workingdir.in();
861 command += " WORKINGDIR ";
// The $TEMPDIR value gets a special treatment on a line not visible here.
863 if(wdir == "$TEMPDIR")
865 command += wdir; // requested working directory
// MPI launch prefix; the condition selecting it is not visible here.
871 command += " mpirun -np ";
872 std::ostringstream o;
876 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
878 command += " SALOME_MPIContainer ";
881 command += " " +container_exe+ " ";
883 command += _NS->ContainerName(params);
885 sprintf(idc,"%ld",id);
888 AddOmninamesParams(command);
890 MESSAGE("command =" << command);
896 //=============================================================================
898 * builds the command to be launched.
900 //=============================================================================
// Builds the command that launches a container on the local host. The launch
// line is written into a temporary script file, whose path becomes the
// command.
//   params        - machine parameters (node and processor counts, working
//                   directory, container name)
//   id            - container id, written after the -id option
//   container_exe - executable used for containers that are not Python ones
// The lines starting with //command keep an earlier string-based version next
// to each stream write that replaced it.
903 SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
904 (const Engines::MachineParameters& params, const long id,const std::string& container_exe)
// The script path is kept in _TmpFileName, which RmTmpFile() reads.
906 _TmpFileName = BuildTemporaryFileName();
909 //char idc[3*sizeof(long)];
911 ofstream command_file( _TmpFileName.c_str() );
// MPI launch prefix; the condition selecting it is not visible here.
915 //command = "mpirun -np ";
916 command_file << "mpirun -np ";
// Number of processes: a count equal to zero is ignored, otherwise the number
// of nodes is multiplied by the number of processes per node. The value used
// when both counts are zero or negative is set on a line not visible here.
918 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
920 else if ( params.nb_node == 0 )
921 nbproc = params.nb_proc_per_node;
922 else if ( params.nb_proc_per_node == 0 )
923 nbproc = params.nb_node;
925 nbproc = params.nb_node * params.nb_proc_per_node;
927 //std::ostringstream o;
929 //o << nbproc << " ";
930 command_file << nbproc << " ";
932 //command += o.str();
934 //command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
935 command_file << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
938 if (isPythonContainer(params.container_name))
939 //command += "pyMPI SALOME_ContainerPy.py ";
940 command_file << "pyMPI SALOME_ContainerPy.py ";
942 //command += "SALOME_MPIContainer ";
943 command_file << "SALOME_MPIContainer ";
// NOTE(review): wdir is written into the script without quoting; a directory
// name holding spaces or shell metacharacters would break the command.
949 std::string wdir=params.workingdir.in();
952 // a working directory is requested
953 if(wdir == "$TEMPDIR")
955 // a new temporary directory is requested
956 string dir = OpUtil_Dir::GetTmpDir();
// Two variants of the directory change (cd /d and plain cd), presumably
// selected by a preprocessor test on the platform that is not visible here.
958 //command += "cd /d "+ dir +";";
959 command_file << "cd /d " << dir << endl;
961 //command = "cd "+ dir +";";
962 command_file << "cd " << dir << ";";
968 // a permanent directory is requested use it or create it
970 //command="mkdir " + wdir;
971 command_file << "mkdir " + wdir << endl;
972 command_file << "cd /D " + wdir << endl;
974 //command="mkdir -p " + wdir + " && cd " + wdir + ";";
975 command_file << "mkdir -p " << wdir << " && cd " << wdir + ";";
979 if (isPythonContainer(params.container_name))
980 //command += "SALOME_ContainerPy.py ";
981 command_file << "SALOME_ContainerPy.py ";
983 //command += container_exe + " ";
984 command_file << container_exe + " ";
989 /*command += _NS->ContainerName(params);
991 sprintf(idc,"%ld",id);
994 AddOmninamesParams(command);*/
// Arguments of the container: its name, its id and the naming-service
// option, whose leading dash is written here.
996 command_file << _NS->ContainerName(params);
997 command_file << " -id " << id << " -";
998 AddOmninamesParams(command_file);
999 command_file.close();
// 0x1ED equals octal 0755: read, write and execute for the owner, read and
// execute for group and others.
1002 chmod(_TmpFileName.c_str(), 0x1ED);
1004 command = _TmpFileName;
1006 MESSAGE("Command is file ... " << command);
1011 //=============================================================================
1013 * removes the generated temporary file in case of a remote launch.
1015 //=============================================================================
// Builds the shell command that deletes the temporary launch file recorded in
// _TmpFileName. Nothing is done when no file name is recorded.
// NOTE(review): the execution of the command and the release of the strdup
// buffer are presumably on lines not visible here - confirm that temp is
// freed.
1017 void SALOME_ContainerManager::RmTmpFile()
1019 if (_TmpFileName != "")
// Two variants of the delete command (rm and del /F), presumably selected by
// a preprocessor test on the platform that is not visible here.
1022 string command = "rm ";
1024 string command = "del /F ";
1026 command += _TmpFileName;
1027 char *temp = strdup(command.c_str());
1028 int lgthTemp = strlen(temp);
// Replace the last three characters by a single star wildcard: the third
// character from the end becomes a star and the string is cut right after it.
1029 temp[lgthTemp - 3] = '*';
1030 temp[lgthTemp - 2] = '\0';
1036 //=============================================================================
1038 * add to command all options relative to naming service.
1040 //=============================================================================
// Appends the naming-service option to a command string.
//   command - command line under construction, modified in place
// The option text has no leading dash: the dash is expected to be already
// present at the end of command.
// NOTE(review): the statement appending iorstr is not visible here.
1042 void SALOME_ContainerManager::AddOmninamesParams(string& command) const
// Address of the naming service as returned by getIORaddr().
1044 CORBA::String_var iorstr = _NS->getIORaddr();
1045 command += "ORBInitRef NameService=";
1050 //=============================================================================
1052 * add to command all options relative to naming service.
1054 //=============================================================================
// Writes the naming-service option to a file stream.
//   fileStream - stream of the script being generated
// The option text has no leading dash: the callers write the dash just before
// calling this function.
1056 void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const
// Address of the naming service as returned by getIORaddr().
1058 CORBA::String_var iorstr = _NS->getIORaddr();
1059 fileStream << "ORBInitRef NameService=";
1060 fileStream << iorstr;
1063 //=============================================================================
1065 * generate a file name in /tmp directory
1067 //=============================================================================
// Returns the name of the temporary file that receives a launch script,
// starting from the name provided by OpUtil_Dir::GetTmpFileName().
// NOTE(review): the .bat suffix is presumably the Windows variant; the other
// variant and the return statement are not visible here.
1069 string SALOME_ContainerManager::BuildTemporaryFileName() const
1071 //build more complex file name to support multiple salome session
1072 string aFileName = OpUtil_Dir::GetTmpFileName();
1076 aFileName += ".bat";
1082 //=============================================================================
1084 * Builds in a temporary file the script to be launched.
1086 * Used if SALOME Application ($APPLI) is not defined.
1087 * The command is built with data from CatalogResources, in which every path
1088 * used on the remote computer must be defined.
1090 //=============================================================================
// Writes a shell script that launches a container, copies it to the remote
// machine and builds the command that runs it there.
//   machine - target machine, key of the resources catalog lookup
//   params  - machine parameters (node and processor counts, container name)
// The script is copied with rcp or scp according to the protocol recorded in
// the resources catalog.
// Throws SALOME_Exception when the protocol is neither rsh nor ssh, and with
// the message Error of connection on remote host after the copy (the test
// guarding that throw is not visible here).
// NOTE(review): dynamic exception specifications such as throw(T) were
// deprecated in C++11 and removed in C++17.
1093 SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
1094 (const string& machine,
1095 const Engines::MachineParameters& params) throw(SALOME_Exception)
// The script path is kept in _TmpFileName, which RmTmpFile() reads.
1099 _TmpFileName = BuildTemporaryFileName();
1100 ofstream tempOutputFile;
1101 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
1102 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
1103 tempOutputFile << "#! /bin/sh" << endl;
1107 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
1108 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
// MPI launch prefix; the condition selecting it is not visible here.
1114 tempOutputFile << "mpirun -np ";
// Number of processes: a count equal to zero is ignored, otherwise the number
// of nodes is multiplied by the number of processes per node. The value used
// when both counts are zero or negative is set on a line not visible here.
1117 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
1119 else if ( params.nb_node == 0 )
1120 nbproc = params.nb_proc_per_node;
1121 else if ( params.nb_proc_per_node == 0 )
1122 nbproc = params.nb_node;
1124 nbproc = params.nb_node * params.nb_proc_per_node;
1126 std::ostringstream o;
1128 tempOutputFile << nbproc << " ";
1130 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// NOTE(review): streaming a null char pointer is undefined behaviour, which
// happens if KERNEL_ROOT_DIR is unset; consider testing the getenv result.
1134 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
// Container executable, MPI flavour.
1138 if (isPythonContainer(params.container_name))
1139 tempOutputFile << "pyMPI SALOME_ContainerPy.py ";
1141 tempOutputFile << "SALOME_MPIContainer ";
// Container executable, sequential flavour.
1146 if (isPythonContainer(params.container_name))
1147 tempOutputFile << "SALOME_ContainerPy.py ";
1149 tempOutputFile << "SALOME_Container ";
// Arguments of the container: its name and the naming-service option, whose
// leading dash is written here; the ampersand runs it in the background.
1152 tempOutputFile << _NS->ContainerName(params) << " -";
1153 AddOmninamesParams(tempOutputFile);
1154 tempOutputFile << " &" << endl;
1155 tempOutputFile.flush();
1156 tempOutputFile.close();
// 0x1ED equals octal 0755: read, write and execute for the owner, read and
// execute for group and others.
1158 chmod(_TmpFileName.c_str(), 0x1ED);
1161 // --- Build command
// Copy the script to the remote machine under the same path, with the copy
// tool matching the protocol.
1165 if (resInfo.Protocol == rsh)
1168 string commandRcp = "rcp ";
1169 commandRcp += _TmpFileName;
1171 commandRcp += machine;
1173 commandRcp += _TmpFileName;
1174 status = system(commandRcp.c_str());
1177 else if (resInfo.Protocol == ssh)
1180 string commandRcp = "scp ";
1181 commandRcp += _TmpFileName;
1183 commandRcp += machine;
1185 commandRcp += _TmpFileName;
1186 status = system(commandRcp.c_str());
1189 throw SALOME_Exception("Unknown protocol");
1192 throw SALOME_Exception("Error of connection on remote host");
// The remote access prefix is saved before the script path is appended.
1195 _CommandForRemAccess = command;
1197 command += _TmpFileName;
1205 //=============================================================================
1206 /*! Creates a command line that the container manager uses to launch
1207 * a parallel container.
1209 //=============================================================================
// Builds the shell command line that starts a parallel container executable
// on the local machine.
//
// exe_name : base name of the executable; params.parallelLib is appended to
//            it to obtain the program actually launched.
// params   : requested container parameters; parallelLib, hostname and
//            nb_component_nodes are read directly, and a subset of the
//            fields is copied to build the container name.
// log      : "default" redirects the output to a log file under /tmp.
//            NOTE(review): the xterm wrapping further down presumably
//            corresponds to log == "xterm" - confirm.
// Throws SALOME_Exception when params.parallelLib is neither "Dummy" nor
// "Mpi".
1211 SALOME_ContainerManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1212 const Engines::MachineParameters& params,
1213 const std::string& log)
1215 // This method knows the differences between the proxy and the nodes.
1216 // nb_component_nodes is not used in the same way if it is a proxy or
// NOTE(review): CORBA::string_dup returns a newly allocated copy, and
// std::string copies the characters again; the duplicate does not appear to
// be released with CORBA::string_free, which looks like a leak - confirm.
1220 string parallelLib(CORBA::string_dup(params.parallelLib));
1221 string hostname(CORBA::string_dup(params.hostname));
// Position of "Proxy" in exe_name. The result is stored in an int, so
// std::string::npos is narrowed when the substring is absent.
1222 int par = exe_name.find("Proxy");
// nb_component_nodes rendered as decimal text for the mpiexec command lines
// below.
1223 int nbproc = params.nb_component_nodes;
1225 sprintf(buffer,"%d",nbproc);
// Copy of the fields listed below into a fresh MachineParameters; this copy
// (not `params`) is what _NS->ContainerName() receives in this function.
1227 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1228 rtn->container_name = params.container_name;
1229 rtn->hostname = params.hostname;
1230 rtn->OS = params.OS;
1231 rtn->mem_mb = params.mem_mb;
1232 rtn->cpu_clock = params.cpu_clock;
1233 rtn->nb_proc_per_node = params.nb_proc_per_node;
1234 rtn->nb_node = params.nb_node;
1235 rtn->isMPI = params.isMPI;
// The program to run is the base name with the parallel library name as a
// suffix.
1237 string real_exe_name = exe_name + parallelLib;
// "Dummy" library: run the executable directly with the arguments
// <container name> <parallelLib> <hostname>, then the naming-service
// parameters.
1239 if (parallelLib == "Dummy")
1241 //command = "gdb --args ";
1242 //command = "valgrind --tool=memcheck --log-file=val_log ";
1243 //command += real_exe_name;
1245 command = real_exe_name;
1247 command += " " + _NS->ContainerName(rtn);
1248 command += " " + parallelLib;
1249 command += " " + hostname;
1251 AddOmninamesParams(command);
// "Mpi" library: the executable is started through mpiexec.
1254 else if (parallelLib == "Mpi")
1256 // Step 1 : check if MPI is started
// NOTE(review): presumably startMPI() is invoked when MPI has not been
// started yet - confirm what this guard runs.
1257 if (_MpiStarted == false)
// First form: nbproc MPI processes, arguments
// <container name> <parallelLib> <hostname>.
// NOTE(review): presumably one of the two forms is used for the proxy and
// the other for the nodes, selected through `par` - confirm which is which.
1266 command = "mpiexec -np " + string(buffer) + " ";
1267 // command += "gdb --args ";
1268 command += real_exe_name;
1269 command += " " + _NS->ContainerName(rtn);
1270 command += " " + parallelLib;
1271 command += " " + hostname;
1273 AddOmninamesParams(command);
// Second form: a single MPI process that receives nbproc as an extra
// argument placed after the container name.
1278 command = "mpiexec -np 1 ";
1279 command += real_exe_name;
1280 command += " " + _NS->ContainerName(rtn);
1281 command += " " + string(buffer);
1282 command += " " + parallelLib;
1283 command += " " + hostname;
1285 AddOmninamesParams(command);
// Any other library name is rejected.
// NOTE(review): there is no separator between the fixed text and the
// library name in the message - confirm whether a space was intended.
1290 std::string message("Unknown parallelLib" + parallelLib);
1291 throw SALOME_Exception(message.c_str());
// Default logging: send stdout and stderr to a file under /tmp whose name is
// built from the container name, the host name and $USER, and run the
// command in the background.
1295 if (log == "default")
1297 command += " > /tmp/";
1298 command += _NS->ContainerName(rtn);
1300 command += GetHostname();
// NOTE(review): getenv() returns a null pointer when USER is not set, and
// appending a null char pointer to a std::string is undefined behaviour -
// confirm or add a check.
1302 command += getenv( "USER" ) ;
1303 command += ".log 2>&1 &" ;
// Wrap the command in an xterm that re-exports LD_LIBRARY_PATH and PATH
// before running it; the xterm path is hard-coded.
1307 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1308 + command + " \" &";
1309 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
// The remaining lines are disabled variants of the xterm and remote-script
// launch, kept for reference.
1313 /* if (log == "xterm")
1315 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1318 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1319 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1320 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1321 command += "ssh cn01 sh " + fichier_commande + " &";
1322 cerr << "La commande : " << command << endl;
1326 void SALOME_ContainerManager::startMPI()
1328 cerr << "----------------------------------------------" << endl;
1329 cerr << "----------------------------------------------" << endl;
1330 cerr << "----------------------------------------------" << endl;
1331 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1332 cerr << "----------------------------------------------" << endl;
1333 cerr << "----------------------------------------------" << endl;
1334 cerr << "----------------------------------------------" << endl;
1336 int status = system("lamboot");
1339 INFOS("lamboot failed : system command status -1");
1341 else if (status == 217)
1343 INFOS("lamboot failed : system command status 217");