1 // Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
2 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License.
9 // This library is distributed in the hope that it will be useful
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "SALOME_ContainerManager.hxx"
21 #include "SALOME_NamingService.hxx"
22 #include "SALOME_ModuleCatalog.hh"
23 #include "Basics_Utils.hxx"
24 #include "Basics_DirUtils.hxx"
25 #include <sys/types.h>
31 #include "Utils_CorbaException.hxx"
32 #include "Batch_Date.hxx"
34 #ifdef WITH_PACO_PARALLEL
// Initial value of the retry counter `count` used by the loops that poll the
// naming service while waiting for a freshly launched container to register.
38 #define TIME_OUT_TO_LAUNCH_CONT 21
// Container references collected by fillBatchLaunchedContainers().
42 vector<Engines::Container_ptr> SALOME_ContainerManager::_batchLaunchedContainers;
// Cursor into _batchLaunchedContainers; GiveContainer() advances it and resets
// it to begin() once it reaches end().
44 vector<Engines::Container_ptr>::iterator SALOME_ContainerManager::_batchLaunchedContainersIter;
// Naming-service name under which the constructor registers this servant and
// which Shutdown() destroys again.
46 const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
49 //=============================================================================
53 * Define a CORBA single-thread policy for the server, which avoids having to deal
54 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
56 //=============================================================================
// Constructor: creates a child POA named "SThreadPOA" with a
// SINGLE_THREAD_MODEL thread policy, activates this servant in it and
// registers the resulting reference in the naming service under
// _ContainerManagerNameInNS.
//   orb    : ORB reference, duplicated into _orb
//   poa    : parent POA; its POAManager is reused for the child POA
//   rm, ns : resources-manager and naming-service helpers
//            NOTE(review): presumably stored in _ResManager / _NS; the
//            assignments are not visible in this listing - confirm.
58 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns)
60 MESSAGE("constructor");
65 PortableServer::POAManager_var pman = poa->the_POAManager();
66 _orb = CORBA::ORB::_duplicate(orb) ;
67 CORBA::PolicyList policies;
// Request the single-thread model for the new POA.
// NOTE(review): the statement sizing `policies` before index 0 is written is
// not visible in this listing - confirm it exists.
69 PortableServer::ThreadPolicy_var threadPol =
70 poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
71 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
73 _poa = poa->create_POA("SThreadPOA",pman,policies);
// Activate this object in the new POA and obtain its CORBA reference.
75 PortableServer::ObjectId_var id = _poa->activate_object(this);
76 CORBA::Object_var obj = _poa->id_to_reference(id);
77 Engines::ContainerManager_var refContMan =
78 Engines::ContainerManager::_narrow(obj);
// Publish the reference so that clients can resolve the container manager.
80 _NS->Register(refContMan,_ContainerManagerNameInNS);
// True when the APPLI environment variable is set; later selects how the
// remote launch command is built (see BuildCommandToLaunchRemoteContainer).
82 _isAppliSalomeDefined = (getenv("APPLI") != 0);
83 MESSAGE("constructor end");
86 //=============================================================================
90 //=============================================================================
// Destructor: only emits a trace message in the lines visible here.
92 SALOME_ContainerManager::~SALOME_ContainerManager()
94 MESSAGE("destructor");
97 //=============================================================================
99 * shutdown all the containers, then the ContainerManager servant
101 //=============================================================================
// Shuts down every container, removes the container manager's entry from the
// naming service and deactivates this servant in its POA.
103 void SALOME_ContainerManager::Shutdown()
// Containers first, so that none is left registered once the manager is gone.
106 ShutdownContainers();
107 _NS->Destroy_Name(_ContainerManagerNameInNS);
// Deactivate the servant using the object id the POA holds for it.
108 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
109 _poa->deactivate_object(oid);
110 //_remove_ref() has already been done at creation
114 //=============================================================================
116 * Loop on all the containers listed in naming service, ask shutdown on each
118 //=============================================================================
// Walks the "/Containers" directory of the naming service, keeps the names
// that resolve to a non-nil Engines::Container, then processes each of them;
// CORBA and unknown exceptions raised while doing so are logged and ignored.
// NOTE(review): the declaration of `isOK`, the test on it, the `try`
// keywords and the actual shutdown call are not visible in this listing.
120 void SALOME_ContainerManager::ShutdownContainers()
122 MESSAGE("ShutdownContainers");
124 isOK = _NS->Change_Directory("/Containers");
126 vector<string> vec = _NS->list_directory_recurs();
127 list<string> lstCont;
// Pass 1: collect the names that really designate containers.
128 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
130 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
131 Engines::Container_var cont=Engines::Container::_narrow(obj);
132 if(!CORBA::is_nil(cont)){
133 lstCont.push_back((*iter));
// Pass 2: trace the collected names (loop body not visible here).
136 MESSAGE("Container list: ");
137 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
// Pass 3: resolve each name again and act on the container if it still exists.
140 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
142 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
143 Engines::Container_var cont=Engines::Container::_narrow(obj);
144 if(!CORBA::is_nil(cont))
146 MESSAGE("ShutdownContainers: " << (*iter));
// Best effort: a failing container must not prevent the others from being handled.
151 catch(CORBA::SystemException& e)
153 INFOS("CORBA::SystemException ignored : " << e);
155 catch(CORBA::Exception&)
157 INFOS("CORBA::Exception ignored.");
161 INFOS("Unknown exception ignored.");
165 MESSAGE("ShutdownContainers: no container ref for " << (*iter));
170 //=============================================================================
172 * Find a suitable Container in a list of machines, or start one
173 * \param params Machine Parameters required for the container
174 * \param possibleComputers list of machines usable for find or start
176 //=============================================================================
// Looks for an existing container matching `params` on one of
// `possibleComputers`; when none is found, starts one using the
// Engines::P_FIRST resource policy.
// NOTE(review): the statement executed when `ret` is non-nil is not visible
// in this listing (presumably `return ret;`).
178 Engines::Container_ptr
179 SALOME_ContainerManager::
180 FindOrStartContainer(const Engines::MachineParameters& params,
181 const Engines::MachineList& possibleComputers)
183 Engines::Container_ptr ret = FindContainer(params,possibleComputers);
184 if(!CORBA::is_nil(ret))
186 MESSAGE("Container doesn't exist try to launch it ...");
188 return StartContainer(params,possibleComputers,Engines::P_FIRST);
192 //=============================================================================
194 * Start a suitable Container in a list of machines
195 * \param params Machine Parameters required for the container
196 * \param possibleComputers list of machines usable for start
198 //=============================================================================
// Starts a container on one machine chosen from `possibleComputers`.
//   params            : requirements/description of the container to start
//   possibleComputers : candidate machine names
//   policy            : selects FindFirst / FindNext / FindBest on the
//                       resources manager (P_FIRST / P_CYCL / P_BEST)
//   container_exe     : executable name forwarded to the command builders
// Returns the container reference found in the naming service after launch;
// several failure paths visible below return Engines::Container::_nil().
// NOTE(review): this listing lacks many lines (braces, `switch`, `try`,
// `else`, declarations of lm / theMachine / command / id); the comments below
// describe only what the visible statements do.
200 Engines::Container_ptr
201 SALOME_ContainerManager::
202 StartContainer(const Engines::MachineParameters& params,
203 const Engines::MachineList& possibleComputers,
204 Engines::ResPolicy policy,const std::string& container_exe)
// When built with PaCO++ and a parallel library is requested, delegate to the
// parallel-container path.
206 #ifdef WITH_PACO_PARALLEL
207 std::string parallelLib(params.parallelLib);
208 if (parallelLib != "")
209 return FindOrStartParallelContainer(params, possibleComputers);
212 string containerNameInNS;
// Buffer receiving the decimal text of `id` (see sprintf below).
213 char idc[3*sizeof(long)];
214 Engines::Container_ptr ret = Engines::Container::_nil();
216 MESSAGE("SALOME_ContainerManager::StartContainer " <<
217 possibleComputers.length());
// Copy the CORBA machine list into `lm` for the resources manager.
220 for(unsigned int i=0;i<possibleComputers.length();i++)
221 lm.push_back(string(possibleComputers[i]));
// Pick the target machine according to the requested policy.
226 case Engines::P_FIRST:
227 theMachine=_ResManager->GetImpl()->FindFirst(lm);
229 case Engines::P_CYCL:
230 theMachine=_ResManager->GetImpl()->FindNext(lm);
232 case Engines::P_BEST:
233 theMachine=_ResManager->GetImpl()->FindBest(lm);
// A SALOME_Exception during machine selection aborts the launch.
237 catch( const SALOME_Exception &ex ){
239 return Engines::Container::_nil();
242 //If the machine name is localhost use the real name
243 if(theMachine == "localhost")
244 theMachine=Kernel_Utils::GetHostname();
246 MESSAGE("try to launch it on " << theMachine);
248 // Get Id for container: a parallel container registers in Naming Service
249 // on the machine where is process 0. ContainerManager doesn't know the name
250 // of this machine before the launch of the parallel container. So to get
251 // the IOR of the parallel container in Naming Service, ContainerManager
252 // gives a unique Id. The parallel container registers its name under
253 // /ContainerManager/Id directory in NamingService
255 id = GetIdForContainer();
259 MESSAGE("SALOME_ContainerManager::StartContainer : " <<
260 "no possible computer");
261 return Engines::Container::_nil();
// Local host: build a local launch command; the other assignment builds a
// remote one (its `else` is not visible in this listing).
263 else if(theMachine==Kernel_Utils::GetHostname())
264 command = BuildCommandToLaunchLocalContainer(params,id,container_exe);
266 command = BuildCommandToLaunchRemoteContainer(theMachine,params,id,container_exe);
268 // RmTmpFile(); Too early! May be this function has not been used for a long time...
270 //check if an entry exists in Naming service
// Two alternative names: "/ContainerManager/id<id>" or the name built from
// params and machine. NOTE(review): the selecting condition is not visible.
273 containerNameInNS = "/ContainerManager/id";
274 sprintf(idc,"%ld",id);
275 containerNameInNS += idc;
278 containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str());
280 SCRUTE(containerNameInNS);
281 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
282 if ( !CORBA::is_nil(obj) )
284 // shutdown the registered container if it exists
285 Engines::Container_var cont=Engines::Container::_narrow(obj);
286 if(!CORBA::is_nil(cont))
292 catch(CORBA::Exception&)
294 INFOS("CORBA::Exception ignored.");
299 //redirect stdout and stderr in a file
// NOTE(review): getenv("USER") is concatenated without a null check; if USER
// is unset, appending a null const char* to a std::string is undefined - confirm.
300 string logFilename="/tmp/"+_NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ;
// Output goes to the log file and the command is run in the background ("&").
301 command += " > " + logFilename + " 2>&1 &";
303 // launch container with a system call
304 int status=system(command.c_str());
306 RmTmpFile(); // command file can be removed here
// Status values -1 and 217 are reported as launch failures.
309 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
310 "(system command status -1)");
311 return Engines::Container::_nil();
313 else if (status == 217){
314 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
315 "(system command status 217)");
316 return Engines::Container::_nil();
// Poll the naming service until the container is found or `count` reaches 0.
// NOTE(review): the wait and the decrement of `count` are not visible here.
319 int count=TIME_OUT_TO_LAUNCH_CONT;
320 MESSAGE("count = "<<count);
321 while ( CORBA::is_nil(ret) && count ){
329 MESSAGE( count << ". Waiting for container on " << theMachine);
331 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
332 ret=Engines::Container::_narrow(obj);
335 if ( CORBA::is_nil(ret) )
337 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed");
// Tell the container where its log lives, formatted as "<USER>@<host>:<path>".
341 logFilename=":"+logFilename;
342 logFilename="@"+Kernel_Utils::GetHostname()+logFilename;
343 logFilename=getenv( "USER" )+logFilename;
344 ret->logfilename(logFilename.c_str());
351 //=============================================================================
353 * Start a suitable Container in a list of machines
354 * \param params Machine Parameters required for the container
355 * \param possibleComputers list of machines usable for start
357 //=============================================================================
// Overload that derives the candidate machines from `componentList` through
// the resources manager, consults the module catalog registered at
// "/Kernel/ModulCatalog" for each component, and then delegates to the
// machine-list overload of StartContainer.
// Returns Engines::Container::_nil() when the catalog cannot be narrowed, on
// the "2 CEXE component" error, and on the exceptions handled below.
// NOTE(review): `try`, braces and the conditions choosing between the two
// final return statements are not visible in this listing.
359 Engines::Container_ptr
360 SALOME_ContainerManager::
361 StartContainer(const Engines::MachineParameters& params,
362 Engines::ResPolicy policy,
363 const Engines::CompoList& componentList)
365 Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params,componentList);
367 // Look into ModulCatalog if a specific container must be launched
368 CORBA::String_var container_exe;
372 CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog");
373 SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ;
374 if (CORBA::is_nil (Catalog))
375 return Engines::Container::_nil();
376 // Loop through component list
377 for(unsigned int i=0;i<componentList.length();i++)
379 const char* compoi = componentList[i];
380 SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi);
381 if (CORBA::is_nil (compoInfo))
// Read the component's implementation type and name; the name is kept in
// container_exe and CEXE implementations get special handling below.
385 SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type();
386 container_exe=compoInfo->implementation_name();
387 if(impl==SALOME_ModuleCatalog::CEXE)
391 INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" );
392 return Engines::Container::_nil();
398 catch (ServiceUnreachable&)
400 INFOS("Caught exception: Naming Service Unreachable");
401 return Engines::Container::_nil();
405 INFOS("Caught unknown exception.");
406 return Engines::Container::_nil();
// Either launch with the specific executable found in the catalog, or with
// the default one of the machine-list overload.
410 return StartContainer(params,possibleComputers,policy,container_exe.in());
412 return StartContainer(params,possibleComputers,policy);
415 #ifdef WITH_PACO_PARALLEL
416 //=============================================================================
418 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
419 * \param params Machine Parameters required for the container
420 * \param possibleComputers list of machines usable for find or start
422 * \return CORBA container reference.
424 //=============================================================================
// PaCO++ build: looks for an existing container matching the parameters and,
// if none is found, launches a parallel container (proxy, then nodes) on the
// local host and connects the nodes to the proxy.
//   params_const      : requested parameters; copied into `params` so that
//                       the hostname can be filled in
//   possibleComputers : candidate machines
// NOTE(review): this listing lacks lines (declarations of `command` and
// `buffer`, `try`, braces, the final return); comments describe only the
// visible statements.
425 Engines::Container_ptr
426 SALOME_ContainerManager::
427 FindOrStartParallelContainer(const Engines::MachineParameters& params_const,
428 const Engines::MachineList& possibleComputers)
430 CORBA::Object_var obj;
431 PaCO::InterfaceManager_var proxy;
432 Engines::Container_ptr ret = Engines::Container::_nil();
433 Engines::MachineParameters params(params_const);
435 // Step 1 : Try to find a suitable container
436 // Currently not as good as could be since
437 // we have to verified the number of nodes of the container
438 // if a user tell that.
439 ret = FindContainer(params, possibleComputers);
441 if(CORBA::is_nil(ret)) {
442 // Step 2 : Starting a new parallel container
443 INFOS("[FindOrStartParallelContainer] Starting a parallel container");
445 // Step 2.1 : Choose a computer
446 string theMachine = _ResManager->FindFirst(possibleComputers);
447 if(theMachine == "") {
448 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
449 INFOS("[FindOrStartParallelContainer] No possible computer found");
450 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
453 INFOS("[FindOrStartParallelContainer] on machine : " << theMachine);
// Only the local host is handled; the other case is merely logged (see the
// last INFOS of this function).
455 if(theMachine == Kernel_Utils::GetHostname()) {
456 // Step 3 : starting parallel container proxy
457 params.hostname = CORBA::string_dup(theMachine.c_str());
458 Engines::MachineParameters params_proxy(params);
460 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerProxy", params_proxy, "xterm");
462 catch(const SALOME_Exception & ex){
464 return Engines::Container::_nil();
466 // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes
467 params_proxy.nb_component_nodes = 0;
468 obj = LaunchParallelContainer(command, params_proxy, _NS->ContainerName(params));
// The proxy object is narrowed both as a container and as a PaCO interface manager.
469 ret = Engines::Container::_narrow(obj);
470 proxy = PaCO::InterfaceManager::_narrow(obj);
472 // Step 4 : starting parallel container nodes
473 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerNode", params, "xterm");
// Node containers are named "<container name>Node<i>".
474 string name = _NS->ContainerName(params) + "Node";
475 LaunchParallelContainer(command, params, name);
476 // Step 5 : connecting nodes and the proxy to actually create a parallel container
478 for (int i = 0; i < params.nb_component_nodes; i++) {
// Two alternative formatting calls; NOTE(review): presumably selected by a
// platform #ifdef that is not visible here.
482 snprintf(buffer,5,"%d",i);
484 _snprintf(buffer,5,"%d",i);
486 string name_cont = name + string(buffer);
// NOTE(review): CORBA::string_dup returns a newly allocated copy that is not
// released here after std::string copies it - looks like a leak; confirm.
488 string theNodeMachine(CORBA::string_dup(params.hostname));
489 string containerNameInNS = _NS->BuildContainerNameForNS(name_cont.c_str(),theNodeMachine.c_str());
// Retry resolving the node until it is registered or `count` reaches 0.
490 int count = TIME_OUT_TO_LAUNCH_CONT;
491 obj = _NS->Resolve(containerNameInNS.c_str());
492 while (CORBA::is_nil(obj) && count) {
493 INFOS("[FindOrStartParallelContainer] CONNECTION FAILED !!!!!!!!!!!!!!!!!!!!!!!!");
500 obj = _NS->Resolve(containerNameInNS.c_str());
503 PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
504 MESSAGE("[FindOrStartParallelContainer] Deploying node : " << name);
// Deployment errors are logged, not propagated.
509 catch(CORBA::SystemException& e)
511 INFOS("Caught CORBA::SystemException. : " << e);
513 catch(PortableServer::POA::ServantAlreadyActive&)
515 INFOS("Caught CORBA::ServantAlreadyActiveException");
517 catch(CORBA::Exception&)
519 INFOS("Caught CORBA::Exception.");
521 catch(std::exception& exc)
523 INFOS("Caught std::exception - "<<exc.what());
527 INFOS("Caught unknown exception.");
529 INFOS("[FindOrStartParallelContainer] node " << name << " deployed");
532 INFOS("[FindOrStartParallelContainer] Currently parallel containers are launched only on the local host");
539 //=============================================================================
541 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
542 * \param params Machine Parameters required for the container
543 * \param possibleComputers list of machines usable for find or start
545 * \return CORBA container reference.
547 //=============================================================================
// Variant used when the parallel extension is not compiled in: it only logs
// that the feature is disabled. `ret` is initialised to a nil reference.
// NOTE(review): the return statement is not visible in this listing
// (presumably `return ret;`).
548 Engines::Container_ptr
549 SALOME_ContainerManager::
550 FindOrStartParallelContainer(const Engines::MachineParameters& params,
551 const Engines::MachineList& possibleComputers)
553 Engines::Container_ptr ret = Engines::Container::_nil();
554 INFOS("[FindOrStartParallelContainer] is disabled !");
555 INFOS("[FindOrStartParallelContainer] recompile SALOME Kernel to enable parallel extension");
560 //=============================================================================
562 * Give a suitable Container in a list of machines
563 * \param params Machine Parameters required for the container
564 * \param possibleComputers list of machines usable for start
566 //=============================================================================
// Returns a container for the request. When the SALOME_BATCH environment
// variable equals "1", containers already listed in the naming service are
// handed out one after another, wrapping around at the end of the list;
// otherwise a container is started through StartContainer().
// NOTE(review): the null check on `valenv` and the `return rtn;` statement
// are not visible in this listing - confirm both exist.
568 Engines::Container_ptr
569 SALOME_ContainerManager::
570 GiveContainer(const Engines::MachineParameters& params,
571 Engines::ResPolicy policy,
572 const Engines::CompoList& componentList)
574 char *valenv=getenv("SALOME_BATCH");
576 if (strcmp(valenv,"1")==0)
// Populate the list on first use.
578 if(_batchLaunchedContainers.empty())
579 fillBatchLaunchedContainers();
// Wrap the cursor around once the end of the list is reached.
581 if (_batchLaunchedContainersIter == _batchLaunchedContainers.end())
582 _batchLaunchedContainersIter = _batchLaunchedContainers.begin();
// NOTE(review): if no container was found, begin() == end() and this
// dereference is undefined - confirm that an empty list cannot occur here.
584 Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter);
585 _batchLaunchedContainersIter++;
588 return StartContainer(params,policy,componentList);
591 //=============================================================================
595 //=============================================================================
// Looks up, on a single machine, the container whose naming-service name is
// built from `params` and `theMachine`.
// Returns the narrowed container reference when the name resolves to a
// non-nil object, and a nil reference otherwise.
597 Engines::Container_ptr
598 SALOME_ContainerManager::
599 FindContainer(const Engines::MachineParameters& params,
600 const char *theMachine)
602 string containerNameInNS(_NS->BuildContainerNameForNS(params,theMachine));
603 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
604 if( !CORBA::is_nil(obj) )
605 return Engines::Container::_narrow(obj);
607 return Engines::Container::_nil();
610 //=============================================================================
614 //=============================================================================
// Tries each machine of `possibleComputers` in order with the single-machine
// FindContainer(); returns a nil reference when no machine has a match.
// NOTE(review): the statement executed when `cont` is non-nil is not visible
// in this listing (presumably `return cont;`).
616 Engines::Container_ptr
617 SALOME_ContainerManager::
618 FindContainer(const Engines::MachineParameters& params,
619 const Engines::MachineList& possibleComputers)
621 MESSAGE("FindContainer "<<possibleComputers.length());
622 for(unsigned int i=0;i<possibleComputers.length();i++)
624 MESSAGE("FindContainer possible " << possibleComputers[i]);
625 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
626 if( !CORBA::is_nil(cont) )
629 MESSAGE("FindContainer: not found");
630 return Engines::Container::_nil();
633 //=============================================================================
634 /*! This method launches the parallel container.
635 * It may eventually be moved into the resources manager.
637 * \param command to launch
638 * \param container's parameters
639 * \param name of the container
641 * \return CORBA container reference
643 //=============================================================================
// Runs `command` with system() and then waits for the launched parallel
// container part(s) to appear in the naming service.
//   command : shell command to execute
//   params  : nb_component_nodes == 0 means "launch the proxy"; otherwise the
//             nodes named "<name><i>" for i in [0, nb_component_nodes) are awaited
//   name    : base name used to build the naming-service entries
// NOTE(review): the return type line, the `else` separating the proxy and
// node branches, the `status == -1` tests, the declaration of `buffer`, the
// loop bodies' wait/decrement and the final return are not visible here.
645 SALOME_ContainerManager::LaunchParallelContainer(const std::string& command,
646 const Engines::MachineParameters& params,
647 const std::string& name)
649 CORBA::Object_ptr obj = CORBA::Object::_nil();
650 string containerNameInNS;
651 MESSAGE("[LaunchParallelContainer] : command to launch...");
// --- Proxy branch ---
653 if (params.nb_component_nodes == 0) {
654 INFOS("[LaunchParallelContainer] launching the proxy of the parallel container");
655 int status = system(command.c_str());
657 INFOS("[LaunchParallelContainer] failed : system command status -1");
659 else if (status == 217) {
660 INFOS("[LaunchParallelContainer] failed : system command status 217");
663 int count = TIME_OUT_TO_LAUNCH_CONT;
// NOTE(review): CORBA::string_dup returns a newly allocated copy that is not
// released here after std::string copies it - looks like a leak; confirm.
664 string theMachine(CORBA::string_dup(params.hostname));
665 containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(),theMachine.c_str());
667 INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy on " << theMachine);
// Poll until the proxy is registered or `count` reaches 0.
668 while (CORBA::is_nil(obj) && count) {
675 obj = _NS->Resolve(containerNameInNS.c_str());
// --- Nodes branch ---
679 INFOS("[LaunchParallelContainer] launching the nodes of the parallel container");
680 int status = system(command.c_str());
682 INFOS("[LaunchParallelContainer] failed : system command status -1");
684 else if (status == 217) {
685 INFOS("[LaunchParallelContainer] failed : system command status 217");
687 // We are waiting all the nodes
688 for (int i = 0; i < params.nb_component_nodes; i++) {
689 obj = CORBA::Object::_nil();
690 int count = TIME_OUT_TO_LAUNCH_CONT;
// Two alternative formatting calls; NOTE(review): presumably selected by a
// platform #ifdef that is not visible here.
695 snprintf(buffer,5,"%d",i);
697 _snprintf(buffer,5,"%d",i);
700 string name_cont = name + string(buffer);
702 // I don't like this...
703 string theMachine(CORBA::string_dup(params.hostname));
704 containerNameInNS = _NS->BuildContainerNameForNS((char*) name_cont.c_str(),theMachine.c_str());
705 cerr << "[LaunchContainer] Waiting for Parllel Container node " << containerNameInNS << " on " << theMachine << endl;
// Poll until node i is registered or `count` reaches 0.
706 while (CORBA::is_nil(obj) && count) {
713 obj = _NS->Resolve(containerNameInNS.c_str());
// A nil `obj` at this point is reported as a failed launch.
718 if ( CORBA::is_nil(obj) ) {
719 INFOS("[LaunchParallelContainer] failed");
724 //=============================================================================
726 * Get Id for container: a parallel container registers in the Naming Service
727 * on the machine where process 0 runs. ContainerManager doesn't know the name
728 * of this machine before the launch of the parallel container. So, to get
729 * the IOR of the parallel container in the Naming Service, ContainerManager
730 * gives it a unique Id. The parallel container registers its name under the
731 * /ContainerManager/Id directory in the Naming Service
733 //=============================================================================
// Returns the id that StartContainer() passes to the launch-command builders
// and may append to "/ContainerManager/id" to form a naming-service name.
// NOTE(review): the body is not visible in this listing, so how the id is
// generated cannot be stated from here.
736 long SALOME_ContainerManager::GetIdForContainer(void)
// Rebuilds _batchLaunchedContainers from every entry under "/Containers" in
// the naming service that narrows to a non-nil Engines::Container, then
// resets the cursor to the first element.
742 void SALOME_ContainerManager::fillBatchLaunchedContainers()
744 _batchLaunchedContainers.clear();
745 _NS->Change_Directory("/Containers");
746 vector<string> vec = _NS->list_directory_recurs();
747 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
748 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
// Held as a plain _ptr (not a _var) so that the reference stored in the
// vector is not released when `cont` goes out of scope.
749 Engines::Container_ptr cont=Engines::Container::_narrow(obj);
750 if(!CORBA::is_nil(cont)){
751 _batchLaunchedContainers.push_back(cont);
754 _batchLaunchedContainersIter=_batchLaunchedContainers.begin();
757 //=============================================================================
759 * This is no longer valid (C++ containers are also Python containers)
761 //=============================================================================
// Tests whether `ContainerName` ends with the two characters "Py".
// NOTE(review): the length guard before `len - 2` and the return statements
// are not visible in this listing - confirm names shorter than two characters
// are handled.
763 bool isPythonContainer(const char* ContainerName)
766 int len = strlen(ContainerName);
// Compare the last two characters of the name with "Py".
769 if (strcmp(ContainerName + len - 2, "Py") == 0)
775 //=============================================================================
777 * Builds the script to be launched
779 * If SALOME Application not defined ($APPLI),
780 * see BuildTempFileToLaunchRemoteContainer()
782 * Else rely on distant configuration. Command is under the form (example):
783 * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
784 * SALOME_Container containerName &"
786 * - where user is omitted if not specified in CatalogResources,
787 * - where distant path is always relative to user@machine $HOME, and
788 * equal to $APPLI if not specified in CatalogResources,
789 * - where hostNS is the hostname of CORBA naming server (set by scripts to
790 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
791 * - where portNS is the port used by CORBA naming server (set by scripts to
792 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
793 * - where workingdir is the requested working directory for the container.
794 * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME
796 //=============================================================================
// Builds the shell command that launches a container on a remote machine.
//   machine       : target machine, used to look up its resource description
//   params        : container parameters (node counts, working dir, name)
//   id            : container id, formatted into `idc`
//   container_exe : container executable name inserted into the command
// When $APPLI is not defined (_isAppliSalomeDefined false) the command comes
// from BuildTempFileToLaunchRemoteContainer(); otherwise it is assembled from
// the resource description and the APPLI / NSHOST / NSPORT environment.
// Throws SALOME_Exception("Unknown protocol") for an unsupported protocol.
// NOTE(review): this listing lacks lines (return type, declarations of
// `command` / `nbproc`, `else` keywords, literal fragments appended to the
// command, the return); comments describe only the visible statements.
799 SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
800 (const string& machine,
801 const Engines::MachineParameters& params, const long id,const std::string& container_exe)
// Buffer receiving the decimal text of `id`.
805 char idc[3*sizeof(long)];
807 if ( ! _isAppliSalomeDefined )
808 command = BuildTempFileToLaunchRemoteContainer(machine, params);
812 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
// Process count: whichever of nb_node / nb_proc_per_node is non-zero, or
// their product when both are set.
816 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
818 else if ( params.nb_node == 0 )
819 nbproc = params.nb_proc_per_node;
820 else if ( params.nb_proc_per_node == 0 )
821 nbproc = params.nb_node;
823 nbproc = params.nb_node * params.nb_proc_per_node;
826 // "ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
827 // SALOME_Container containerName &"
// Remote-shell program chosen from the resource's protocol.
829 if (resInfo.Protocol == rsh)
831 else if (resInfo.Protocol == ssh)
834 throw SALOME_Exception("Unknown protocol");
// Optional user name from the resource description.
836 if (resInfo.UserName != "")
838 command += resInfo.UserName;
// Application path: from the resource description, else from $APPLI.
845 if (resInfo.AppliPath != "")
846 command += resInfo.AppliPath; // path relative to user@machine $HOME
849 ASSERT(getenv("APPLI"));
850 command += getenv("APPLI"); // path relative to user@machine $HOME
853 command += "/runRemote.sh ";
855 ASSERT(getenv("NSHOST"));
856 command += getenv("NSHOST"); // hostname of CORBA name server
859 ASSERT(getenv("NSPORT"));
860 command += getenv("NSPORT"); // port of CORBA name server
862 std::string wdir=params.workingdir.in();
865 command += " WORKINGDIR ";
// "$TEMPDIR" gets special treatment (replacement value not visible here).
867 if(wdir == "$TEMPDIR")
869 command += wdir; // requested working directory
// MPI launch prefix and MPI container executable.
875 command += " mpirun -np ";
876 std::ostringstream o;
880 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
882 command += " SALOME_MPIContainer ";
885 command += " " +container_exe+ " ";
887 command += _NS->ContainerName(params);
889 sprintf(idc,"%ld",id);
// Append the naming-service initial reference option.
892 AddOmninamesParams(command);
894 MESSAGE("command =" << command);
900 //=============================================================================
902 * builds the command to be launched.
904 //=============================================================================
// Writes the local launch command into a temporary script file, makes the
// file executable and uses the file name as the command.
//   params        : container parameters (node counts, working dir, name)
//   id            : container id, written after " -id "
//   container_exe : container executable name written into the script
// Side effect: _TmpFileName is set to the generated script path.
// NOTE(review): this listing lacks lines (return type, declarations of
// `command` / `nbproc`, the MPI test, platform #ifdef / #else, `else`
// keywords, the return); comments describe only the visible statements.
907 SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
908 (const Engines::MachineParameters& params, const long id,const std::string& container_exe)
910 _TmpFileName = BuildTemporaryFileName();
913 //char idc[3*sizeof(long)];
915 ofstream command_file( _TmpFileName.c_str() );
// --- MPI launch prefix ---
919 //command = "mpirun -np ";
920 command_file << "mpirun -np ";
// Process count: whichever of nb_node / nb_proc_per_node is non-zero, or
// their product when both are set.
922 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
924 else if ( params.nb_node == 0 )
925 nbproc = params.nb_proc_per_node;
926 else if ( params.nb_proc_per_node == 0 )
927 nbproc = params.nb_node;
929 nbproc = params.nb_node * params.nb_proc_per_node;
931 //std::ostringstream o;
933 //o << nbproc << " ";
934 command_file << nbproc << " ";
936 //command += o.str();
938 //command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
939 command_file << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
942 if (isPythonContainer(params.container_name))
943 //command += "pyMPI SALOME_ContainerPy.py ";
944 command_file << "pyMPI SALOME_ContainerPy.py ";
946 //command += "SALOME_MPIContainer ";
947 command_file << "SALOME_MPIContainer ";
// --- Working directory handling ---
953 std::string wdir=params.workingdir.in();
956 // a working directory is requested
957 if(wdir == "$TEMPDIR")
959 // a new temporary directory is requested
960 string dir = Kernel_Utils::GetTmpDir();
// Two alternative "cd" forms ("cd /d" and "cd"); NOTE(review): presumably
// selected by a platform #ifdef that is not visible here.
962 //command += "cd /d "+ dir +";";
963 command_file << "cd /d " << dir << endl;
965 //command = "cd "+ dir +";";
966 command_file << "cd " << dir << ";";
972 // a permanent directory is requested use it or create it
974 //command="mkdir " + wdir;
975 command_file << "mkdir " + wdir << endl;
976 command_file << "cd /D " + wdir << endl;
978 //command="mkdir -p " + wdir + " && cd " + wdir + ";";
979 command_file << "mkdir -p " << wdir << " && cd " << wdir + ";";
// --- Container executable ---
983 if (isPythonContainer(params.container_name))
984 //command += "SALOME_ContainerPy.py ";
985 command_file << "SALOME_ContainerPy.py ";
987 //command += container_exe + " ";
988 command_file << container_exe + " ";
993 /*command += _NS->ContainerName(params);
995 sprintf(idc,"%ld",id);
998 AddOmninamesParams(command);*/
// Container name, id and naming-service initial reference option.
1000 command_file << _NS->ContainerName(params);
1001 command_file << " -id " << id << " -";
1002 AddOmninamesParams(command_file);
1003 command_file.close();
// 0x1ED is octal 0755: rwx for the owner, r-x for group and others.
1006 chmod(_TmpFileName.c_str(), 0x1ED);
1008 command = _TmpFileName;
1010 MESSAGE("Command is file ... " << command);
1015 //=============================================================================
1017 * removes the generated temporary file in case of a remote launch.
1019 //=============================================================================
// Deletes the temporary script named by _TmpFileName through a shell command
// and, when its directory is then empty, removes that directory as well.
// NOTE(review): the paired alternatives below ("del /F " vs "rm ", substr vs
// full name, "del /F " vs "rmdir ") are presumably selected by platform
// #ifdef lines and a length test that are not visible in this listing.
1021 void SALOME_ContainerManager::RmTmpFile()
1023 int lenght = _TmpFileName.size();
1027 string command = "del /F ";
1029 string command = "rm ";
// One variant drops the last three characters of the file name.
1032 command += _TmpFileName.substr(0, lenght - 3 );
1034 command += _TmpFileName;
1036 system(command.c_str());
1037 //if dir is empty - remove it
1038 string tmp_dir = Kernel_Utils::GetDirByPath( _TmpFileName );
1039 if ( Kernel_Utils::IsEmptyDir( tmp_dir ) )
1042 command = "del /F " + tmp_dir;
1044 command = "rmdir " + tmp_dir;
1046 system(command.c_str());
1051 //=============================================================================
1053 * add to command all options relative to naming service.
1055 //=============================================================================
// Appends the naming-service initial-reference option to `command`.
// The IOR address is fetched from the naming service into `iorstr`.
// NOTE(review): the statement appending `iorstr` to `command` is not visible
// in this listing.
1057 void SALOME_ContainerManager::AddOmninamesParams(string& command) const
1059 CORBA::String_var iorstr = _NS->getIORaddr();
1060 command += "ORBInitRef NameService=";
1065 //=============================================================================
1067 * add to command all options relative to naming service.
1069 //=============================================================================
// Stream overload: writes "ORBInitRef NameService=" followed by the IOR
// address obtained from the naming service to `fileStream`.
1071 void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const
1073 CORBA::String_var iorstr = _NS->getIORaddr();
1074 fileStream << "ORBInitRef NameService=";
1075 fileStream << iorstr;
1078 //=============================================================================
1080 * generate a file name in /tmp directory
1082 //=============================================================================
// Produces the path of a temporary script file from
// Kernel_Utils::GetTmpFileName().
// NOTE(review): the ".bat" suffix is presumably added only on one platform,
// and the return statement is not visible in this listing.
1084 string SALOME_ContainerManager::BuildTemporaryFileName() const
1086 //build more complex file name to support multiple salome session
1087 string aFileName = Kernel_Utils::GetTmpFileName();
1091 aFileName += ".bat";
1097 //=============================================================================
1099 * Builds in a temporary file the script to be launched.
1101 * Used if SALOME Application ($APPLI) is not defined.
1102 * The command is built with data from CatalogResources, in which every path
1103 * used on remote computer must be defined.
1105 //=============================================================================
// Writes a /bin/sh launch script into a temporary file, makes it executable,
// copies it to `machine` with rcp or scp depending on the resource's
// protocol, and builds the command that runs it remotely.
//   machine : target machine, used to look up its resource description
//   params  : container parameters (node counts, container name)
// Side effects: sets _TmpFileName and _CommandForRemAccess.
// Throws SALOME_Exception for an unknown protocol ("Unknown protocol") and
// after a failed copy ("Error of connection on remote host").
// NOTE(review): this listing lacks lines (return type, declarations of
// `command` / `status` / `nbproc`, the MPI test, `else` keywords, fragments of
// the copy and remote-access commands, the status test, the return); comments
// describe only the visible statements.
1108 SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
1109 (const string& machine,
1110 const Engines::MachineParameters& params) throw(SALOME_Exception)
1114 _TmpFileName = BuildTemporaryFileName();
1115 ofstream tempOutputFile;
1116 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
1117 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
// --- Script header ---
1118 tempOutputFile << "#! /bin/sh" << endl;
1122 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
1123 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
// --- MPI launch prefix ---
1129 tempOutputFile << "mpirun -np ";
// Process count: whichever of nb_node / nb_proc_per_node is non-zero, or
// their product when both are set.
1132 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
1134 else if ( params.nb_node == 0 )
1135 nbproc = params.nb_proc_per_node;
1136 else if ( params.nb_proc_per_node == 0 )
1137 nbproc = params.nb_node;
1139 nbproc = params.nb_node * params.nb_proc_per_node;
1141 std::ostringstream o;
1143 tempOutputFile << nbproc << " ";
1145 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// --- Executable, located under $KERNEL_ROOT_DIR/bin/salome/ ---
1149 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
1153 if (isPythonContainer(params.container_name))
1154 tempOutputFile << "pyMPI SALOME_ContainerPy.py ";
1156 tempOutputFile << "SALOME_MPIContainer ";
1161 if (isPythonContainer(params.container_name))
1162 tempOutputFile << "SALOME_ContainerPy.py ";
1164 tempOutputFile << "SALOME_Container ";
// Container name and naming-service option; the script runs the container in
// the background (" &").
1167 tempOutputFile << _NS->ContainerName(params) << " -";
1168 AddOmninamesParams(tempOutputFile);
1169 tempOutputFile << " &" << endl;
1170 tempOutputFile.flush();
1171 tempOutputFile.close();
// 0x1ED is octal 0755: rwx for the owner, r-x for group and others.
1173 chmod(_TmpFileName.c_str(), 0x1ED);
1176 // --- Build command
// Copy the script to the remote machine with the tool matching the protocol.
1180 if (resInfo.Protocol == rsh)
1183 string commandRcp = "rcp ";
1184 commandRcp += _TmpFileName;
1186 commandRcp += machine;
1188 commandRcp += _TmpFileName;
1189 status = system(commandRcp.c_str());
1192 else if (resInfo.Protocol == ssh)
1195 string commandRcp = "scp ";
1196 commandRcp += _TmpFileName;
1198 commandRcp += machine;
1200 commandRcp += _TmpFileName;
1201 status = system(commandRcp.c_str());
1204 throw SALOME_Exception("Unknown protocol");
1207 throw SALOME_Exception("Error of connection on remote host");
// Remember the remote-access prefix, then append the script path to run.
1210 _CommandForRemAccess = command;
1212 command += _TmpFileName;
1220 //=============================================================================
1221 /*! Creates a command line that the container manager uses to launch
1222 * a parallel container.
1224 //=============================================================================
// Assembles the shell command line that starts a parallel container process.
//
// exe_name - base name of the executable; params.parallelLib is appended to it
//            to obtain the real executable name, and it is searched for the
//            substring "Proxy".
// params   - container request; parallelLib, hostname and nb_component_nodes are
//            read directly, and a subset of its fields is copied into a fresh
//            MachineParameters used to compute the container name.
// log      - "default" appends a redirection of the output to a file under
//            /tmp; the other visible variant wraps the command in an xterm.
// Throws SALOME_Exception when parallelLib is neither "Dummy" nor "Mpi".
//
// NOTE(review): this listing omits short lines (braces, else, short statements).
// The return type, the declarations of 'command' and 'buffer', some branch
// conditions and the return statement are therefore not visible here -- confirm
// against the complete file before touching control flow.
1226 SALOME_ContainerManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1227 const Engines::MachineParameters& params,
1228 const std::string& log)
1230 // This method knows the differences between the proxy and the nodes.
1231 // nb_component_nodes is not used in the same way if it is a proxy or
// NOTE(review): CORBA::string_dup() returns a newly allocated copy whose
// ownership passes to the caller. Here the copy is only used to initialise a
// std::string and is never released, so each call appears to leak two strings.
// Constructing the std::string from the field directly would avoid that.
1235 string parallelLib(CORBA::string_dup(params.parallelLib));
1236 string hostname(CORBA::string_dup(params.hostname));
// NOTE(review): find() returns std::string::npos (an unsigned size type) when
// "Proxy" is absent; storing it in an int relies on the narrowing conversion.
// The test on 'par' is on a line missing from this listing.
1237 int par = exe_name.find("Proxy");
// Number of component nodes, also formatted as text into 'buffer' (declared on a
// line not shown, so its size cannot be checked here).
1238 int nbproc = params.nb_component_nodes;
1240 sprintf(buffer,"%d",nbproc);
// Copy of the request used only for _NS->ContainerName(). The visible lines copy
// container_name, hostname, OS, mem_mb, cpu_clock, nb_proc_per_node, nb_node and
// isMPI; parallelLib and nb_component_nodes are not copied in the visible lines.
1242 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1243 rtn->container_name = params.container_name;
1244 rtn->hostname = params.hostname;
1245 rtn->OS = params.OS;
1246 rtn->mem_mb = params.mem_mb;
1247 rtn->cpu_clock = params.cpu_clock;
1248 rtn->nb_proc_per_node = params.nb_proc_per_node;
1249 rtn->nb_node = params.nb_node;
1250 rtn->isMPI = params.isMPI;
// The executable actually launched is named <exe_name><parallelLib>.
1252 string real_exe_name = exe_name + parallelLib;
// "Dummy" library: the executable is started directly with the container name,
// the library name and the host name as arguments, followed by the naming
// service option added by AddOmninamesParams.
1254 if (parallelLib == "Dummy")
1256 //command = "gdb --args ";
1257 //command = "valgrind --tool=memcheck --log-file=val_log ";
1258 //command += real_exe_name;
1260 command = real_exe_name;
1262 command += " " + _NS->ContainerName(rtn);
1263 command += " " + parallelLib;
1264 command += " " + hostname;
1266 AddOmninamesParams(command);
// "Mpi" library: the executable is started through mpiexec.
1269 else if (parallelLib == "Mpi")
1271 // Step 1 : check if MPI is started
// The action taken when MPI is not started is on a line missing from this
// listing.
1272 if (_MpiStarted == false)
// First variant: mpiexec is asked for <nb_component_nodes> processes.
// Presumably the node case, selected through 'par' (condition not visible).
1281 command = "mpiexec -np " + string(buffer) + " ";
1282 // command += "gdb --args ";
1283 command += real_exe_name;
1284 command += " " + _NS->ContainerName(rtn);
1285 command += " " + parallelLib;
1286 command += " " + hostname;
1288 AddOmninamesParams(command);
// Second variant: a single process is started and the node count is passed as an
// extra argument after the container name. Presumably the proxy case.
1293 command = "mpiexec -np 1 ";
1294 command += real_exe_name;
1295 command += " " + _NS->ContainerName(rtn);
1296 command += " " + string(buffer);
1297 command += " " + parallelLib;
1298 command += " " + hostname;
1300 AddOmninamesParams(command);
// NOTE(review): the literal has no separator before the library name, so the
// message reads e.g. "Unknown parallelLibFoo".
1305 std::string message("Unknown parallelLib" + parallelLib);
1306 throw SALOME_Exception(message.c_str());
// "default" logging: output is redirected to a file under /tmp whose name is
// built from the container name, the local host name and the USER environment
// variable, and the command is put in the background. The short separators
// appended between those parts are on lines missing from this listing.
1310 if (log == "default")
1312 command += " > /tmp/";
1313 command += _NS->ContainerName(rtn);
1315 command += Kernel_Utils::GetHostname();
// NOTE(review): getenv() returns a null pointer when USER is unset; appending a
// null pointer to a std::string is undefined behaviour.
1317 command += getenv( "USER" ) ;
1318 command += ".log 2>&1 &" ;
// xterm variant: the whole command is wrapped so that it runs inside an xterm
// with LD_LIBRARY_PATH and PATH re-exported. The guarding condition is not
// visible (presumably log == "xterm", as in the disabled code below).
1322 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1323 + command + " \" &";
1324 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
// The remaining lines of the function are disabled legacy variants kept inside
// block comments.
1328 /* if (log == "xterm")
1330 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1333 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1334 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1335 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1336 command += "ssh cn01 sh " + fichier_commande + " &";
1337 cerr << "La commande : " << command << endl;
1341 void SALOME_ContainerManager::startMPI()
1343 cerr << "----------------------------------------------" << endl;
1344 cerr << "----------------------------------------------" << endl;
1345 cerr << "----------------------------------------------" << endl;
1346 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1347 cerr << "----------------------------------------------" << endl;
1348 cerr << "----------------------------------------------" << endl;
1349 cerr << "----------------------------------------------" << endl;
1351 int status = system("lamboot");
1354 INFOS("lamboot failed : system command status -1");
1356 else if (status == 217)
1358 INFOS("lamboot failed : system command status 217");