1 // Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
2 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License.
9 // This library is distributed in the hope that it will be useful
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "SALOME_ContainerManager.hxx"
21 #include "SALOME_NamingService.hxx"
23 #include <sys/types.h>
29 #include "Utils_CorbaException.hxx"
30 #include "Batch_Date.hxx"
32 #ifdef WITH_PACO_PARALLEL
// Initial value of the retry counter used by the loops in this file that wait
// for a freshly launched container to show up in the naming service.
36 #define TIME_OUT_TO_LAUNCH_CONT 21
// Containers collected from "/Containers" by fillBatchLaunchedContainers(), and
// the cursor that GiveContainer() advances through them in batch mode.
40 vector<Engines::Container_ptr> SALOME_ContainerManager::_batchLaunchedContainers;
42 vector<Engines::Container_ptr>::iterator SALOME_ContainerManager::_batchLaunchedContainersIter;
// Name under which the ContainerManager servant is registered in (and later
// removed from) the naming service.
44 const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
47 //=============================================================================
51 * Define a CORBA single thread policy for the server, which avoids having to deal
52 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
54 //=============================================================================
// Constructor. In the statements shown it: keeps a duplicated ORB reference in
// _orb, creates a child POA named "SThreadPOA" with a SINGLE_THREAD_MODEL thread
// policy, activates this servant on that POA, registers the resulting
// ContainerManager reference in the naming service under
// _ContainerManagerNameInNS, and records whether the APPLI environment variable
// is set.
56 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns)
58 MESSAGE("constructor");
63 PortableServer::POAManager_var pman = poa->the_POAManager();
64 _orb = CORBA::ORB::_duplicate(orb) ;
65 CORBA::PolicyList policies;
// NOTE(review): element 0 of `policies` is assigned below, so the sequence must
// already have a length of at least 1; no sizing call is visible in this
// excerpt - confirm it is present.
67 PortableServer::ThreadPolicy_var threadPol =
68 poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
69 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
// The new POA shares the parent's POA manager (pman).
71 _poa = poa->create_POA("SThreadPOA",pman,policies);
// Activate this servant and publish its reference.
73 PortableServer::ObjectId_var id = _poa->activate_object(this);
74 CORBA::Object_var obj = _poa->id_to_reference(id);
75 Engines::ContainerManager_var refContMan =
76 Engines::ContainerManager::_narrow(obj);
78 _NS->Register(refContMan,_ContainerManagerNameInNS);
// Selects between the two remote-launch strategies in
// BuildCommandToLaunchRemoteContainer().
80 _isAppliSalomeDefined = (getenv("APPLI") != 0);
81 MESSAGE("constructor end");
84 //=============================================================================
88 //=============================================================================
// Destructor: the only visible action is a trace message.
90 SALOME_ContainerManager::~SALOME_ContainerManager()
92 MESSAGE("destructor");
95 //=============================================================================
97 * shutdown all the containers, then the ContainerManager servant
99 //=============================================================================
// Shuts down every container via ShutdownContainers(), removes the manager's
// own entry from the naming service, then deactivates this servant on _poa.
101 void SALOME_ContainerManager::Shutdown()
104 ShutdownContainers();
105 _NS->Destroy_Name(_ContainerManagerNameInNS);
106 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
107 _poa->deactivate_object(oid);
108 //_remove_ref() has already been done at creation
112 //=============================================================================
114 * Loop on all the containers listed in naming service, ask shutdown on each
116 //=============================================================================
// Walks the "/Containers" directory of the naming service in two passes:
//  1. collect the name of every entry that narrows to Engines::Container;
//  2. re-resolve each collected name and act on the live reference.
// CORBA and unknown exceptions raised in the second pass are logged and ignored.
// NOTE(review): the call made on each container is not visible in this excerpt;
// the function's header comment says a shutdown is requested - confirm.
118 void SALOME_ContainerManager::ShutdownContainers()
120 MESSAGE("ShutdownContainers");
122 isOK = _NS->Change_Directory("/Containers");
124 vector<string> vec = _NS->list_directory_recurs();
125 list<string> lstCont;
// Pass 1: keep only the names that resolve to a container.
126 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
128 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
129 Engines::Container_var cont=Engines::Container::_narrow(obj);
130 if(!CORBA::is_nil(cont)){
131 lstCont.push_back((*iter));
134 MESSAGE("Container list: ");
135 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
// Pass 2: resolve again, since an entry may have disappeared in the meantime.
138 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
140 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
141 Engines::Container_var cont=Engines::Container::_narrow(obj);
142 if(!CORBA::is_nil(cont))
144 MESSAGE("ShutdownContainers: " << (*iter));
// Failures are deliberately non-fatal: log and carry on with the next container.
149 catch(CORBA::SystemException& e)
151 INFOS("CORBA::SystemException ignored : " << e);
153 catch(CORBA::Exception&)
155 INFOS("CORBA::Exception ignored.");
159 INFOS("Unknown exception ignored.");
163 MESSAGE("ShutdownContainers: no container ref for " << (*iter));
168 //=============================================================================
170 * Find a suitable Container in a list of machines, or start one
171 * \param params Machine Parameters required for the container
172 * \param possibleComputers list of machines usable for find or start
174 //=============================================================================
// Looks for an existing container matching `params` on `possibleComputers`
// with FindContainer(); the fallback visible here starts a new one through
// StartContainer() using the Engines::P_FIRST policy.
// NOTE(review): the statement executed when a container is found is not
// visible in this excerpt - presumably it returns `ret`; confirm.
176 Engines::Container_ptr
177 SALOME_ContainerManager::
178 FindOrStartContainer(const Engines::MachineParameters& params,
179 const Engines::MachineList& possibleComputers)
181 Engines::Container_ptr ret = FindContainer(params,possibleComputers);
182 if(!CORBA::is_nil(ret))
184 MESSAGE("Container doesn't exist try to launch it ...");
186 return StartContainer(params,possibleComputers,Engines::P_FIRST);
190 //=============================================================================
192 * Start a suitable Container in a list of machines
193 * \param params Machine Parameters required for the container
194 * \param possibleComputers list of machines usable for start
196 //=============================================================================
// Starts a container on one machine chosen from `possibleComputers`.
// Visible steps: pick a machine according to `policy`, build the local or
// remote launch command, remove any stale naming-service entry for the
// container name, run the command through system() with output redirected to a
// log file under /tmp, then poll the naming service until the container is
// registered or the retry counter is exhausted. On success the log file
// location is passed to the container.
// Returns a nil reference when no machine can be selected or the launch fails.
198 Engines::Container_ptr
199 SALOME_ContainerManager::
200 StartContainer(const Engines::MachineParameters& params,
201 const Engines::MachineList& possibleComputers,
202 Engines::ResPolicy policy)
// With PaCO support compiled in, a non-empty params.parallelLib diverts the
// request to the parallel-container path.
204 #ifdef WITH_PACO_PARALLEL
205 std::string parallelLib(params.parallelLib);
206 if (parallelLib != "")
207 return FindOrStartParallelContainer(params, possibleComputers);
210 string containerNameInNS;
// Buffer for the decimal rendering of `id` (see the sprintf("%ld") below).
211 char idc[3*sizeof(long)];
212 Engines::Container_ptr ret = Engines::Container::_nil();
214 MESSAGE("SALOME_ContainerManager::StartContainer " <<
215 possibleComputers.length());
// Copy the machine list into `lm` for the resource manager.
// NOTE(review): `int i` is compared with length(); FindContainer() uses
// `unsigned int` for the same loop - likely a signed/unsigned mix, confirm.
218 for(int i=0;i<possibleComputers.length();i++)
219 lm.push_back(string(possibleComputers[i]));
// Machine selection depends on the requested resource policy.
224 case Engines::P_FIRST:
225 theMachine=_ResManager->GetImpl()->FindFirst(lm);
227 case Engines::P_CYCL:
228 theMachine=_ResManager->GetImpl()->FindNext(lm);
230 case Engines::P_BEST:
231 theMachine=_ResManager->GetImpl()->FindBest(lm);
235 catch( const SALOME_Exception &ex ){
237 return Engines::Container::_nil();
240 //If the machine name is localhost use the real name
241 if(theMachine == "localhost")
242 theMachine=GetHostname();
244 MESSAGE("try to launch it on " << theMachine);
246 // Get Id for container: a parallel container registers in Naming Service
247 // on the machine where process 0 runs. ContainerManager doesn't know the name
248 // of this machine before the launch of the parallel container. So to get
249 // the IOR of the parallel container in Naming Service, ContainerManager
250 // gives a unique Id. The parallel container registers its name under
251 // /ContainerManager/Id directory in NamingService
253 id = GetIdForContainer();
257 MESSAGE("SALOME_ContainerManager::StartContainer : " <<
258 "no possible computer");
259 return Engines::Container::_nil();
// Local launch when the chosen machine is this host, remote launch otherwise.
261 else if(theMachine==GetHostname())
262 command = BuildCommandToLaunchLocalContainer(params,id);
264 command = BuildCommandToLaunchRemoteContainer(theMachine,params,id);
268 //check if an entry exists in Naming service
// Two naming schemes are visible: "/ContainerManager/id<N>" built from the id,
// and the regular name from BuildContainerNameForNS(). The condition choosing
// between them is not visible in this excerpt.
271 containerNameInNS = "/ContainerManager/id";
272 sprintf(idc,"%ld",id);
273 containerNameInNS += idc;
276 containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str());
278 SCRUTE(containerNameInNS);
279 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
280 if ( !CORBA::is_nil(obj) )
282 // unregister the registered container if it exists
283 _NS->Destroy_Name(containerNameInNS.c_str());
284 // unregister component instances ???
285 //Engines::Container_var cont=Engines::Container::_narrow(obj);
288 //redirect stdout and stderr in a file
// NOTE(review): getenv("USER") returns a null pointer when USER is unset, and
// appending a null char* to a std::string is undefined behaviour - confirm
// that USER is guaranteed here (same concern where USER is read again below).
289 string logFilename="/tmp/"+_NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ;
290 command += " > " + logFilename + " 2>&1 &";
292 // launch container with a system call
// NOTE(review): the command is interpreted by a shell and embeds values taken
// from `params`; verify those values cannot contain shell metacharacters.
293 int status=system(command.c_str());
295 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
296 "(system command status -1)");
297 return Engines::Container::_nil();
// NOTE(review): 217 is an unexplained magic status value - document its origin.
299 else if (status == 217){
300 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
301 "(system command status 217)");
302 return Engines::Container::_nil();
// Poll the naming service until the container registers itself or the retry
// counter reaches zero (the delay/decrement statements are not visible here).
305 int count=TIME_OUT_TO_LAUNCH_CONT;
306 MESSAGE("count = "<<count);
307 while ( CORBA::is_nil(ret) && count ){
315 MESSAGE( count << ". Waiting for container on " << theMachine);
317 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
318 ret=Engines::Container::_narrow(obj);
321 if ( CORBA::is_nil(ret) )
323 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed");
// Tell the container where its log lives, formatted as USER@host:path.
327 logFilename=":"+logFilename;
328 logFilename="@"+GetHostname()+logFilename;
329 logFilename=getenv( "USER" )+logFilename;
330 ret->logfilename(logFilename.c_str());
337 //=============================================================================
339 * Start a suitable Container in a list of machines
340 * \param params Machine Parameters required for the container
341 * \param possibleComputers list of machines usable for start
343 //=============================================================================
// Overload that first asks the resources manager for the machines fitting
// `params` and `componentList`, then delegates to the machine-list overload of
// StartContainer() with the given `policy`.
345 Engines::Container_ptr
346 SALOME_ContainerManager::
347 StartContainer(const Engines::MachineParameters& params,
348 Engines::ResPolicy policy,
349 const Engines::CompoList& componentList)
351 Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params,componentList);
352 return StartContainer(params,possibleComputers,policy);
355 #ifdef WITH_PACO_PARALLEL
356 //=============================================================================
358 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
359 * \param params Machine Parameters required for the container
360 * \param possibleComputers list of machines usable for find or start
362 * \return CORBA container reference.
364 //=============================================================================
// PaCO++ build only. Returns an existing container matching the request when
// FindContainer() finds one; otherwise launches a parallel container on the
// local host: first the proxy, then the nodes, then each node is looked up in
// the naming service so that it can be connected to the proxy.
// Only the local host is supported: any other machine just logs a message.
365 Engines::Container_ptr
366 SALOME_ContainerManager::
367 FindOrStartParallelContainer(const Engines::MachineParameters& params_const,
368 const Engines::MachineList& possibleComputers)
370 CORBA::Object_var obj;
371 PaCO::InterfaceManager_var proxy;
372 Engines::Container_ptr ret = Engines::Container::_nil();
// Work on a private copy because hostname is overwritten below.
373 Engines::MachineParameters params(params_const);
375 // Step 1 : Try to find a suitable container
376 // Currently not as good as it could be, since
377 // we would have to verify the number of nodes of the container
378 // when the user specifies one.
379 ret = FindContainer(params, possibleComputers);
381 if(CORBA::is_nil(ret)) {
382 // Step 2 : Starting a new parallel container
383 INFOS("[FindOrStartParallelContainer] Starting a parallel container");
385 // Step 2.1 : Choose a computer
// NOTE(review): StartContainer() calls FindFirst() through
// _ResManager->GetImpl() with a vector<string>; here it is called directly on
// _ResManager with the CORBA machine list - confirm this overload exists.
386 string theMachine = _ResManager->FindFirst(possibleComputers);
387 if(theMachine == "") {
388 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
389 INFOS("[FindOrStartParallelContainer] No possible computer found");
390 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
393 INFOS("[FindOrStartParallelContainer] on machine : " << theMachine);
395 if(theMachine == GetHostname()) {
396 // Step 3 : starting parallel container proxy
397 params.hostname = CORBA::string_dup(theMachine.c_str());
398 Engines::MachineParameters params_proxy(params);
400 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerProxy", params_proxy, "xterm");
402 catch(const SALOME_Exception & ex){
404 return Engines::Container::_nil();
406 // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes
407 params_proxy.nb_component_nodes = 0;
408 obj = LaunchParallelContainer(command, params_proxy, _NS->ContainerName(params));
// The same object is viewed both as a container and as the PaCO manager.
409 ret = Engines::Container::_narrow(obj);
410 proxy = PaCO::InterfaceManager::_narrow(obj);
412 // Step 4 : starting parallel container nodes
413 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerNode", params, "xterm");
414 string name = _NS->ContainerName(params) + "Node";
415 LaunchParallelContainer(command, params, name);
416 // Step 5 : connecting nodes and the proxy to actually create a parallel container
418 for (int i = 0; i < params.nb_component_nodes; i++) {
// Node names are "<container>Node<i>"; the two formatting calls below are
// presumably alternatives under a platform #ifdef that is not visible here.
422 snprintf(buffer,5,"%d",i);
424 _snprintf(buffer,5,"%d",i);
426 string name_cont = name + string(buffer);
// NOTE(review): the char* returned by CORBA::string_dup is used only to
// construct the std::string and is not kept, so it is presumably never
// released - confirm, and consider passing params.hostname directly.
428 string theNodeMachine(CORBA::string_dup(params.hostname));
429 string containerNameInNS = _NS->BuildContainerNameForNS(name_cont.c_str(),theNodeMachine.c_str());
430 int count = TIME_OUT_TO_LAUNCH_CONT;
431 obj = _NS->Resolve(containerNameInNS.c_str());
// Retry the lookup until the node is registered or the counter runs out.
432 while (CORBA::is_nil(obj) && count) {
433 INFOS("[FindOrStartParallelContainer] CONNECTION FAILED !!!!!!!!!!!!!!!!!!!!!!!!");
440 obj = _NS->Resolve(containerNameInNS.c_str());
443 PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
// NOTE(review): this message (and the "deployed" one below) prints `name`,
// the common prefix, rather than the per-node `name_cont`.
444 MESSAGE("[FindOrStartParallelContainer] Deploying node : " << name);
// Deployment errors are logged, not propagated.
449 catch(CORBA::SystemException& e)
451 INFOS("Caught CORBA::SystemException. : " << e);
453 catch(PortableServer::POA::ServantAlreadyActive&)
455 INFOS("Caught CORBA::ServantAlreadyActiveException");
457 catch(CORBA::Exception&)
459 INFOS("Caught CORBA::Exception.");
461 catch(std::exception& exc)
463 INFOS("Caught std::exception - "<<exc.what());
467 INFOS("Caught unknown exception.");
469 INFOS("[FindOrStartParallelContainer] node " << name << " deployed");
472 INFOS("[FindOrStartParallelContainer] Currently parallel containers are launched only on the local host");
479 //=============================================================================
481 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
482 * \param params Machine Parameters required for the container
483 * \param possibleComputers list of machines usable for find or start
485 * \return CORBA container reference.
487 //=============================================================================
// Variant compiled when PaCO++ support is not available: it prepares a nil
// container reference and logs that the parallel extension is disabled.
488 Engines::Container_ptr
489 SALOME_ContainerManager::
490 FindOrStartParallelContainer(const Engines::MachineParameters& params,
491 const Engines::MachineList& possibleComputers)
493 Engines::Container_ptr ret = Engines::Container::_nil();
494 INFOS("[FindOrStartParallelContainer] is disabled !");
495 INFOS("[FindOrStartParallelContainer] recompile SALOME Kernel to enable parallel extension");
500 //=============================================================================
502 * Give a suitable Container in a list of machines
503 * \param params Machine Parameters required for the container
504 * \param possibleComputers list of machines usable for start
506 //=============================================================================
// Hands out a container. When the SALOME_BATCH environment variable equals
// "1", containers already registered under "/Containers" are served in
// round-robin order from _batchLaunchedContainers; otherwise a container is
// started through StartContainer().
508 Engines::Container_ptr
509 SALOME_ContainerManager::
510 GiveContainer(const Engines::MachineParameters& params,
511 Engines::ResPolicy policy,
512 const Engines::CompoList& componentList)
514 char *valenv=getenv("SALOME_BATCH");
// NOTE(review): getenv() returns a null pointer when the variable is unset and
// strcmp() must not receive one; no null check is visible in this excerpt -
// confirm one guards this comparison.
516 if (strcmp(valenv,"1")==0)
// Fill the cache lazily on first use.
518 if(_batchLaunchedContainers.empty())
519 fillBatchLaunchedContainers();
// Wrap around once the cursor has passed the last container.
521 if (_batchLaunchedContainersIter == _batchLaunchedContainers.end())
522 _batchLaunchedContainersIter = _batchLaunchedContainers.begin();
// NOTE(review): if the list is still empty after filling, begin() == end() and
// this dereference is invalid - confirm the empty case cannot occur.
524 Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter);
525 _batchLaunchedContainersIter++;
528 return StartContainer(params,policy,componentList);
531 //=============================================================================
535 //=============================================================================
// Resolves the naming-service name built from `params` and `theMachine` and
// returns the object narrowed to Engines::Container, or a nil reference when
// nothing is registered under that name.
537 Engines::Container_ptr
538 SALOME_ContainerManager::
539 FindContainer(const Engines::MachineParameters& params,
540 const char *theMachine)
542 string containerNameInNS(_NS->BuildContainerNameForNS(params,theMachine));
543 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
544 if( !CORBA::is_nil(obj) )
545 return Engines::Container::_narrow(obj);
547 return Engines::Container::_nil();
550 //=============================================================================
554 //=============================================================================
// Tries the single-machine FindContainer() on each entry of
// `possibleComputers` in order and returns a nil reference when none matches.
// NOTE(review): the statement executed on a match is not visible in this
// excerpt - presumably it returns `cont`; confirm.
556 Engines::Container_ptr
557 SALOME_ContainerManager::
558 FindContainer(const Engines::MachineParameters& params,
559 const Engines::MachineList& possibleComputers)
561 MESSAGE("FindContainer "<<possibleComputers.length());
562 for(unsigned int i=0;i<possibleComputers.length();i++)
564 MESSAGE("FindContainer possible " << possibleComputers[i]);
565 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
566 if( !CORBA::is_nil(cont) )
569 MESSAGE("FindContainer: not found");
570 return Engines::Container::_nil();
573 //=============================================================================
574 /*! This method launches the parallel container.
575 * It may eventually be moved into the resources manager.
577 * \param command to launch
578 * \param container's parameters
579 * \param name of the container
581 * \return CORBA container reference
583 //=============================================================================
// Runs `command` through system() and waits for the launched processes to
// register in the naming service.
//  - params.nb_component_nodes == 0: the command starts the proxy, registered
//    under `name`.
//  - otherwise: the command starts the nodes, registered under
//    `name` + node index, and each one is waited for in turn.
// Logs a failure when the last lookup still yields a nil reference.
585 SALOME_ContainerManager::LaunchParallelContainer(const std::string& command,
586 const Engines::MachineParameters& params,
587 const std::string& name)
589 CORBA::Object_ptr obj = CORBA::Object::_nil();
590 string containerNameInNS;
591 MESSAGE("[LaunchParallelContainer] : command to launch...");
593 if (params.nb_component_nodes == 0) {
594 INFOS("[LaunchParallelContainer] launching the proxy of the parallel container");
595 int status = system(command.c_str());
597 INFOS("[LaunchParallelContainer] failed : system command status -1");
// NOTE(review): 217 is an unexplained magic status value.
599 else if (status == 217) {
600 INFOS("[LaunchParallelContainer] failed : system command status 217");
603 int count = TIME_OUT_TO_LAUNCH_CONT;
// NOTE(review): the char* returned by CORBA::string_dup is not kept anywhere,
// so it is presumably never released - confirm (same pattern in the node branch).
604 string theMachine(CORBA::string_dup(params.hostname));
// The (char*) cast drops the const qualifier of c_str().
605 containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(),theMachine.c_str());
607 INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy on " << theMachine);
// Poll until the proxy is registered or the retry counter is exhausted.
608 while (CORBA::is_nil(obj) && count) {
615 obj = _NS->Resolve(containerNameInNS.c_str());
619 INFOS("[LaunchParallelContainer] launching the nodes of the parallel container");
620 int status = system(command.c_str());
622 INFOS("[LaunchParallelContainer] failed : system command status -1");
624 else if (status == 217) {
625 INFOS("[LaunchParallelContainer] failed : system command status 217");
627 // Wait for every node to register
628 for (int i = 0; i < params.nb_component_nodes; i++) {
// NOTE(review): `obj` is a raw _ptr; resetting it for each node drops the
// previous reference without a visible release - confirm.
629 obj = CORBA::Object::_nil();
630 int count = TIME_OUT_TO_LAUNCH_CONT;
// The two formatting calls below are presumably alternatives under a platform
// #ifdef that is not visible in this excerpt.
635 snprintf(buffer,5,"%d",i);
637 _snprintf(buffer,5,"%d",i);
640 string name_cont = name + string(buffer);
642 // I don't like this...
643 string theMachine(CORBA::string_dup(params.hostname));
644 containerNameInNS = _NS->BuildContainerNameForNS((char*) name_cont.c_str(),theMachine.c_str());
645 cerr << "[LaunchContainer] Waiting for Parllel Container node " << containerNameInNS << " on " << theMachine << endl;
646 while (CORBA::is_nil(obj) && count) {
653 obj = _NS->Resolve(containerNameInNS.c_str());
658 if ( CORBA::is_nil(obj) ) {
659 INFOS("[LaunchParallelContainer] failed");
664 //=============================================================================
666 * Get Id for container: a parallel container registers in Naming Service
667 * on the machine where process 0 runs. ContainerManager doesn't know the name
668 * of this machine before the launch of the parallel container. So to get
669 * the IOR of the parallel container in Naming Service, ContainerManager
670 * gives a unique Id. The parallel container registers its name under
671 * /ContainerManager/Id directory in NamingService
673 //=============================================================================
// Returns the id that StartContainer() appends to "/ContainerManager/id" to
// build a naming-service entry. The body is not visible in this excerpt.
676 long SALOME_ContainerManager::GetIdForContainer(void)
// Rebuilds _batchLaunchedContainers from every entry under "/Containers" that
// narrows to Engines::Container, then resets the round-robin cursor to the
// first element.
682 void SALOME_ContainerManager::fillBatchLaunchedContainers()
// NOTE(review): the vector holds raw _ptr references; clear() drops them
// without releasing - confirm whether a release is needed here.
684 _batchLaunchedContainers.clear();
685 _NS->Change_Directory("/Containers");
686 vector<string> vec = _NS->list_directory_recurs();
687 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
688 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
689 Engines::Container_ptr cont=Engines::Container::_narrow(obj);
690 if(!CORBA::is_nil(cont)){
691 _batchLaunchedContainers.push_back(cont);
694 _batchLaunchedContainersIter=_batchLaunchedContainers.begin();
697 //=============================================================================
699 * This is no longer valid (C++ containers are also Python containers)
701 //=============================================================================
// Tests whether `ContainerName` ends with the two characters "Py".
703 bool isPythonContainer(const char* ContainerName)
706 int len = strlen(ContainerName);
// NOTE(review): for names shorter than two characters `ContainerName + len - 2`
// points before the start of the string; the guard is not visible in this
// excerpt - confirm that `len >= 2` is checked first.
709 if (strcmp(ContainerName + len - 2, "Py") == 0)
715 //=============================================================================
717 * Builds the script to be launched
719 * If SALOME Application not defined ($APPLI),
720 * see BuildTempFileToLaunchRemoteContainer()
722 * Else rely on distant configuration. Command is under the form (example):
723 * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
724 * SALOME_Container containerName &"
726 * - where user is omitted if not specified in CatalogResources,
727 * - where distant path is always relative to user@machine $HOME, and
728 * equal to $APPLI if not specified in CatalogResources,
729 * - where hostNS is the hostname of CORBA naming server (set by scripts to
730 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
731 * - where portNS is the port used by CORBA naming server (set by scripts to
732 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
733 * - where workingdir is the requested working directory for the container.
734 * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME
736 //=============================================================================
// Builds the shell command that starts a container on `machine`.
// Without a SALOME application ($APPLI) the work is delegated to
// BuildTempFileToLaunchRemoteContainer(). Otherwise the command is assembled
// from: the rsh/ssh access taken from the resource catalogue, the application
// path followed by "/runRemote.sh", the name-server host and port
// (NSHOST/NSPORT), an optional working directory, the container executable
// (wrapped in mpirun for the MPI case), the container name and the naming
// service options.
// Throws SALOME_Exception("Unknown protocol") for any other access protocol.
739 SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
740 (const string& machine,
741 const Engines::MachineParameters& params, const long id)
// Buffer for the decimal rendering of `id`.
745 char idc[3*sizeof(long)];
747 if ( ! _isAppliSalomeDefined )
748 command = BuildTempFileToLaunchRemoteContainer(machine, params);
752 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
// Number of processes: a zero count on one side means "use the other side",
// otherwise nodes * processes-per-node. The same computation is repeated in
// the two other launch-command builders of this file.
// NOTE(review): when exactly one of the two values is negative the product
// branch yields a negative nbproc - confirm inputs are validated upstream.
756 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
758 else if ( params.nb_node == 0 )
759 nbproc = params.nb_proc_per_node;
760 else if ( params.nb_proc_per_node == 0 )
761 nbproc = params.nb_node;
763 nbproc = params.nb_node * params.nb_proc_per_node;
766 // "ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
767 // SALOME_Container containerName &"
769 if (resInfo.Protocol == rsh)
771 else if (resInfo.Protocol == ssh)
774 throw SALOME_Exception("Unknown protocol");
776 if (resInfo.UserName != "")
778 command += resInfo.UserName;
// Application path: catalogue value if present, else the local $APPLI.
785 if (resInfo.AppliPath != "")
786 command += resInfo.AppliPath; // path relative to user@machine $HOME
789 ASSERT(getenv("APPLI"));
790 command += getenv("APPLI"); // path relative to user@machine $HOME
793 command += "/runRemote.sh ";
// NOTE(review): the environment variables below are only checked via ASSERT;
// if ASSERT is compiled out, a missing variable appends a null pointer - confirm.
795 ASSERT(getenv("NSHOST"));
796 command += getenv("NSHOST"); // hostname of CORBA name server
799 ASSERT(getenv("NSPORT"));
800 command += getenv("NSPORT"); // port of CORBA name server
802 std::string wdir=params.workingdir.in();
805 command += " WORKINGDIR ";
// "$TEMPDIR" is a sentinel value that receives special handling (not visible here).
807 if(wdir == "$TEMPDIR")
809 command += wdir; // requested working directory
815 command += " mpirun -np ";
816 std::ostringstream o;
820 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
822 command += " SALOME_MPIContainer ";
825 command += " SALOME_Container ";
827 command += _NS->ContainerName(params);
829 sprintf(idc,"%ld",id);
832 AddOmninamesParams(command);
834 MESSAGE("command =" << command);
840 //=============================================================================
842 * builds the command to be launched.
844 //=============================================================================
// Builds the shell command that starts a container on the local host.
// Two shapes are visible: an "mpirun -np <n>" command running pyMPI or
// SALOME_MPIContainer, and a plain command that optionally changes into a
// working directory first and runs SALOME_ContainerPy.py or SALOME_Container.
// The container name and the naming service options are appended in both cases.
847 SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
848 (const Engines::MachineParameters& params, const long id)
// Buffer for the decimal rendering of `id`.
853 char idc[3*sizeof(long)];
857 command = "mpirun -np ";
// Process count: same rule as in the remote-launch builders.
859 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
861 else if ( params.nb_node == 0 )
862 nbproc = params.nb_proc_per_node;
863 else if ( params.nb_proc_per_node == 0 )
864 nbproc = params.nb_node;
866 nbproc = params.nb_node * params.nb_proc_per_node;
868 std::ostringstream o;
874 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
877 if (isPythonContainer(params.container_name))
878 command += "pyMPI SALOME_ContainerPy.py ";
880 command += "SALOME_MPIContainer ";
886 std::string wdir=params.workingdir.in();
889 // a working directory is requested
890 if(wdir == "$TEMPDIR")
892 // a new temporary directory is requested
// mkdtemp() replaces the trailing XXXXXX of `dir` in place.
893 char dir[]="/tmp/salomeXXXXXX";
894 char* mdir=mkdtemp(dir);
// NOTE(review): if this message is emitted on the failure path, `mdir` is a
// null pointer and streaming a null char* is undefined behaviour - confirm.
896 std::cerr << "Problem in mkdtemp " << dir << " " << mdir << std::endl;
898 command="cd "+std::string(dir)+";";
902 // a permanent directory is requested use it or create it
// NOTE(review): `wdir` is inserted unquoted into a shell command; paths with
// spaces or shell metacharacters will break it.
903 command="mkdir -p " + wdir + " && cd " + wdir + ";";
906 if (isPythonContainer(params.container_name))
907 command += "SALOME_ContainerPy.py ";
909 command += "SALOME_Container ";
912 command += _NS->ContainerName(params);
914 sprintf(idc,"%ld",id);
917 AddOmninamesParams(command);
919 MESSAGE("Command is ... " << command);
924 //=============================================================================
926 * removes the generated temporary file in case of a remote launch.
928 //=============================================================================
// Builds the delete command for the temporary launch script (_TmpFileName),
// if one was generated. The last three characters of the command are replaced
// by '*', so the deletion is a wildcard on the file name prefix.
930 void SALOME_ContainerManager::RmTmpFile()
932 if (_TmpFileName != "")
// "rm" and "del /F" are presumably alternatives under a platform #ifdef that
// is not visible in this excerpt.
935 string command = "rm ";
937 string command = "del /F ";
939 command += _TmpFileName;
// NOTE(review): strdup() allocates with malloc; the matching free() and the
// execution of the command are not visible in this excerpt - confirm.
940 char *temp = strdup(command.c_str());
941 int lgthTemp = strlen(temp);
// Overwrite the third-from-last character with '*' and cut the string after it.
942 temp[lgthTemp - 3] = '*';
943 temp[lgthTemp - 2] = '\0';
949 //=============================================================================
951 * add to command all options relative to naming service.
953 //=============================================================================
// Appends the naming-service option "ORBInitRef NameService=" to `command`.
// The IOR address is fetched from the naming service; the statement appending
// it is not visible in this excerpt.
955 void SALOME_ContainerManager::AddOmninamesParams(string& command) const
957 CORBA::String_var iorstr = _NS->getIORaddr();
958 command += "ORBInitRef NameService=";
963 //=============================================================================
965 * add to command all options relative to naming service.
967 //=============================================================================
// Stream variant: writes "ORBInitRef NameService=" followed by the IOR address
// of the naming service to `fileStream`.
969 void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const
971 CORBA::String_var iorstr = _NS->getIORaddr();
972 fileStream << "ORBInitRef NameService=";
973 fileStream << iorstr;
976 //=============================================================================
978 * generate a file name in /tmp directory
980 //=============================================================================
// Produces a file name based on the template "/tmp/commandXXXXXX".
// The steps that turn the XXXXXX placeholder into a unique suffix are only
// partly visible in this excerpt (an itoa(getpid(), ...) call is one of them).
982 string SALOME_ContainerManager::BuildTemporaryFileName() const
984 //build more complex file name to support multiple salome session
// 19 bytes = strlen("/tmp/command") (12) + strlen("XXXXXX") (6) + terminator.
// NOTE(review): the matching delete[] is not visible in this excerpt - confirm.
985 char *temp = new char[19];
986 strcpy(temp, "/tmp/command");
987 strcat(temp, "XXXXXX");
994 itoa(getpid(), aPID, 10);
998 string command(temp);
1005 //=============================================================================
1007 * Builds in a temporary file the script to be launched.
1009 * Used if SALOME Application ($APPLI) is not defined.
1010 * The command is build with data from CatalogResources, in which every path
1011 * used on remote computer must be defined.
1013 //=============================================================================
// Writes a small /bin/sh launch script into a temporary file, makes it
// executable, copies it to `machine` with rcp or scp (according to the
// protocol in the resource catalogue) and builds the command that runs it
// remotely.
// Throws SALOME_Exception for an unknown protocol or when the copy fails.
// NOTE(review): dynamic exception specifications such as
// throw(SALOME_Exception) were removed in C++17 - relevant if the build
// standard is raised.
1016 SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
1017 (const string& machine,
1018 const Engines::MachineParameters& params) throw(SALOME_Exception)
1022 _TmpFileName = BuildTemporaryFileName();
1023 ofstream tempOutputFile;
// NOTE(review): the result of open() is not checked in the visible lines.
1024 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
1025 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
1026 tempOutputFile << "#! /bin/sh" << endl;
1030 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
1031 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
1037 tempOutputFile << "mpirun -np ";
// Process count: same rule as in the other launch-command builders.
1040 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
1042 else if ( params.nb_node == 0 )
1043 nbproc = params.nb_proc_per_node;
1044 else if ( params.nb_proc_per_node == 0 )
1045 nbproc = params.nb_node;
1047 nbproc = params.nb_node * params.nb_proc_per_node;
// NOTE(review): `o` looks unused here - nbproc is streamed directly below.
1049 std::ostringstream o;
1051 tempOutputFile << nbproc << " ";
1053 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// NOTE(review): getenv() returns a null pointer when KERNEL_ROOT_DIR is unset;
// streaming a null char* is undefined behaviour - confirm it is always set.
1057 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
// MPI flavour of the executable.
1061 if (isPythonContainer(params.container_name))
1062 tempOutputFile << "pyMPI SALOME_ContainerPy.py ";
1064 tempOutputFile << "SALOME_MPIContainer ";
// Non-MPI flavour of the executable.
1069 if (isPythonContainer(params.container_name))
1070 tempOutputFile << "SALOME_ContainerPy.py ";
1072 tempOutputFile << "SALOME_Container ";
// The trailing " -" becomes the dash of "-ORBInitRef" written by AddOmninamesParams().
1075 tempOutputFile << _NS->ContainerName(params) << " -";
1076 AddOmninamesParams(tempOutputFile);
1077 tempOutputFile << " &" << endl;
1078 tempOutputFile.flush();
1079 tempOutputFile.close();
// 0x1ED is octal 0755 (rwxr-xr-x): make the script executable.
1080 chmod(_TmpFileName.c_str(), 0x1ED);
1082 // --- Build command
// Copy the script to the remote machine under the same path.
1086 if (resInfo.Protocol == rsh)
1089 string commandRcp = "rcp ";
1090 commandRcp += _TmpFileName;
1092 commandRcp += machine;
1094 commandRcp += _TmpFileName;
1095 status = system(commandRcp.c_str());
1098 else if (resInfo.Protocol == ssh)
1101 string commandRcp = "scp ";
1102 commandRcp += _TmpFileName;
1104 commandRcp += machine;
1106 commandRcp += _TmpFileName;
1107 status = system(commandRcp.c_str());
1110 throw SALOME_Exception("Unknown protocol");
1113 throw SALOME_Exception("Error of connection on remote host");
// Remember the remote-access prefix, then append the script path to run.
1116 _CommandForRemAccess = command;
1118 command += _TmpFileName;
1126 //=============================================================================
1127 /*! Creates a command line that the container manager uses to launch
1128 * a parallel container.
1130 //=============================================================================
// Builds the command line that launches one part of a PaCO++ parallel
// container (proxy or nodes) on the local host.
//  - exe_name: executable base name; params.parallelLib is appended to it.
//  - params:   container description; nb_component_nodes gives the node count.
//  - log:      "default" redirects output to a /tmp log file; the other visible
//              form wraps the command in an xterm.
// Throws SALOME_Exception when params.parallelLib is neither "Dummy" nor "Mpi".
1132 SALOME_ContainerManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1133 const Engines::MachineParameters& params,
1134 const std::string& log)
1136 // This method knows the differences between the proxy and the nodes.
1137 // nb_component_nodes is not used in the same way if it is a proxy or
// NOTE(review): the char* returned by CORBA::string_dup is not kept anywhere
// in the two statements below, so it is presumably never released - confirm.
1141 string parallelLib(CORBA::string_dup(params.parallelLib));
1142 string hostname(CORBA::string_dup(params.hostname));
// NOTE(review): find() returns std::string::size_type; storing it in an int
// relies on npos converting to a negative value.
1143 int par = exe_name.find("Proxy");
1144 int nbproc = params.nb_component_nodes;
1146 sprintf(buffer,"%d",nbproc);
// Copy of the subset of `params` used to compute the container name.
1148 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1149 rtn->container_name = params.container_name;
1150 rtn->hostname = params.hostname;
1151 rtn->OS = params.OS;
1152 rtn->mem_mb = params.mem_mb;
1153 rtn->cpu_clock = params.cpu_clock;
1154 rtn->nb_proc_per_node = params.nb_proc_per_node;
1155 rtn->nb_node = params.nb_node;
1156 rtn->isMPI = params.isMPI;
1158 string real_exe_name = exe_name + parallelLib;
1160 if (parallelLib == "Dummy")
1162 //command = "gdb --args ";
1163 //command = "valgrind --tool=memcheck --log-file=val_log ";
1164 //command += real_exe_name;
1166 command = real_exe_name;
1168 command += " " + _NS->ContainerName(rtn);
1169 command += " " + parallelLib;
1170 command += " " + hostname;
1172 AddOmninamesParams(command);
1175 else if (parallelLib == "Mpi")
1177 // Step 1 : check if MPI is started
1178 if (_MpiStarted == false)
// Two mpiexec shapes follow: "-np <nbproc>" and "-np 1" with the node count
// passed as a program argument. The condition choosing between them is not
// visible in this excerpt (presumably based on `par`).
1187 command = "mpiexec -np " + string(buffer) + " ";
1188 // command += "gdb --args ";
1189 command += real_exe_name;
1190 command += " " + _NS->ContainerName(rtn);
1191 command += " " + parallelLib;
1192 command += " " + hostname;
1194 AddOmninamesParams(command);
1199 command = "mpiexec -np 1 ";
1200 command += real_exe_name;
1201 command += " " + _NS->ContainerName(rtn);
1202 command += " " + string(buffer);
1203 command += " " + parallelLib;
1204 command += " " + hostname;
1206 AddOmninamesParams(command);
// NOTE(review): the message has no separator between the text and the library name.
1211 std::string message("Unknown parallelLib" + parallelLib);
1212 throw SALOME_Exception(message.c_str());
1216 if (log == "default")
1218 command += " > /tmp/";
1219 command += _NS->ContainerName(rtn);
1221 command += GetHostname();
// NOTE(review): getenv("USER") may return a null pointer - confirm it is set.
1223 command += getenv( "USER" ) ;
1224 command += ".log 2>&1 &" ;
// xterm form: re-exports LD_LIBRARY_PATH and PATH inside the new terminal.
1228 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1229 + command + " \" &";
1230 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
1234 /* if (log == "xterm")
1236 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1239 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1240 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1241 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1242 command += "ssh cn01 sh " + fichier_commande + " &";
1243 cerr << "La commande : " << command << endl;
// Prints a banner stating that only LAM on the local host is supported, runs
// "lamboot" through system() and logs a failure for status -1 or 217.
1247 void SALOME_ContainerManager::startMPI()
1249 cerr << "----------------------------------------------" << endl;
1250 cerr << "----------------------------------------------" << endl;
1251 cerr << "----------------------------------------------" << endl;
1252 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1253 cerr << "----------------------------------------------" << endl;
1254 cerr << "----------------------------------------------" << endl;
1255 cerr << "----------------------------------------------" << endl;
1257 int status = system("lamboot");
1260 INFOS("lamboot failed : system command status -1");
// NOTE(review): 217 is an unexplained magic status value.
1262 else if (status == 217)
1264 INFOS("lamboot failed : system command status 217");