1 // Copyright (C) 2007-2008 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
22 #include "SALOME_ContainerManager.hxx"
23 #include "SALOME_NamingService.hxx"
24 #include "SALOME_ModuleCatalog.hh"
25 #include "Basics_Utils.hxx"
26 #include "Basics_DirUtils.hxx"
27 #include <sys/types.h>
33 #include "Utils_CorbaException.hxx"
34 #include "Batch_Date.hxx"
36 #ifdef WITH_PACO_PARALLEL
40 #define TIME_OUT_TO_LAUNCH_CONT 61
// Class-wide list of container references; rebuilt by fillBatchLaunchedContainers().
44 vector<Engines::Container_ptr> SALOME_ContainerManager::_batchLaunchedContainers;
// Cursor into _batchLaunchedContainers; GiveContainer() advances it and wraps to begin().
46 vector<Engines::Container_ptr>::iterator SALOME_ContainerManager::_batchLaunchedContainersIter;
// Name under which the constructor registers this servant in the Naming Service
// (Shutdown() destroys the same name).
48 const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
51 //=============================================================================
55 * Define a CORBA single thread policy for the server, which avoids dealing
56 * with non-thread-safe usage like Change_Directory in SALOME naming service
58 //=============================================================================
//! Build the ContainerManager servant and publish it in the Naming Service.
// \param orb ORB reference; a duplicate is kept in _orb
// \param poa parent POA, used to create the child POA that hosts this servant
// \param rm  resources manager (presumably stored in _ResManager - TODO confirm)
// \param ns  naming service helper (presumably stored in _NS - TODO confirm)
60 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns)
62 MESSAGE("constructor");
// The child POA shares the parent's POA manager.
67 PortableServer::POAManager_var pman = poa->the_POAManager();
68 _orb = CORBA::ORB::_duplicate(orb) ;
69 CORBA::PolicyList policies;
// Request the SINGLE_THREAD_MODEL thread policy for the child POA.
71 PortableServer::ThreadPolicy_var threadPol =
72 poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
73 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
75 _poa = poa->create_POA("SThreadPOA",pman,policies);
// Activate this servant in the new POA and obtain its object reference.
77 PortableServer::ObjectId_var id = _poa->activate_object(this);
78 CORBA::Object_var obj = _poa->id_to_reference(id);
79 Engines::ContainerManager_var refContMan =
80 Engines::ContainerManager::_narrow(obj);
// Publish the reference under _ContainerManagerNameInNS.
82 _NS->Register(refContMan,_ContainerManagerNameInNS);
// Remember whether $APPLI is set; BuildCommandToLaunchRemoteContainer() branches on this flag.
84 _isAppliSalomeDefined = (getenv("APPLI") != 0);
85 MESSAGE("constructor end");
88 //=============================================================================
92 //=============================================================================
//! Destructor; emits a trace message.
94 SALOME_ContainerManager::~SALOME_ContainerManager()
96 MESSAGE("destructor");
99 //=============================================================================
101 * shutdown all the containers, then the ContainerManager servant
103 //=============================================================================
//! Shut down every container, then unregister and deactivate this servant.
105 void SALOME_ContainerManager::Shutdown()
108 ShutdownContainers();
// Remove the manager's own entry from the Naming Service.
109 _NS->Destroy_Name(_ContainerManagerNameInNS);
// Deactivate the servant in the POA it was activated in by the constructor.
110 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
111 _poa->deactivate_object(oid);
112 //_remove_ref() has already been done at creation
116 //=============================================================================
118 * Loop on all the containers listed in naming service, ask shutdown on each
120 //=============================================================================
//! Collect the containers registered under "/Containers" and process each one.
// Works in two passes: the first keeps the Naming Service entries that narrow
// to a non-nil Engines::Container; the second resolves each kept entry again.
// Exceptions raised for one entry are logged or ignored so the loops go on.
122 void SALOME_ContainerManager::ShutdownContainers()
124 MESSAGE("ShutdownContainers");
126 isOK = _NS->Change_Directory("/Containers");
128 vector<string> vec = _NS->list_directory_recurs();
// Pass 1: names of the entries that are really containers.
129 list<string> lstCont;
130 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++)
133 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
136 Engines::Container_var cont=Engines::Container::_narrow(obj);
137 if(!CORBA::is_nil(cont))
138 lstCont.push_back((*iter));
140 catch(const CORBA::Exception& e)
142 // ignore this entry and continue
145 MESSAGE("Container list: ");
146 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
// Pass 2: resolve every retained name again and handle the container.
149 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
151 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
152 Engines::Container_var cont=Engines::Container::_narrow(obj);
153 if(!CORBA::is_nil(cont))
155 MESSAGE("ShutdownContainers: " << (*iter));
// Failures on one container are only logged.
160 catch(CORBA::SystemException& e)
162 INFOS("CORBA::SystemException ignored : " << e);
164 catch(CORBA::Exception&)
166 INFOS("CORBA::Exception ignored.");
170 INFOS("Unknown exception ignored.");
174 MESSAGE("ShutdownContainers: no container ref for " << (*iter));
179 //=============================================================================
180 //! Find a suitable Container in a list of machines, or start one
182 * \param params Machine Parameters required for the container
183 * \param possibleComputers list of machines usable for find or start
185 //=============================================================================
// Looks for an existing container first; when none is found, a new one is
// started with the Engines::P_FIRST policy.
187 Engines::Container_ptr
188 SALOME_ContainerManager::
189 FindOrStartContainer(const Engines::MachineParameters& params,
190 const Engines::MachineList& possibleComputers)
192 Engines::Container_ptr ret = FindContainer(params,possibleComputers);
193 if(!CORBA::is_nil(ret))
195 MESSAGE("Container doesn't exist try to launch it ...");
197 return StartContainer(params,possibleComputers,Engines::P_FIRST);
201 //=============================================================================
202 //! Start a suitable Container in a list of machines with constraints and a policy
204 * Constraints are given by a machine parameters struct
205 * \param params Machine Parameters required for the container
206 * \param possibleComputers list of machines usable for start
207 * \param policy policy to use (first,cycl or best)
208 * \param container_exe specific container executable (default=SALOME_Container)
210 //=============================================================================
// Picks a machine from possibleComputers according to policy, builds a launch
// command (local or remote), runs it through system() and then polls the
// Naming Service until the new container registers or the retry count
// (TIME_OUT_TO_LAUNCH_CONT) is exhausted.
// Returns a nil reference when no machine can be selected or when system()
// reports status -1 or 217.
212 Engines::Container_ptr
213 SALOME_ContainerManager::
214 StartContainer(const Engines::MachineParameters& params,
215 const Engines::MachineList& possibleComputers,
216 Engines::ResPolicy policy,const std::string& container_exe)
// With PaCO++ support, a non-empty parallelLib redirects to the parallel launcher.
218 #ifdef WITH_PACO_PARALLEL
219 std::string parallelLib(params.parallelLib);
220 if (parallelLib != "")
221 return FindOrStartParallelContainer(params, possibleComputers);
224 string containerNameInNS;
// Buffer for the decimal text of the container id (filled by sprintf("%ld") below).
225 char idc[3*sizeof(long)];
226 Engines::Container_ptr ret = Engines::Container::_nil();
228 MESSAGE("SALOME_ContainerManager::StartContainer " <<
229 possibleComputers.length());
// Copy the CORBA machine list into the container expected by the resources manager.
232 for(unsigned int i=0;i<possibleComputers.length();i++)
233 lm.push_back(string(possibleComputers[i]));
// Machine selection: one resources-manager query per policy.
238 case Engines::P_FIRST:
239 theMachine=_ResManager->GetImpl()->FindFirst(lm);
241 case Engines::P_CYCL:
242 theMachine=_ResManager->GetImpl()->FindNext(lm);
244 case Engines::P_BEST:
245 theMachine=_ResManager->GetImpl()->FindBest(lm);
249 catch( const SALOME_Exception &ex ){
251 return Engines::Container::_nil();
254 //If the machine name is localhost use the real name
255 if(theMachine == "localhost")
256 theMachine=Kernel_Utils::GetHostname();
258 MESSAGE("try to launch it on " << theMachine);
260 // Get Id for container: a parallel container registers in Naming Service
261 // on the machine where is process 0. ContainerManager doesn't know the name
262 // of this machine before the launch of the parallel container. So to get
263 // the IOR of the parallel container in Naming Service, ContainerManager
264 // gives a unique Id. The parallel container registers his name under
265 // /ContainerManager/Id directory in NamingService
267 id = GetIdForContainer();
271 MESSAGE("SALOME_ContainerManager::StartContainer : " <<
272 "no possible computer");
273 return Engines::Container::_nil();
// Local host: launch through a generated script; otherwise build a remote command.
275 else if(theMachine==Kernel_Utils::GetHostname())
276 command = BuildCommandToLaunchLocalContainer(params,id,container_exe);
278 command = BuildCommandToLaunchRemoteContainer(theMachine,params,id,container_exe);
280 //check if an entry exists in Naming service
// Two naming schemes: "/ContainerManager/id<N>" or the name built from params and machine.
283 containerNameInNS = "/ContainerManager/id";
284 sprintf(idc,"%ld",id);
285 containerNameInNS += idc;
288 containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str());
290 SCRUTE(containerNameInNS);
291 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
292 if ( !CORBA::is_nil(obj) )
296 // shutdown the registered container if it exists
297 Engines::Container_var cont=Engines::Container::_narrow(obj);
298 if(!CORBA::is_nil(cont))
301 catch(CORBA::Exception&)
303 INFOS("CORBA::Exception ignored.");
307 //redirect stdout and stderr in a file
// NOTE(review): getenv("USER") returns a null pointer when USER is unset;
// appending a null char* to a std::string is undefined behaviour - confirm
// USER is always defined or add a guard.
308 string logFilename="/tmp/"+_NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ;
309 command += " > " + logFilename + " 2>&1 &";
311 // launch container with a system call
312 int status=system(command.c_str());
316 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
317 "(system command status -1)");
318 RmTmpFile(); // command file can be removed here
319 return Engines::Container::_nil();
321 else if (status == 217){
322 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " <<
323 "(system command status 217)");
324 RmTmpFile(); // command file can be removed here
325 return Engines::Container::_nil();
// Poll the Naming Service until the container shows up or count reaches zero.
328 int count=TIME_OUT_TO_LAUNCH_CONT;
329 MESSAGE("count = "<<count);
330 while ( CORBA::is_nil(ret) && count ){
338 MESSAGE( count << ". Waiting for container on " << theMachine);
340 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
341 ret=Engines::Container::_narrow(obj);
344 if ( CORBA::is_nil(ret) )
346 MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed");
// Turn the log path into "<user>@<host>:<path>" before handing it to the container.
350 logFilename=":"+logFilename;
351 logFilename="@"+Kernel_Utils::GetHostname()+logFilename;
352 logFilename=getenv( "USER" )+logFilename;
353 ret->logfilename(logFilename.c_str());
356 RmTmpFile(); // command file can be removed here
361 //=============================================================================
362 //! Start a suitable Container for a list of components with constraints and a policy
364 * \param params Machine Parameters required for the container
365 * \param policy policy to use (first,cycl or best)
366 * \param componentList list of component to be loaded on this container
368 //=============================================================================
// Overload driven by a component list: the candidate machines come from
// _ResManager->GetFittingResources(), and the module catalog is queried for
// each component to find out whether a specific container executable is needed.
// Returns a nil reference when the catalog cannot be narrowed, when two CEXE
// components are requested together, or when an exception is caught.
370 Engines::Container_ptr
371 SALOME_ContainerManager::
372 StartContainer(const Engines::MachineParameters& params,
373 Engines::ResPolicy policy,
374 const Engines::CompoList& componentList)
376 Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params,componentList);
378 // Look into ModulCatalog if a specific container must be launched
379 CORBA::String_var container_exe;
383 CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog");
384 SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ;
385 if (CORBA::is_nil (Catalog))
386 return Engines::Container::_nil();
387 // Loop through component list
388 for(unsigned int i=0;i<componentList.length();i++)
390 const char* compoi = componentList[i];
391 SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi);
392 if (CORBA::is_nil (compoInfo))
396 SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type();
// NOTE(review): container_exe is assigned before the CEXE test, so it looks
// like it is overwritten for every component, not only CEXE ones - confirm
// that the value left after the loop is the intended executable.
397 container_exe=compoInfo->implementation_name();
398 if(impl==SALOME_ModuleCatalog::CEXE)
402 INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" );
403 return Engines::Container::_nil();
409 catch (ServiceUnreachable&)
411 INFOS("Caught exception: Naming Service Unreachable");
412 return Engines::Container::_nil();
416 INFOS("Caught unknown exception.");
417 return Engines::Container::_nil();
// Forward to the machine-list overload, with or without an explicit executable
// (presumably the first form is used when a CEXE component was found - TODO confirm).
421 return StartContainer(params,possibleComputers,policy,container_exe.in());
423 return StartContainer(params,possibleComputers,policy);
426 #ifdef WITH_PACO_PARALLEL
427 //=============================================================================
429 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
430 * \param params Machine Parameters required for the container
431 * \param possibleComputers list of machines usable for find or start
433 * \return CORBA container reference.
435 //=============================================================================
// PaCO++ variant: returns an existing container when FindContainer() finds
// one; otherwise launches a proxy process and node processes on the local
// host, then resolves every node in the Naming Service.
436 Engines::Container_ptr
437 SALOME_ContainerManager::
438 FindOrStartParallelContainer(const Engines::MachineParameters& params_const,
439 const Engines::MachineList& possibleComputers)
441 CORBA::Object_var obj;
442 PaCO::InterfaceManager_var proxy;
443 Engines::Container_ptr ret = Engines::Container::_nil();
// Work on a private copy because hostname is overwritten below.
444 Engines::MachineParameters params(params_const);
446 // Step 1 : Try to find a suitable container
447 // Currently not as good as it could be, since the number of nodes
448 // of the container should also be checked
449 // when the user specifies it.
450 ret = FindContainer(params, possibleComputers);
452 if(CORBA::is_nil(ret)) {
453 // Step 2 : Starting a new parallel container
454 INFOS("[FindOrStartParallelContainer] Starting a parallel container");
456 // Step 2.1 : Choose a computer
// NOTE(review): the non-parallel StartContainer calls
// _ResManager->GetImpl()->FindFirst() on a vector<string>; confirm this direct
// call with a MachineList still matches the resources manager interface.
457 string theMachine = _ResManager->FindFirst(possibleComputers);
458 if(theMachine == "") {
459 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
460 INFOS("[FindOrStartParallelContainer] No possible computer found");
461 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
464 INFOS("[FindOrStartParallelContainer] on machine : " << theMachine);
466 if(theMachine == Kernel_Utils::GetHostname()) {
467 // Step 3 : starting parallel container proxy
468 params.hostname = CORBA::string_dup(theMachine.c_str());
469 Engines::MachineParameters params_proxy(params);
471 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerProxy", params_proxy, "xterm");
473 catch(const SALOME_Exception & ex){
475 return Engines::Container::_nil();
477 // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes
478 params_proxy.nb_component_nodes = 0;
479 obj = LaunchParallelContainer(command, params_proxy, _NS->ContainerName(params));
// The same object is narrowed both as a container and as a PaCO interface manager.
480 ret = Engines::Container::_narrow(obj);
481 proxy = PaCO::InterfaceManager::_narrow(obj);
483 // Step 4 : starting parallel container nodes
484 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerNode", params, "xterm");
485 string name = _NS->ContainerName(params) + "Node";
486 LaunchParallelContainer(command, params, name);
487 // Step 5 : connecting nodes and the proxy to actually create a parallel container
489 for (int i = 0; i < params.nb_component_nodes; i++) {
// Node names are "<name><i>"; the two calls below are presumably the POSIX and
// Windows spellings selected by a platform #ifdef - TODO confirm.
493 snprintf(buffer,5,"%d",i);
495 _snprintf(buffer,5,"%d",i);
497 string name_cont = name + string(buffer);
// NOTE(review): CORBA::string_dup() allocates a copy that std::string copies
// again and nobody frees; constructing the string from params.hostname
// directly would avoid the leak - confirm.
499 string theNodeMachine(CORBA::string_dup(params.hostname));
500 string containerNameInNS = _NS->BuildContainerNameForNS(name_cont.c_str(),theNodeMachine.c_str());
// Retry the lookup until the node is registered or count reaches zero.
501 int count = TIME_OUT_TO_LAUNCH_CONT;
502 obj = _NS->Resolve(containerNameInNS.c_str());
503 while (CORBA::is_nil(obj) && count) {
504 INFOS("[FindOrStartParallelContainer] CONNECTION FAILED !!!!!!!!!!!!!!!!!!!!!!!!");
511 obj = _NS->Resolve(containerNameInNS.c_str());
514 PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
515 MESSAGE("[FindOrStartParallelContainer] Deploying node : " << name);
// Deployment errors are logged; none of the handlers below rethrows.
520 catch(CORBA::SystemException& e)
522 INFOS("Caught CORBA::SystemException. : " << e);
524 catch(PortableServer::POA::ServantAlreadyActive&)
526 INFOS("Caught CORBA::ServantAlreadyActiveException");
528 catch(CORBA::Exception&)
530 INFOS("Caught CORBA::Exception.");
532 catch(std::exception& exc)
534 INFOS("Caught std::exception - "<<exc.what());
538 INFOS("Caught unknown exception.");
540 INFOS("[FindOrStartParallelContainer] node " << name << " deployed");
543 INFOS("[FindOrStartParallelContainer] Currently parallel containers are launched only on the local host");
550 //=============================================================================
552 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
553 * \param params Machine Parameters required for the container
554 * \param possibleComputers list of machines usable for find or start
556 * \return CORBA container reference.
558 //=============================================================================
// Variant used when the parallel extension is not compiled in: it prepares a
// nil reference and only logs that the feature is disabled.
559 Engines::Container_ptr
560 SALOME_ContainerManager::
561 FindOrStartParallelContainer(const Engines::MachineParameters& params,
562 const Engines::MachineList& possibleComputers)
564 Engines::Container_ptr ret = Engines::Container::_nil();
565 INFOS("[FindOrStartParallelContainer] is disabled !");
566 INFOS("[FindOrStartParallelContainer] recompile SALOME Kernel to enable parallel extension");
571 //=============================================================================
572 //! Give a suitable Container for a list of components with constraints and a policy
574 * \param params Machine Parameters required for the container
575 * \param policy policy to use (first,cycl or best)
576 * \param componentList list of component to be loaded on this container
578 //=============================================================================
// When the SALOME_BATCH environment variable equals "1", hands out the
// already-registered containers one after the other, wrapping around at the
// end of the list; otherwise delegates to StartContainer().
580 Engines::Container_ptr
581 SALOME_ContainerManager::
582 GiveContainer(const Engines::MachineParameters& params,
583 Engines::ResPolicy policy,
584 const Engines::CompoList& componentList)
// NOTE(review): getenv() returns a null pointer when the variable is unset and
// strcmp() must not receive null - make sure a null check guards the comparison.
586 char *valenv=getenv("SALOME_BATCH");
588 if (strcmp(valenv,"1")==0)
// Build the list lazily on first use.
590 if(_batchLaunchedContainers.empty())
591 fillBatchLaunchedContainers();
// Wrap around once every container has been handed out.
593 if (_batchLaunchedContainersIter == _batchLaunchedContainers.end())
594 _batchLaunchedContainersIter = _batchLaunchedContainers.begin();
// NOTE(review): if the list is still empty after filling, begin() equals end()
// and this dereference is invalid - confirm an empty list cannot reach here.
596 Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter);
597 _batchLaunchedContainersIter++;
600 return StartContainer(params,policy,componentList);
603 //=============================================================================
607 //=============================================================================
// Looks up, in the Naming Service, the container name built from params and
// theMachine. Returns the narrowed reference when the entry resolves to a
// non-nil object, a nil reference otherwise.
609 Engines::Container_ptr
610 SALOME_ContainerManager::
611 FindContainer(const Engines::MachineParameters& params,
612 const char *theMachine)
614 string containerNameInNS(_NS->BuildContainerNameForNS(params,theMachine));
615 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
616 if( !CORBA::is_nil(obj) )
617 return Engines::Container::_narrow(obj);
619 return Engines::Container::_nil();
622 //=============================================================================
626 //=============================================================================
// Tries the single-machine FindContainer() on each entry of possibleComputers,
// in list order. Returns a nil reference when no machine yields a container.
628 Engines::Container_ptr
629 SALOME_ContainerManager::
630 FindContainer(const Engines::MachineParameters& params,
631 const Engines::MachineList& possibleComputers)
633 MESSAGE("FindContainer "<<possibleComputers.length());
634 for(unsigned int i=0;i<possibleComputers.length();i++)
636 MESSAGE("FindContainer possible " << possibleComputers[i]);
637 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
638 if( !CORBA::is_nil(cont) )
641 MESSAGE("FindContainer: not found");
642 return Engines::Container::_nil();
645 //=============================================================================
646 /*! This method launches the parallel container.
647 * It may be placed on the resources manager.
649 * \param command to launch
650 * \param container's parameters
651 * \param name of the container
653 * \return CORBA container reference
655 //=============================================================================
// Runs command through system() and waits for the launched processes to
// register in the Naming Service.
// params.nb_component_nodes == 0 means "launch the proxy": a single name is
// awaited. Otherwise one name per node ("<name><i>") is awaited in turn.
// Each wait retries until the entry resolves or the count
// (TIME_OUT_TO_LAUNCH_CONT) reaches zero.
657 SALOME_ContainerManager::LaunchParallelContainer(const std::string& command,
658 const Engines::MachineParameters& params,
659 const std::string& name)
661 CORBA::Object_ptr obj = CORBA::Object::_nil();
662 string containerNameInNS;
663 MESSAGE("[LaunchParallelContainer] : command to launch...");
665 if (params.nb_component_nodes == 0) {
666 INFOS("[LaunchParallelContainer] launching the proxy of the parallel container");
667 int status = system(command.c_str());
669 INFOS("[LaunchParallelContainer] failed : system command status -1");
671 else if (status == 217) {
672 INFOS("[LaunchParallelContainer] failed : system command status 217");
675 int count = TIME_OUT_TO_LAUNCH_CONT;
// NOTE(review): the buffer returned by CORBA::string_dup() is copied by
// std::string and never released - confirm and drop the string_dup.
676 string theMachine(CORBA::string_dup(params.hostname));
677 containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(),theMachine.c_str());
679 INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy on " << theMachine);
680 while (CORBA::is_nil(obj) && count) {
687 obj = _NS->Resolve(containerNameInNS.c_str());
691 INFOS("[LaunchParallelContainer] launching the nodes of the parallel container");
692 int status = system(command.c_str());
694 INFOS("[LaunchParallelContainer] failed : system command status -1");
696 else if (status == 217) {
697 INFOS("[LaunchParallelContainer] failed : system command status 217");
699 // We are waiting all the nodes
700 for (int i = 0; i < params.nb_component_nodes; i++) {
// Reset the reference and the retry budget for every node.
701 obj = CORBA::Object::_nil();
702 int count = TIME_OUT_TO_LAUNCH_CONT;
// Presumably the POSIX and Windows spellings of the same call, selected by a
// platform #ifdef - TODO confirm.
707 snprintf(buffer,5,"%d",i);
709 _snprintf(buffer,5,"%d",i);
712 string name_cont = name + string(buffer);
714 // I don't like this...
715 string theMachine(CORBA::string_dup(params.hostname));
716 containerNameInNS = _NS->BuildContainerNameForNS((char*) name_cont.c_str(),theMachine.c_str());
717 cerr << "[LaunchContainer] Waiting for Parllel Container node " << containerNameInNS << " on " << theMachine << endl;
718 while (CORBA::is_nil(obj) && count) {
725 obj = _NS->Resolve(containerNameInNS.c_str());
730 if ( CORBA::is_nil(obj) ) {
731 INFOS("[LaunchParallelContainer] failed");
736 //=============================================================================
738 * Get Id for container: a parallel container registers in Naming Service
739 * on the machine where process 0 runs. ContainerManager doesn't know the name
740 * of this machine before the launch of the parallel container. So to get
741 * the IOR of the parallel container in Naming Service, ContainerManager
742 * gives a unique Id. The parallel container registers its name under
743 * /ContainerManager/Id directory in NamingService
745 //=============================================================================
// Returns the identifier that StartContainer() appends to "/ContainerManager/id"
// when it builds a Naming Service entry name for a container.
748 long SALOME_ContainerManager::GetIdForContainer(void)
// Rebuilds _batchLaunchedContainers from the Naming Service: every entry found
// below "/Containers" that narrows to a non-nil Engines::Container is stored.
// The shared iterator is reset to the first element afterwards.
754 void SALOME_ContainerManager::fillBatchLaunchedContainers()
756 _batchLaunchedContainers.clear();
757 _NS->Change_Directory("/Containers");
758 vector<string> vec = _NS->list_directory_recurs();
759 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
760 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
// Plain _ptr (not _var): the reference is handed over to the vector.
761 Engines::Container_ptr cont=Engines::Container::_narrow(obj);
762 if(!CORBA::is_nil(cont)){
763 _batchLaunchedContainers.push_back(cont);
766 _batchLaunchedContainersIter=_batchLaunchedContainers.begin();
769 //=============================================================================
771 * This is no longer valid (C++ container are also python containers)
773 //=============================================================================
// Tests whether ContainerName ends with the two characters "Py"; the outcome
// of that comparison is presumably the returned value - TODO confirm.
775 bool isPythonContainer(const char* ContainerName)
778 int len = strlen(ContainerName);
// NOTE(review): "ContainerName + len - 2" is only valid when len >= 2 - confirm
// a length guard precedes this comparison.
781 if (strcmp(ContainerName + len - 2, "Py") == 0)
787 //=============================================================================
789 * Builds the script to be launched
791 * If SALOME Application not defined ($APPLI),
792 * see BuildTempFileToLaunchRemoteContainer()
794 * Else rely on distant configuration. Command is under the form (example):
795 * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
796 * SALOME_Container containerName &"
798 * - where user is omitted if not specified in CatalogResources,
799 * - where distant path is always relative to user@machine $HOME, and
800 * equal to $APPLI if not specified in CatalogResources,
801 * - where hostNS is the hostname of CORBA naming server (set by scripts to
802 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
803 * - where portNS is the port used by CORBA naming server (set by scripts to
804 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
805 * - where workingdir is the requested working directory for the container.
806 * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME
808 //=============================================================================
// Builds the shell command that starts a container on a remote machine.
// Without $APPLI (_isAppliSalomeDefined false) the work is delegated to
// BuildTempFileToLaunchRemoteContainer(). Otherwise the command is assembled
// from the resource description of the machine: protocol, optional user name,
// application path, "/runRemote.sh", the name-server host and port taken from
// the environment, an optional WORKINGDIR, and the container executable.
// Throws SALOME_Exception("Unknown protocol") for a protocol other than rsh/ssh.
811 SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
812 (const string& machine,
813 const Engines::MachineParameters& params, const long id,const std::string& container_exe)
// Buffer for the decimal text of id (filled by sprintf("%ld") below).
817 char idc[3*sizeof(long)];
819 if ( ! _isAppliSalomeDefined )
820 command = BuildTempFileToLaunchRemoteContainer(machine, params);
824 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
// Process count: nb_node * nb_proc_per_node, or whichever of the two is
// non-zero when the other one is zero.
828 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
830 else if ( params.nb_node == 0 )
831 nbproc = params.nb_proc_per_node;
832 else if ( params.nb_proc_per_node == 0 )
833 nbproc = params.nb_node;
835 nbproc = params.nb_node * params.nb_proc_per_node;
838 // "ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
839 // SALOME_Container containerName &"
841 if (resInfo.Protocol == rsh)
843 else if (resInfo.Protocol == ssh)
846 throw SALOME_Exception("Unknown protocol");
848 if (resInfo.UserName != "")
850 command += resInfo.UserName;
// Application path: the catalog value when given, else $APPLI.
857 if (resInfo.AppliPath != "")
858 command += resInfo.AppliPath; // path relative to user@machine $HOME
861 ASSERT(getenv("APPLI"));
862 command += getenv("APPLI"); // path relative to user@machine $HOME
865 command += "/runRemote.sh ";
867 ASSERT(getenv("NSHOST"));
868 command += getenv("NSHOST"); // hostname of CORBA name server
871 ASSERT(getenv("NSPORT"));
872 command += getenv("NSPORT"); // port of CORBA name server
874 std::string wdir=params.workingdir.in();
877 command += " WORKINGDIR ";
879 if(wdir == "$TEMPDIR")
881 command += wdir; // requested working directory
// MPI launch prefix and MPI container, or the plain container executable.
887 command += " mpirun -np ";
888 std::ostringstream o;
892 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
894 command += " SALOME_MPIContainer ";
897 command += " " +container_exe+ " ";
899 command += _NS->ContainerName(params);
901 sprintf(idc,"%ld",id);
// Append the naming-service options (see AddOmninamesParams).
904 AddOmninamesParams(command);
906 MESSAGE("command =" << command);
912 //=============================================================================
914 * builds the command to be launched.
916 //=============================================================================
// Writes the local launch command into a freshly named temporary script
// (_TmpFileName), makes it executable and uses the script path as the command.
// The script optionally changes to the requested working directory, then
// starts either an MPI container through mpirun, the Python container, or
// container_exe, followed by the container name, "-id <id>" and the
// naming-service options.
919 SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
920 (const Engines::MachineParameters& params, const long id,const std::string& container_exe)
922 _TmpFileName = BuildTemporaryFileName();
925 //char idc[3*sizeof(long)];
927 ofstream command_file( _TmpFileName.c_str() );
931 //command = "mpirun -np ";
932 command_file << "mpirun -np ";
// Process count: nb_node * nb_proc_per_node, or whichever of the two is
// non-zero when the other one is zero.
934 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
936 else if ( params.nb_node == 0 )
937 nbproc = params.nb_proc_per_node;
938 else if ( params.nb_proc_per_node == 0 )
939 nbproc = params.nb_node;
941 nbproc = params.nb_node * params.nb_proc_per_node;
943 //std::ostringstream o;
945 //o << nbproc << " ";
946 command_file << nbproc << " ";
948 //command += o.str();
950 //command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
951 command_file << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
954 if (isPythonContainer(params.container_name))
955 //command += "pyMPI SALOME_ContainerPy.py ";
956 command_file << "pyMPI SALOME_ContainerPy.py ";
958 //command += "SALOME_MPIContainer ";
959 command_file << "SALOME_MPIContainer ";
965 std::string wdir=params.workingdir.in();
968 // a working directory is requested
969 if(wdir == "$TEMPDIR")
971 // a new temporary directory is requested
972 string dir = Kernel_Utils::GetTmpDir();
// "cd /d" and plain "cd" are presumably the Windows and Unix branches of a
// platform #ifdef - TODO confirm.
974 //command += "cd /d "+ dir +";";
975 command_file << "cd /d " << dir << endl;
977 //command = "cd "+ dir +";";
978 command_file << "cd " << dir << ";";
984 // a permanent directory is requested use it or create it
986 //command="mkdir " + wdir;
987 command_file << "mkdir " + wdir << endl;
988 command_file << "cd /D " + wdir << endl;
990 //command="mkdir -p " + wdir + " && cd " + wdir + ";";
991 command_file << "mkdir -p " << wdir << " && cd " << wdir + ";";
995 if (isPythonContainer(params.container_name))
996 //command += "SALOME_ContainerPy.py ";
997 command_file << "SALOME_ContainerPy.py ";
999 //command += container_exe + " ";
1000 command_file << container_exe + " ";
1005 /*command += _NS->ContainerName(params);
1007 sprintf(idc,"%ld",id);
1010 AddOmninamesParams(command);*/
1012 command_file << _NS->ContainerName(params);
1013 command_file << " -id " << id << " -";
1014 AddOmninamesParams(command_file);
1015 command_file.close();
// 0x1ED is octal 0755 (rwxr-xr-x): make the generated script executable.
1018 chmod(_TmpFileName.c_str(), 0x1ED);
1020 command = _TmpFileName;
1022 MESSAGE("Command is file ... " << command);
1027 //=============================================================================
1029 * removes the generated temporary file in case of a remote launch.
1031 //=============================================================================
// Deletes the temporary command file named by _TmpFileName through a shell
// command ("del /F" or "rm"), then removes its directory when that directory
// is empty.
1033 void SALOME_ContainerManager::RmTmpFile()
1035 int lenght = _TmpFileName.size();
1039 string command = "del /F ";
1041 string command = "rm ";
// Two ways of naming the file: without its last three characters, or in full
// (the condition choosing between them is presumably platform-dependent -
// TODO confirm).
1044 command += _TmpFileName.substr(0, lenght - 3 );
1046 command += _TmpFileName;
1048 system(command.c_str());
1049 //if dir is empty - remove it
1050 string tmp_dir = Kernel_Utils::GetDirByPath( _TmpFileName );
1051 if ( Kernel_Utils::IsEmptyDir( tmp_dir ) )
1054 command = "del /F " + tmp_dir;
1056 command = "rmdir " + tmp_dir;
1058 system(command.c_str());
1063 //=============================================================================
1065 * add to command all options relative to naming service.
1067 //=============================================================================
// String flavour: appends "ORBInitRef NameService=" to command, using the IOR
// address obtained from the naming service helper.
1069 void SALOME_ContainerManager::AddOmninamesParams(string& command) const
1071 CORBA::String_var iorstr = _NS->getIORaddr();
1072 command += "ORBInitRef NameService=";
1077 //=============================================================================
1079 * add to command all options relative to naming service.
1081 //=============================================================================
// Stream flavour: writes "ORBInitRef NameService=" followed by the IOR address
// returned by _NS->getIORaddr() to fileStream.
1083 void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const
1085 CORBA::String_var iorstr = _NS->getIORaddr();
1086 fileStream << "ORBInitRef NameService=";
1087 fileStream << iorstr;
1090 //=============================================================================
1092 * generate a file name in /tmp directory
1094 //=============================================================================
// Returns a temporary file name obtained from Kernel_Utils::GetTmpFileName();
// a ".bat" extension is appended (presumably on Windows only - TODO confirm).
1096 string SALOME_ContainerManager::BuildTemporaryFileName() const
1098 //build more complex file name to support multiple salome session
1099 string aFileName = Kernel_Utils::GetTmpFileName();
1103 aFileName += ".bat";
1109 //=============================================================================
1111 * Builds in a temporary file the script to be launched.
1113 * Used if SALOME Application ($APPLI) is not defined.
1114 * The command is built with data from CatalogResources, in which every path
1115 * used on remote computer must be defined.
1117 //=============================================================================
// Writes a temporary shell script that launches a container, copies it to the
// target machine with rcp or scp, and assembles the remote command that runs it.
//
// Visible steps:
//   1. _TmpFileName is set from BuildTemporaryFileName() and opened for writing.
//   2. The script receives a "#! /bin/sh" line, an export of SALOME_trace, an
//      optional "mpirun -np <nbproc> -x ..." prefix, the KERNEL_ROOT_DIR based
//      path, the container executable name, the container name from
//      _NS->ContainerName(params) and the naming-service option, ending in " &".
//   3. The script is closed and its mode is changed with chmod.
//   4. Depending on resInfo.Protocol (rsh or ssh) the script is copied with
//      "rcp" or "scp" through system().
//   5. The command is saved in _CommandForRemAccess, then _TmpFileName is
//      appended to it.
//
// @param machine  name used to look up the resource description and as the
//                 copy target.
// @param params   container request; nb_node, nb_proc_per_node and
//                 container_name are read in the visible lines.
// @throws SALOME_Exception  "Unknown protocol" or "Error of connection on
//                           remote host" (the guarding conditions are not
//                           visible in this listing).
//
// NOTE(review): this listing dropped many lines (braces, conditions, the
// declarations of nbproc / status / command, and the return). Comments below
// describe only what is visible; anything marked "presumably" must be checked
// against the full file.
1120 SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
1121 (const string& machine,
1122 const Engines::MachineParameters& params) throw(SALOME_Exception)
// The script path is kept in the member _TmpFileName, which is reused below
// for the copy command and the final command line.
1126 _TmpFileName = BuildTemporaryFileName();
1127 ofstream tempOutputFile;
// NOTE(review): no visible check that the open succeeded.
1128 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
// Resource description of the target machine; its Protocol field selects the
// copy tool further down.
1129 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
1130 tempOutputFile << "#! /bin/sh" << endl;
1134 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
1135 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
// MPI launch prefix. Presumably guarded by an MPI test on params (the
// condition is not visible) - confirm.
1141 tempOutputFile << "mpirun -np ";
// Process count: a zero count on one side falls back to the other side,
// otherwise the product of both is used. The value assigned when both counts
// are <= 0 is on a line missing from this listing.
1144 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
1146 else if ( params.nb_node == 0 )
1147 nbproc = params.nb_proc_per_node;
1148 else if ( params.nb_proc_per_node == 0 )
1149 nbproc = params.nb_node;
1151 nbproc = params.nb_node * params.nb_proc_per_node;
// NOTE(review): `o` has no visible use in this listing; possibly a leftover.
1153 std::ostringstream o;
1155 tempOutputFile << nbproc << " ";
// Environment variables named on the mpirun command line through -x.
1157 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// NOTE(review): getenv() returns a null pointer when KERNEL_ROOT_DIR is unset,
// and streaming a null char pointer is undefined behaviour - confirm whether
// a guard exists on a line not shown here.
1161 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
// First executable choice: MPI variants (Python via pyMPI, otherwise the C++
// MPI container). Presumably the MPI branch of a condition that is not visible.
1165 if (isPythonContainer(params.container_name))
1166 tempOutputFile << "pyMPI SALOME_ContainerPy.py ";
1168 tempOutputFile << "SALOME_MPIContainer ";
// Second executable choice: non-MPI variants.
1173 if (isPythonContainer(params.container_name))
1174 tempOutputFile << "SALOME_ContainerPy.py ";
1176 tempOutputFile << "SALOME_Container ";
// The trailing " -" supplies the dash for the "ORBInitRef NameService=..."
// text written by AddOmninamesParams.
1179 tempOutputFile << _NS->ContainerName(params) << " -";
1180 AddOmninamesParams(tempOutputFile);
// The generated command ends with " &".
1181 tempOutputFile << " &" << endl;
1182 tempOutputFile.flush();
1183 tempOutputFile.close();
// 0x1ED is 0755 in octal (rwxr-xr-x).
1185 chmod(_TmpFileName.c_str(), 0x1ED);
1188 // --- Build command
// Copy the script to the remote machine with the tool matching the protocol.
// The pieces appended between the visible lines (separators, user name) are
// missing from this listing.
// NOTE(review): machine and _TmpFileName are concatenated unquoted into a
// string passed to system().
1192 if (resInfo.Protocol == rsh)
1195 string commandRcp = "rcp ";
1196 commandRcp += _TmpFileName;
1198 commandRcp += machine;
1200 commandRcp += _TmpFileName;
1201 status = system(commandRcp.c_str());
1204 else if (resInfo.Protocol == ssh)
1207 string commandRcp = "scp ";
1208 commandRcp += _TmpFileName;
1210 commandRcp += machine;
1212 commandRcp += _TmpFileName;
1213 status = system(commandRcp.c_str());
// Any other protocol value is rejected.
1216 throw SALOME_Exception("Unknown protocol");
// Presumably raised when `status` reports a failed copy (the condition is not
// visible) - confirm.
1219 throw SALOME_Exception("Error of connection on remote host");
// The remote-access prefix is stored before the script path is appended.
1222 _CommandForRemAccess = command;
1224 command += _TmpFileName;
1232 //=============================================================================
1233 /*! Creates a command line that the container manager uses to launch
1234 * a parallel container.
1236 //=============================================================================
// Builds the shell command that starts a local parallel container (proxy or
// node) for the requested parallel library.
//
// Visible behaviour:
//   - real_exe_name is exe_name with params.parallelLib appended.
//   - parallelLib "Dummy": the executable is run directly with the container
//     name, the library name and the host name as arguments.
//   - parallelLib "Mpi": the executable is run through "mpiexec", in one of
//     two forms ("-np <nb_component_nodes>" or "-np 1" with the count passed
//     as an argument).
//   - any other value: a SALOME_Exception is thrown.
//   - log == "default": output is redirected to a log file under /tmp and the
//     command is put in the background; otherwise the command is wrapped in an
//     xterm invocation.
//
// @param exe_name  base executable name; searched for the substring "Proxy".
// @param params    container request (parallelLib, hostname,
//                  nb_component_nodes and the fields copied into rtn).
// @param log       "default" selects file logging, anything else the xterm
//                  wrapper.
// @throws SALOME_Exception  when parallelLib is neither "Dummy" nor "Mpi".
//
// NOTE(review): this listing dropped many lines (braces, the `buffer` and
// `command` declarations, the conditions choosing between the two mpiexec
// forms, separators in the log file name, and the return). Comments below
// describe only what is visible.
1238 SALOME_ContainerManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1239 const Engines::MachineParameters& params,
1240 const std::string& log)
1242 // This method knows the differences between the proxy and the nodes.
1243 // nb_component_nodes is not used in the same way if it is a proxy or
// NOTE(review): CORBA::string_dup returns a newly allocated copy that the
// caller owns; std::string copies it again and nothing visible frees it, so
// both lines look like small leaks - confirm, and consider constructing the
// strings straight from the params fields.
1247 string parallelLib(CORBA::string_dup(params.parallelLib));
1248 string hostname(CORBA::string_dup(params.hostname));
// Position of "Proxy" in exe_name, stored in an int. Its use is not visible;
// presumably it selects between the two mpiexec forms below - confirm.
1249 int par = exe_name.find("Proxy");
1250 int nbproc = params.nb_component_nodes;
// Decimal text of nbproc, reused in the mpiexec commands. The declaration and
// size of `buffer` are not visible in this listing.
1252 sprintf(buffer,"%d",nbproc);
// Copy of selected request fields, used only for _NS->ContainerName(rtn).
// parallelLib and nb_component_nodes are not copied.
1254 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1255 rtn->container_name = params.container_name;
1256 rtn->hostname = params.hostname;
1257 rtn->OS = params.OS;
1258 rtn->mem_mb = params.mem_mb;
1259 rtn->cpu_clock = params.cpu_clock;
1260 rtn->nb_proc_per_node = params.nb_proc_per_node;
1261 rtn->nb_node = params.nb_node;
1262 rtn->isMPI = params.isMPI;
// The executable name carries the parallel library as a suffix.
1264 string real_exe_name = exe_name + parallelLib;
// "Dummy" library: direct launch, no MPI launcher.
1266 if (parallelLib == "Dummy")
1268 //command = "gdb --args ";
1269 //command = "valgrind --tool=memcheck --log-file=val_log ";
1270 //command += real_exe_name;
1272 command = real_exe_name;
1274 command += " " + _NS->ContainerName(rtn);
1275 command += " " + parallelLib;
1276 command += " " + hostname;
1278 AddOmninamesParams(command);
// "Mpi" library: launch through mpiexec.
1281 else if (parallelLib == "Mpi")
1283 // Step 1 : check if MPI is started
// The action taken when MPI is not started is on lines missing from this
// listing.
1284 if (_MpiStarted == false)
// First form: nb_component_nodes processes of the executable.
1293 command = "mpiexec -np " + string(buffer) + " ";
1294 // command += "gdb --args ";
1295 command += real_exe_name;
1296 command += " " + _NS->ContainerName(rtn);
1297 command += " " + parallelLib;
1298 command += " " + hostname;
1300 AddOmninamesParams(command);
// Second form: a single process that receives the node count as an argument
// after the container name.
1305 command = "mpiexec -np 1 ";
1306 command += real_exe_name;
1307 command += " " + _NS->ContainerName(rtn);
1308 command += " " + string(buffer);
1309 command += " " + parallelLib;
1310 command += " " + hostname;
1312 AddOmninamesParams(command);
// Unsupported library name. The message has no separator between the fixed
// text and the library name.
1317 std::string message("Unknown parallelLib" + parallelLib);
1318 throw SALOME_Exception(message.c_str());
// Output handling: file logging in the background, or an xterm wrapper.
1322 if (log == "default")
1324 command += " > /tmp/";
1325 command += _NS->ContainerName(rtn);
1327 command += Kernel_Utils::GetHostname();
// NOTE(review): getenv("USER") returns a null pointer when USER is unset, and
// appending a null char pointer to std::string is undefined behaviour -
// confirm whether a guard is needed.
1329 command += getenv( "USER" ) ;
1330 command += ".log 2>&1 &" ;
// Non-default logging: run the command inside an xterm that re-exports
// LD_LIBRARY_PATH and PATH.
1334 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1335 + command + " \" &";
1336 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
1340 /* if (log == "xterm")
1342 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1345 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1346 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1347 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1348 command += "ssh cn01 sh " + fichier_commande + " &";
1349 cerr << "La commande : " << command << endl;
1353 void SALOME_ContainerManager::startMPI()
1355 cerr << "----------------------------------------------" << endl;
1356 cerr << "----------------------------------------------" << endl;
1357 cerr << "----------------------------------------------" << endl;
1358 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1359 cerr << "----------------------------------------------" << endl;
1360 cerr << "----------------------------------------------" << endl;
1361 cerr << "----------------------------------------------" << endl;
1363 int status = system("lamboot");
1366 INFOS("lamboot failed : system command status -1");
1368 else if (status == 217)
1370 INFOS("lamboot failed : system command status 217");