1 // Copyright (C) 2007-2008 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
22 #include "SALOME_ContainerManager.hxx"
23 #include "SALOME_NamingService.hxx"
24 #include "SALOME_ModuleCatalog.hh"
25 #include "Basics_Utils.hxx"
26 #include "Basics_DirUtils.hxx"
27 #include <sys/types.h>
33 #include "Utils_CorbaException.hxx"
34 #include "Batch_Date.hxx"
36 #ifdef WITH_PACO_PARALLEL
40 #define TIME_OUT_TO_LAUNCH_CONT 61
// Definitions of the static members used in batch mode.
// _batchLaunchedContainers is (re)filled by fillBatchLaunchedContainers() with the
// container references found under "/Containers" in the naming service.
44 vector<Engines::Container_ptr> SALOME_ContainerManager::_batchLaunchedContainers;
// Cursor into _batchLaunchedContainers: GiveContainer() hands out the element it
// points to, advances it, and moves it back to begin() once it reaches end().
46 vector<Engines::Container_ptr>::iterator SALOME_ContainerManager::_batchLaunchedContainersIter;
48 const char *SALOME_ContainerManager::_ContainerManagerNameInNS =
51 //=============================================================================
55 * Define a CORBA single thread policy for the server, which avoids having to deal
56 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
58 //=============================================================================
// Constructor. From the statements visible here it:
//  - duplicates orb into _orb,
//  - creates a child POA named "SThreadPOA" (sharing poa's POAManager) whose
//    policy list holds a SINGLE_THREAD_MODEL thread policy,
//  - activates this servant on that POA and registers the narrowed
//    Engines::ContainerManager reference in the naming service under
//    _ContainerManagerNameInNS,
//  - records in _isAppliSalomeDefined whether the APPLI environment variable is set.
// \param orb ORB whose reference is duplicated into _orb
// \param poa parent POA used to create the single-threaded child POA
// \param rm  resources manager -- NOTE(review): its use is not visible in this excerpt; presumably stored in _ResManager
// \param ns  naming service -- NOTE(review): presumably stored in _NS before the Register() call below; confirm
60 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns)
62 MESSAGE("constructor");
66 PortableServer::POAManager_var pman = poa->the_POAManager();
67 _orb = CORBA::ORB::_duplicate(orb) ;
68 CORBA::PolicyList policies;
70 PortableServer::ThreadPolicy_var threadPol =
71 poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
// NOTE(review): policies[0] is written here; the call that sizes the sequence is not visible in this excerpt -- confirm it exists.
72 policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
// All requests to this servant go through the single-threaded child POA.
74 _poa = poa->create_POA("SThreadPOA",pman,policies);
76 PortableServer::ObjectId_var id = _poa->activate_object(this);
77 CORBA::Object_var obj = _poa->id_to_reference(id);
78 Engines::ContainerManager_var refContMan =
79 Engines::ContainerManager::_narrow(obj);
81 _NS->Register(refContMan,_ContainerManagerNameInNS);
// true when the APPLI environment variable is defined
83 _isAppliSalomeDefined = (getenv("APPLI") != 0);
84 MESSAGE("constructor end");
87 //=============================================================================
91 //=============================================================================
// Destructor: the only statement visible in this excerpt is a trace message.
93 SALOME_ContainerManager::~SALOME_ContainerManager()
95 MESSAGE("destructor");
98 //=============================================================================
99 //! shutdown all the containers, then the ContainerManager servant
102 //=============================================================================
// Stops the manager in three steps: shuts down the containers, removes the
// manager's own entry (_ContainerManagerNameInNS) from the naming service,
// then deactivates this servant in _poa.
104 void SALOME_ContainerManager::Shutdown()
107 ShutdownContainers();
108 _NS->Destroy_Name(_ContainerManagerNameInNS);
// Look up the object id of this servant so that it can be deactivated.
109 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
110 _poa->deactivate_object(oid);
113 //=============================================================================
114 //! Loop on all the containers listed in naming service, ask shutdown on each
117 //=============================================================================
// Walks the "/Containers" directory of the naming service in two passes:
//  1. collects in lstCont the names that resolve and narrow to a non-nil
//     Engines::Container (entries raising a CORBA exception are skipped);
//  2. resolves each collected name again and processes the container.
// CORBA and unknown exceptions raised in the second pass are logged and ignored.
// NOTE(review): the statement that actually asks each container to shut down
// is not visible in this excerpt -- presumably it follows the trace at the
// "ShutdownContainers: " message; confirm.
119 void SALOME_ContainerManager::ShutdownContainers()
121 MESSAGE("ShutdownContainers");
// NOTE(review): the declaration of isOK and the test on it are not visible here.
123 isOK = _NS->Change_Directory("/Containers");
125 vector<string> vec = _NS->list_directory_recurs();
126 list<string> lstCont;
// First pass: keep only the names that designate a container.
127 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++)
130 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
133 Engines::Container_var cont=Engines::Container::_narrow(obj);
134 if(!CORBA::is_nil(cont))
135 lstCont.push_back((*iter));
137 catch(const CORBA::Exception& e)
139 // ignore this entry and continue
142 MESSAGE("Container list: ");
143 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
// Second pass: resolve each collected name again and handle the container.
146 for(list<string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
148 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
149 Engines::Container_var cont=Engines::Container::_narrow(obj);
150 if(!CORBA::is_nil(cont))
152 MESSAGE("ShutdownContainers: " << (*iter));
157 catch(CORBA::SystemException& e)
159 INFOS("CORBA::SystemException ignored : " << e);
161 catch(CORBA::Exception&)
163 INFOS("CORBA::Exception ignored.");
167 INFOS("Unknown exception ignored.");
171 MESSAGE("ShutdownContainers: no container ref for " << (*iter));
176 //=============================================================================
177 //! Give a suitable Container given constraints
179 * \param params Machine Parameters required for the container
180 * \return the container or nil
182 //=============================================================================
// Returns a container for params.
// When the SALOME_BATCH environment variable equals "1", containers are taken
// from _batchLaunchedContainers in turn: the list is filled on first use, the
// cursor wraps back to begin() when it reaches end(), and a duplicated
// reference to the current element is taken before the cursor advances.
// Otherwise the request is forwarded to StartContainer(params).
// NOTE(review): a null check on valenv is not visible in this excerpt;
// strcmp() on a null pointer is undefined behaviour -- confirm the guard exists.
// NOTE(review): if the list is still empty after fillBatchLaunchedContainers(),
// begin() == end() and the dereference below would be invalid -- confirm.
184 Engines::Container_ptr
185 SALOME_ContainerManager::GiveContainer(const Engines::MachineParameters& params)
187 char *valenv=getenv("SALOME_BATCH");
189 if (strcmp(valenv,"1")==0)
191 if(_batchLaunchedContainers.empty())
192 fillBatchLaunchedContainers();
// wrap around once every container has been handed out
194 if (_batchLaunchedContainersIter == _batchLaunchedContainers.end())
195 _batchLaunchedContainersIter = _batchLaunchedContainers.begin();
// rtn is presumably returned on a line not visible here
197 Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter);
198 _batchLaunchedContainersIter++;
201 return StartContainer(params);
204 //=============================================================================
205 //! Start a suitable Container in a list of machines with constraints
207 * Constraints are given by a machine parameters struct
208 * \param params Machine Parameters required for the container
209 * \param possibleComputers list of machines usable for start
210 * \param container_exe specific container executable (default=SALOME_Container)
212 //=============================================================================
// Starts (or reuses) a container on one of possibleComputers.
// Steps visible in this excerpt:
//  - with WITH_PACO_PARALLEL and a non-empty params.parallelLib, delegates to
//    FindOrStartParallelContainer() with computerList set to possibleComputers;
//  - builds the candidate machine list lm: in "get" mode only machines on which
//    FindContainer() returns an existing container, otherwise every machine;
//  - asks the resources manager to pick a machine according to params.policy
//    (a SALOME_Exception leads to a nil result), replacing "localhost" by the
//    real host name;
//  - if the naming service already holds a live container under the computed
//    name, returns it in "getorstart"/"get" mode, otherwise shuts it down;
//  - builds the local or remote launch command, redirects its output to a log
//    file under /tmp, runs it in the background with system(), then polls the
//    naming service while the reference is nil and count is non-zero;
//  - on success stores "<USER>@<host>:<logfile>" in the container via logfilename().
// \param params            machine parameters required for the container
// \param possibleComputers machines on which the container may be started
// \param container_exe     container executable passed to the command builders
// \return the container reference, or nil on failure
// NOTE(review): getenv("USER") is concatenated into std::string below without a
// visible null check; if USER is unset this is undefined behaviour -- confirm.
214 Engines::Container_ptr
215 SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params,
216 const Engines::MachineList& possibleComputers,
217 const std::string& container_exe)
219 #ifdef WITH_PACO_PARALLEL
220 std::string parallelLib(params.parallelLib);
221 if (parallelLib != "")
223 Engines::MachineParameters myparams(params);
224 myparams.computerList=possibleComputers;
225 return FindOrStartParallelContainer(myparams);
228 string containerNameInNS;
229 Engines::Container_ptr ret = Engines::Container::_nil();
231 MESSAGE("SALOME_ContainerManager::StartContainer " << possibleComputers.length());
234 // if mode is "get" keep only machines with existing containers
235 if(std::string(params.mode.in())=="get")
237 for(unsigned int i=0;i<possibleComputers.length();i++)
239 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
242 if(!cont->_non_existent())
243 lm.push_back(string(possibleComputers[i]));
245 catch(CORBA::Exception&)
247 // CORBA::Exception ignored.
// other modes (presumably the else branch): every possible computer is a candidate
253 for(unsigned int i=0;i<possibleComputers.length();i++)
254 lm.push_back(string(possibleComputers[i]));
260 theMachine=_ResManager->GetImpl()->Find(params.policy.in(),lm);
262 catch( const SALOME_Exception &ex )
265 return Engines::Container::_nil();
268 //If the machine name is localhost use the real name
269 if(theMachine == "localhost")
270 theMachine=Kernel_Utils::GetHostname();
272 //check if an entry exists in Naming service
273 //if params.mode == "start" or "" shutdown the existing container before launching a new one with that name
274 //if params.mode == "getorstart" or "get" use the existing container
275 containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str());
277 SCRUTE(containerNameInNS);
278 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
279 if ( !CORBA::is_nil(obj) )
283 Engines::Container_var cont=Engines::Container::_narrow(obj);
284 if(!cont->_non_existent())
286 if(std::string(params.mode.in())=="getorstart"||std::string(params.mode.in())=="get")
287 return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/
289 cont->Shutdown(); // shutdown the registered container if it exists
292 catch(CORBA::Exception&)
294 INFOS("CORBA::Exception ignored.");
298 //try to launch a new container
299 MESSAGE("try to launch it on " << theMachine);
303 MESSAGE("SALOME_ContainerManager::StartContainer : no possible computer");
304 return Engines::Container::_nil();
306 else if(theMachine==Kernel_Utils::GetHostname())
307 command = BuildCommandToLaunchLocalContainer(params,container_exe);
309 command = BuildCommandToLaunchRemoteContainer(theMachine,params,container_exe);
311 //redirect stdout and stderr in a file
312 string logFilename="/tmp/"+_NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ;
// the trailing '&' runs the command in the background
313 command += " > " + logFilename + " 2>&1 &";
315 // launch container with a system call
316 int status=system(command.c_str());
319 MESSAGE("SALOME_ContainerManager::StartContainer rsh failed (system command status -1)");
320 RmTmpFile(_TmpFileName); // command file can be removed here
321 return Engines::Container::_nil();
323 else if (status == 217){
324 MESSAGE("SALOME_ContainerManager::StartContainer rsh failed (system command status 217)");
325 RmTmpFile(_TmpFileName); // command file can be removed here
326 return Engines::Container::_nil();
// Poll the naming service until the container registers or count reaches zero.
// NOTE(review): the wait and the decrement of count are not visible in this excerpt.
329 int count=TIME_OUT_TO_LAUNCH_CONT;
330 MESSAGE("count = "<<count);
331 while ( CORBA::is_nil(ret) && count ){
339 MESSAGE( count << ". Waiting for container on " << theMachine);
341 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
342 ret=Engines::Container::_narrow(obj);
345 if ( CORBA::is_nil(ret) )
347 MESSAGE("SALOME_ContainerManager::StartContainer rsh failed");
// Build "<USER>@<hostname>:<logfile>" and hand it to the container.
351 logFilename=":"+logFilename;
352 logFilename="@"+Kernel_Utils::GetHostname()+logFilename;
353 logFilename=getenv( "USER" )+logFilename;
354 ret->logfilename(logFilename.c_str());
357 RmTmpFile(_TmpFileName); // command file can be removed here
362 //=============================================================================
363 //! Start a suitable Container given constraints
365 * \param params Machine Parameters required for the container
367 //=============================================================================
// Starts a container for params on one of the machines returned by
// _ResManager->GetFittingResources(params).
// Before launching, the module catalog ("/Kernel/ModulCatalog" in the naming
// service) is queried for every component of params.componentList:
// container_exe receives the component's implementation name, and the
// implementation type is compared with CEXE. A nil catalog, a second CEXE
// component, an unreachable naming service or an unknown exception all lead
// to a nil result.
// Finally the three-argument StartContainer() is called, either with
// container_exe or with its default executable.
// NOTE(review): the condition selecting between the two final calls is not
// visible in this excerpt -- presumably whether a CEXE component was found.
// \param params machine parameters required for the container
// \return the container reference, or nil on failure
369 Engines::Container_ptr
370 SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params)
372 Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params);
374 // Look into ModulCatalog if a specific container must be launched
375 CORBA::String_var container_exe;
379 CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog");
380 SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ;
381 if (CORBA::is_nil (Catalog))
382 return Engines::Container::_nil();
383 // Loop through component list
384 for(unsigned int i=0;i<params.componentList.length();i++)
386 const char* compoi = params.componentList[i];
387 SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi);
388 if (CORBA::is_nil (compoInfo))
392 SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type();
393 container_exe=compoInfo->implementation_name();
394 if(impl==SALOME_ModuleCatalog::CEXE)
398 INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" );
399 return Engines::Container::_nil();
405 catch (ServiceUnreachable&)
407 INFOS("Caught exception: Naming Service Unreachable");
408 return Engines::Container::_nil();
412 INFOS("Caught unknown exception.");
413 return Engines::Container::_nil();
// launch with the specific executable found in the catalog ...
417 return StartContainer(params,possibleComputers,container_exe.in());
// ... or with the default container executable
419 return StartContainer(params,possibleComputers);
422 //=============================================================================
423 //! Find or start a suitable Container given some constraints
425 * \param params Machine Parameters required for the container
426 * \return the container or nil
428 //=============================================================================
// Looks for an existing container matching params on params.computerList;
// when none is found, falls back to StartContainer(params).
// NOTE(review): the statement executed when ret is not nil is not visible in
// this excerpt -- presumably "return ret;".
430 Engines::Container_ptr
431 SALOME_ContainerManager::FindOrStartContainer(const Engines::MachineParameters& params)
433 Engines::Container_ptr ret = FindContainer(params,params.computerList);
434 if(!CORBA::is_nil(ret))
436 MESSAGE("Container doesn't exist try to launch it ...");
438 return StartContainer(params);
441 //=============================================================================
442 //! Find a container given constraints (params) on a list of machines (possibleComputers)
446 //=============================================================================
// Tries each machine of possibleComputers in order with the single-machine
// FindContainer() overload; returns nil when no machine yields a container.
// NOTE(review): the statement executed on the first non-nil result is not
// visible in this excerpt -- presumably "return cont;".
// \param params            constraints used to build the container name
// \param possibleComputers machines to search, in order
448 Engines::Container_ptr
449 SALOME_ContainerManager::FindContainer(const Engines::MachineParameters& params,
450 const Engines::MachineList& possibleComputers)
452 MESSAGE("FindContainer "<<possibleComputers.length());
453 for(unsigned int i=0;i<possibleComputers.length();i++)
455 MESSAGE("FindContainer possible " << possibleComputers[i]);
456 Engines::Container_ptr cont = FindContainer(params,possibleComputers[i]);
457 if( !CORBA::is_nil(cont) )
460 MESSAGE("FindContainer: not found");
461 return Engines::Container::_nil();
464 //=============================================================================
465 //! Find a container given constraints (params) on a machine (theMachine)
469 //=============================================================================
// Resolves in the naming service the container name built from params and
// theMachine. Returns nil when the object no longer exists or when a CORBA
// exception is raised; otherwise returns the reference narrowed to
// Engines::Container.
// \param params     constraints used to build the container name
// \param theMachine machine on which the container is searched
471 Engines::Container_ptr
472 SALOME_ContainerManager::FindContainer(const Engines::MachineParameters& params,
473 const char *theMachine)
475 string containerNameInNS(_NS->BuildContainerNameForNS(params,theMachine));
476 CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
// a stale naming-service entry is treated as "not found"
479 if(obj->_non_existent())
480 return Engines::Container::_nil();
482 return Engines::Container::_narrow(obj);
484 catch(const CORBA::Exception& e)
486 return Engines::Container::_nil();
490 #ifdef WITH_PACO_PARALLEL
491 //=============================================================================
493 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
494 * \param params Machine Parameters required for the container
495 * \return CORBA container reference.
497 //=============================================================================
// PaCO++ variant (compiled under WITH_PACO_PARALLEL).
// First looks for an existing container on params.computerList. When none is
// found, a machine is chosen with _ResManager->FindFirst(); if it is the local
// host, the proxy ("SALOME_ParallelContainerProxy") and then the nodes
// ("SALOME_ParallelContainerNode") are launched through LaunchParallelContainer(),
// and each node "<container name>Node<i>" is looked up in the naming service
// and narrowed to PaCO::InterfaceParallel. For any other machine only a
// message is logged.
// \param params_const machine parameters; a local copy is modified (hostname)
// NOTE(review): the final return statement and the calls that connect the
// nodes to the proxy are not visible in this excerpt.
498 Engines::Container_ptr
499 SALOME_ContainerManager::FindOrStartParallelContainer(const Engines::MachineParameters& params_const)
501 CORBA::Object_var obj;
502 PaCO::InterfaceManager_var proxy;
503 Engines::Container_ptr ret = Engines::Container::_nil();
504 Engines::MachineParameters params(params_const);
506 // Step 1 : Try to find a suitable container
507 // Currently not as good as it could be, since
508 // we would have to verify the number of nodes of the container
509 // when the user specifies one.
510 ret = FindContainer(params, params.computerList);
512 if(CORBA::is_nil(ret)) {
513 // Step 2 : Starting a new parallel container
514 INFOS("[FindOrStartParallelContainer] Starting a parallel container");
516 // Step 2.1 : Choose a computer
517 string theMachine = _ResManager->FindFirst(params.computerList);
518 if(theMachine == "") {
519 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
520 INFOS("[FindOrStartParallelContainer] No possible computer found");
521 INFOS("[FindOrStartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!");
524 INFOS("[FindOrStartParallelContainer] on machine : " << theMachine);
526 if(theMachine == Kernel_Utils::GetHostname()) {
527 // Step 3 : starting parallel container proxy
528 params.hostname = CORBA::string_dup(theMachine.c_str());
529 Engines::MachineParameters params_proxy(params);
531 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerProxy", params_proxy, "xterm");
533 catch(const SALOME_Exception & ex){
535 return Engines::Container::_nil();
537 // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes
538 params_proxy.nb_component_nodes = 0;
539 obj = LaunchParallelContainer(command, params_proxy, _NS->ContainerName(params));
540 ret = Engines::Container::_narrow(obj);
541 proxy = PaCO::InterfaceManager::_narrow(obj);
543 // Step 4 : starting parallel container nodes
544 command = BuildCommandToLaunchLocalParallelContainer("SALOME_ParallelContainerNode", params, "xterm");
545 string name = _NS->ContainerName(params) + "Node";
546 LaunchParallelContainer(command, params, name);
547 // Step 5 : connecting nodes and the proxy to actually create a parallel container
549 for (int i = 0; i < params.nb_component_nodes; i++) {
// node index formatted as text; the two calls below presumably sit in
// platform-specific preprocessor branches that are not visible here
553 snprintf(buffer,5,"%d",i);
555 _snprintf(buffer,5,"%d",i);
557 string name_cont = name + string(buffer);
// NOTE(review): the string_dup result is copied into the std::string and not
// released here -- looks like a leak; confirm.
559 string theNodeMachine(CORBA::string_dup(params.hostname));
560 string containerNameInNS = _NS->BuildContainerNameForNS(name_cont.c_str(),theNodeMachine.c_str());
561 int count = TIME_OUT_TO_LAUNCH_CONT;
562 obj = _NS->Resolve(containerNameInNS.c_str());
// retry the lookup while the node is not registered and count is non-zero
563 while (CORBA::is_nil(obj) && count) {
564 INFOS("[FindOrStartParallelContainer] CONNECTION FAILED !!!!!!!!!!!!!!!!!!!!!!!!");
571 obj = _NS->Resolve(containerNameInNS.c_str());
574 PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
575 MESSAGE("[FindOrStartParallelContainer] Deploying node : " << name);
580 catch(CORBA::SystemException& e)
582 INFOS("Caught CORBA::SystemException. : " << e);
584 catch(PortableServer::POA::ServantAlreadyActive&)
586 INFOS("Caught CORBA::ServantAlreadyActiveException");
588 catch(CORBA::Exception&)
590 INFOS("Caught CORBA::Exception.");
592 catch(std::exception& exc)
594 INFOS("Caught std::exception - "<<exc.what());
598 INFOS("Caught unknown exception.");
600 INFOS("[FindOrStartParallelContainer] node " << name << " deployed");
603 INFOS("[FindOrStartParallelContainer] Currently parallel containers are launched only on the local host");
610 //=============================================================================
612 * Find or Start a suitable PaCO++ Parallel Container in a list of machines.
613 * \param params Machine Parameters required for the container
614 * \return CORBA container reference.
616 //=============================================================================
// Variant used when the parallel extension is not compiled in: it only logs
// that the feature is disabled. ret is initialised to nil.
// NOTE(review): the return statement is not visible in this excerpt --
// presumably "return ret;".
617 Engines::Container_ptr
618 SALOME_ContainerManager::FindOrStartParallelContainer(const Engines::MachineParameters& params)
620 Engines::Container_ptr ret = Engines::Container::_nil();
621 INFOS("[FindOrStartParallelContainer] is disabled !");
622 INFOS("[FindOrStartParallelContainer] recompile SALOME Kernel to enable parallel extension");
627 //=============================================================================
628 /*! This method launches the parallel container.
629 * It may be moved into the resources manager.
631 * \param command to launch
632 * \param container's parameters
633 * \param name of the container
635 * \return CORBA container reference
637 //=============================================================================
// Runs command with system() and then waits for the launched object(s) to
// appear in the naming service.
//  - params.nb_component_nodes == 0: the proxy is launched; its entry is built
//    from name and params.hostname and resolved while obj is nil and count is
//    non-zero.
//  - otherwise: the nodes are launched; for each i in [0, nb_component_nodes)
//    the entry built from name + "<i>" is awaited the same way.
// A system() status of -1 or 217 is logged as a failure, as is a nil obj at
// the end.
// \param command shell command to execute
// \param params  container parameters (nb_component_nodes, hostname are read)
// \param name    base name of the container in the naming service
// NOTE(review): the return statement, the wait between two lookups and the
// decrement of count are not visible in this excerpt.
639 SALOME_ContainerManager::LaunchParallelContainer(const std::string& command,
640 const Engines::MachineParameters& params,
641 const std::string& name)
643 CORBA::Object_ptr obj = CORBA::Object::_nil();
644 string containerNameInNS;
645 MESSAGE("[LaunchParallelContainer] : command to launch...");
647 if (params.nb_component_nodes == 0) {
648 INFOS("[LaunchParallelContainer] launching the proxy of the parallel container");
649 int status = system(command.c_str());
651 INFOS("[LaunchParallelContainer] failed : system command status -1");
653 else if (status == 217) {
654 INFOS("[LaunchParallelContainer] failed : system command status 217");
657 int count = TIME_OUT_TO_LAUNCH_CONT;
// NOTE(review): the string_dup result is copied into the std::string and not
// released here -- looks like a leak; confirm.
658 string theMachine(CORBA::string_dup(params.hostname));
659 containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(),theMachine.c_str());
661 INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy on " << theMachine);
662 while (CORBA::is_nil(obj) && count) {
669 obj = _NS->Resolve(containerNameInNS.c_str());
673 INFOS("[LaunchParallelContainer] launching the nodes of the parallel container");
674 int status = system(command.c_str());
676 INFOS("[LaunchParallelContainer] failed : system command status -1");
678 else if (status == 217) {
679 INFOS("[LaunchParallelContainer] failed : system command status 217");
681 // We are waiting all the nodes
682 for (int i = 0; i < params.nb_component_nodes; i++) {
// start each node's wait from a nil reference and a fresh counter
683 obj = CORBA::Object::_nil();
684 int count = TIME_OUT_TO_LAUNCH_CONT;
689 snprintf(buffer,5,"%d",i);
691 _snprintf(buffer,5,"%d",i);
694 string name_cont = name + string(buffer);
696 // I don't like this...
697 string theMachine(CORBA::string_dup(params.hostname));
698 containerNameInNS = _NS->BuildContainerNameForNS((char*) name_cont.c_str(),theMachine.c_str());
699 cerr << "[LaunchContainer] Waiting for Parllel Container node " << containerNameInNS << " on " << theMachine << endl;
700 while (CORBA::is_nil(obj) && count) {
707 obj = _NS->Resolve(containerNameInNS.c_str());
712 if ( CORBA::is_nil(obj) ) {
713 INFOS("[LaunchParallelContainer] failed");
// Rebuilds _batchLaunchedContainers from the naming service: the list is
// cleared, every name found recursively under "/Containers" is resolved and
// narrowed to Engines::Container, and each non-nil reference is appended.
// The cursor _batchLaunchedContainersIter is then reset to the first element.
// NOTE(review): the references returned by _narrow() are stored as raw _ptr
// values and nothing visible here releases them -- confirm ownership.
718 void SALOME_ContainerManager::fillBatchLaunchedContainers()
720 _batchLaunchedContainers.clear();
721 _NS->Change_Directory("/Containers");
722 vector<string> vec = _NS->list_directory_recurs();
723 for(vector<string>::iterator iter = vec.begin();iter!=vec.end();iter++){
724 CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
725 Engines::Container_ptr cont=Engines::Container::_narrow(obj);
// entries that are not containers narrow to nil and are skipped
726 if(!CORBA::is_nil(cont)){
727 _batchLaunchedContainers.push_back(cont);
730 _batchLaunchedContainersIter=_batchLaunchedContainers.begin();
733 //=============================================================================
735 * This is no longer valid (C++ containers are also Python containers)
737 //=============================================================================
// Tests whether ContainerName ends with the two characters "Py" by comparing
// the tail of the string with strcmp().
// \param ContainerName null-terminated container name
// NOTE(review): for names shorter than two characters "len - 2" is negative;
// the guard and the return statement are not visible in this excerpt -- confirm.
739 bool isPythonContainer(const char* ContainerName)
742 int len = strlen(ContainerName);
745 if (strcmp(ContainerName + len - 2, "Py") == 0)
751 //=============================================================================
753 * Builds the script to be launched
755 * If SALOME Application not defined ($APPLI),
756 * see BuildTempFileToLaunchRemoteContainer()
758 * Else rely on distant configuration. Command is under the form (example):
759 * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
760 * SALOME_Container containerName &"
762 * - where user is omitted if not specified in CatalogResources,
763 * - where distant path is always relative to user@machine $HOME, and
764 * equal to $APPLI if not specified in CatalogResources,
765 * - where hostNS is the hostname of CORBA naming server (set by scripts to
766 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
767 * - where portNS is the port used by CORBA naming server (set by scripts to
768 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
769 * - where workingdir is the requested working directory for the container.
770 * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME
772 //=============================================================================
// Builds the shell command that launches a container on a remote machine.
// When no SALOME application is defined (_isAppliSalomeDefined is false) the
// work is delegated to BuildTempFileToLaunchRemoteContainer(). Otherwise the
// command is assembled from the resource description of machine: remote shell
// chosen from resInfo.Protocol (rsh or ssh, anything else throws
// SALOME_Exception("Unknown protocol")), optional user name, application path
// (resInfo.AppliPath or $APPLI), "/runRemote.sh", $NSHOST, $NSPORT, an
// optional "WORKINGDIR <dir>", an optional mpirun prefix, the container
// executable, the container name and the naming-service options.
// \param machine       target machine name
// \param params        container parameters (nb_node, nb_proc_per_node, workingdir are read)
// \param container_exe executable used for a non-MPI container
// NOTE(review): the return statement and several branch conditions are not
// visible in this excerpt.
775 SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
776 (const string& machine,
777 const Engines::MachineParameters& params, const std::string& container_exe)
782 if ( ! _isAppliSalomeDefined )
783 command = BuildTempFileToLaunchRemoteContainer(machine, params);
787 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
// Number of MPI processes: the product of nb_node and nb_proc_per_node, or
// whichever of the two is non-zero (the value used when both are <= 0 is not
// visible in this excerpt).
791 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
793 else if ( params.nb_node == 0 )
794 nbproc = params.nb_proc_per_node;
795 else if ( params.nb_proc_per_node == 0 )
796 nbproc = params.nb_node;
798 nbproc = params.nb_node * params.nb_proc_per_node;
801 // "ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
802 // SALOME_Container containerName &"
804 if (resInfo.Protocol == rsh)
806 else if (resInfo.Protocol == ssh)
809 throw SALOME_Exception("Unknown protocol");
811 if (resInfo.UserName != "")
813 command += resInfo.UserName;
820 if (resInfo.AppliPath != "")
821 command += resInfo.AppliPath; // path relative to user@machine $HOME
824 ASSERT(getenv("APPLI"));
825 command += getenv("APPLI"); // path relative to user@machine $HOME
828 command += "/runRemote.sh ";
830 ASSERT(getenv("NSHOST"));
831 command += getenv("NSHOST"); // hostname of CORBA name server
834 ASSERT(getenv("NSPORT"));
835 command += getenv("NSPORT"); // port of CORBA name server
837 std::string wdir=params.workingdir.in();
840 command += " WORKINGDIR ";
842 if(wdir == "$TEMPDIR")
844 command += wdir; // requested working directory
850 command += " mpirun -np ";
851 std::ostringstream o;
855 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
856 #elif defined(WITHOPENMPI)
// with Open MPI, pass the server URI file when OMPI_URI_FILE is set
857 if( getenv("OMPI_URI_FILE") == NULL )
858 command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace";
860 command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
861 command += getenv("OMPI_URI_FILE");
864 command += " SALOME_MPIContainer ";
867 command += " " +container_exe+ " ";
869 command += _NS->ContainerName(params);
871 AddOmninamesParams(command);
873 MESSAGE("command =" << command);
879 //=============================================================================
881 * builds the command to be launched.
883 //=============================================================================
// Writes the commands that launch a local container into a temporary script
// (_TmpFileName) and uses that script's path as the command.
// The script optionally starts with an mpirun prefix, or with a change of
// working directory (a fresh temporary directory when params.workingdir is
// "$TEMPDIR", otherwise the requested directory, created if needed), followed
// by the container executable, the container name and the naming-service
// options. The script is then made executable.
// \param params        container parameters (nb_node, nb_proc_per_node, workingdir, container_name are read)
// \param container_exe executable used for a non-Python, non-MPI container
// NOTE(review): the return statement and the preprocessor/branch conditions
// separating the alternatives below are not visible in this excerpt.
886 SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
887 (const Engines::MachineParameters& params, const std::string& container_exe)
889 _TmpFileName = BuildTemporaryFileName();
893 ofstream command_file( _TmpFileName.c_str() );
897 //command = "mpirun -np ";
898 command_file << "mpirun -np ";
// Number of MPI processes: the product of nb_node and nb_proc_per_node, or
// whichever of the two is non-zero.
900 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
902 else if ( params.nb_node == 0 )
903 nbproc = params.nb_proc_per_node;
904 else if ( params.nb_proc_per_node == 0 )
905 nbproc = params.nb_node;
907 nbproc = params.nb_node * params.nb_proc_per_node;
909 //std::ostringstream o;
911 //o << nbproc << " ";
912 command_file << nbproc << " ";
914 //command += o.str();
916 //command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
917 command_file << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
918 #elif defined(WITHOPENMPI)
919 //command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace ";
920 if( getenv("OMPI_URI_FILE") == NULL )
921 command_file << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace";
924 command_file << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
925 command_file << getenv("OMPI_URI_FILE");
929 if (isPythonContainer(params.container_name))
930 //command += "pyMPI SALOME_ContainerPy.py ";
931 command_file << " pyMPI SALOME_ContainerPy.py ";
933 //command += "SALOME_MPIContainer ";
934 command_file << " SALOME_MPIContainer ";
940 std::string wdir=params.workingdir.in();
943 // a working directory is requested
944 if(wdir == "$TEMPDIR")
946 // a new temporary directory is requested
947 string dir = Kernel_Utils::GetTmpDir();
949 //command += "cd /d "+ dir +";";
950 command_file << "cd /d " << dir << endl;
952 //command = "cd "+ dir +";";
953 command_file << "cd " << dir << ";";
959 // a permanent directory is requested use it or create it
961 //command="mkdir " + wdir;
962 command_file << "mkdir " + wdir << endl;
963 command_file << "cd /D " + wdir << endl;
965 //command="mkdir -p " + wdir + " && cd " + wdir + ";";
966 command_file << "mkdir -p " << wdir << " && cd " << wdir + ";";
970 if (isPythonContainer(params.container_name))
971 //command += "SALOME_ContainerPy.py ";
972 command_file << "SALOME_ContainerPy.py ";
974 //command += container_exe + " ";
975 command_file << container_exe + " ";
979 command_file << _NS->ContainerName(params);
980 command_file << " -";
981 AddOmninamesParams(command_file);
982 command_file.close();
// 0x1ED is octal 0755: rwx for the owner, r-x for group and others
985 chmod(_TmpFileName.c_str(), 0x1ED);
// the command to run is the generated script itself
987 command = _TmpFileName;
989 MESSAGE("Command is file ... " << command);
994 //=============================================================================
996 * removes the generated temporary file in case of a remote launch.
998 //=============================================================================
// Deletes the temporary file tmpFileName by running a shell command
// ("del /F " or "rm ") through system(), then removes the directory that
// contained it when that directory has become empty.
// \param tmpFileName path of the temporary file to delete
// NOTE(review): the platform conditions selecting between the alternatives
// below, and any guard on an empty file name, are not visible in this excerpt.
1000 void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName)
1002 int lenght = tmpFileName.size();
1006 string command = "del /F ";
1008 string command = "rm ";
// one alternative drops the last three characters of the name, the other
// uses the name unchanged
1011 command += tmpFileName.substr(0, lenght - 3 );
1013 command += tmpFileName;
1015 system(command.c_str());
1016 //if dir is empty - remove it
1017 string tmp_dir = Kernel_Utils::GetDirByPath( tmpFileName );
1018 if ( Kernel_Utils::IsEmptyDir( tmp_dir ) )
1021 command = "del /F " + tmp_dir;
1023 command = "rmdir " + tmp_dir;
1025 system(command.c_str());
1030 //=============================================================================
1032 * add to command all options relative to naming service.
1034 //=============================================================================
// Appends the naming-service option "ORBInitRef NameService=" to command.
// The IOR address is fetched from the naming service into iorstr.
// \param command command line being built; modified in place
// NOTE(review): the statement appending iorstr to command is not visible in
// this excerpt -- presumably it follows the line below.
1036 void SALOME_ContainerManager::AddOmninamesParams(string& command) const
1038 CORBA::String_var iorstr = _NS->getIORaddr();
1039 command += "ORBInitRef NameService=";
1044 //=============================================================================
1046 * add to command all options relative to naming service.
1048 //=============================================================================
// Stream variant: writes "ORBInitRef NameService=" followed by the IOR
// address obtained from the naming service to fileStream.
// \param fileStream output stream of the script being generated
1050 void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const
1052 CORBA::String_var iorstr = _NS->getIORaddr();
1053 fileStream << "ORBInitRef NameService=";
1054 fileStream << iorstr;
1057 //=============================================================================
1059 * generate a file name in /tmp directory
1061 //=============================================================================
// Returns a temporary file name obtained from Kernel_Utils::GetTmpFileName().
// A ".bat" suffix is appended in one build configuration.
// NOTE(review): the condition guarding the ".bat" suffix and the return
// statement are not visible in this excerpt.
1063 string SALOME_ContainerManager::BuildTemporaryFileName() const
1065 //build more complex file name to support multiple salome session
1066 string aFileName = Kernel_Utils::GetTmpFileName();
1070 aFileName += ".bat";
1076 //=============================================================================
1078 * Builds in a temporary file the script to be launched.
1080 * Used if SALOME Application ($APPLI) is not defined.
1081 * The command is built with data from CatalogResources, in which every path
1082 * used on remote computer must be defined.
1084 //=============================================================================
// Generates a /bin/sh script (_TmpFileName) that launches the container,
// copies it to machine with rcp or scp according to resInfo.Protocol, and
// builds the command that runs the copied script.
// The script exports SALOME_trace=local, optionally prefixes the launch with
// mpirun, and starts the container executable (Python, MPI or
// SALOME_Container) in the background with the container name and the
// naming-service options.
// \param machine target machine name
// \param params  container parameters (nb_node, nb_proc_per_node, container_name are read)
// \throw SALOME_Exception "Unknown protocol" for a protocol other than rsh/ssh,
//        "Error of connection on remote host" after a failed copy
// NOTE(review): the test on status, the remote-access part of the commands
// and the return statement are not visible in this excerpt.
1087 SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
1088 (const string& machine,
1089 const Engines::MachineParameters& params) throw(SALOME_Exception)
1093 _TmpFileName = BuildTemporaryFileName();
1094 ofstream tempOutputFile;
1095 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
1096 const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine);
1097 tempOutputFile << "#! /bin/sh" << endl;
1101 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
1102 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
1108 tempOutputFile << "mpirun -np ";
// Number of MPI processes: the product of nb_node and nb_proc_per_node, or
// whichever of the two is non-zero.
1111 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
1113 else if ( params.nb_node == 0 )
1114 nbproc = params.nb_proc_per_node;
1115 else if ( params.nb_proc_per_node == 0 )
1116 nbproc = params.nb_node;
1118 nbproc = params.nb_node * params.nb_proc_per_node;
1120 std::ostringstream o;
1122 tempOutputFile << nbproc << " ";
1124 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
1125 #elif defined(WITHOPENMPI)
1126 if( getenv("OMPI_URI_FILE") == NULL )
1127 tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace";
1129 tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
1130 tempOutputFile << getenv("OMPI_URI_FILE");
// NOTE(review): streaming getenv("KERNEL_ROOT_DIR") assumes the variable is set; no check is visible here.
1135 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
1139 if (isPythonContainer(params.container_name))
1140 tempOutputFile << " pyMPI SALOME_ContainerPy.py ";
1142 tempOutputFile << " SALOME_MPIContainer ";
1147 if (isPythonContainer(params.container_name))
1148 tempOutputFile << "SALOME_ContainerPy.py ";
1150 tempOutputFile << "SALOME_Container ";
1153 tempOutputFile << _NS->ContainerName(params) << " -";
1154 AddOmninamesParams(tempOutputFile);
1155 tempOutputFile << " &" << endl;
1156 tempOutputFile.flush();
1157 tempOutputFile.close();
// 0x1ED is octal 0755: rwx for the owner, r-x for group and others
1159 chmod(_TmpFileName.c_str(), 0x1ED);
1162 // --- Build command
1166 if (resInfo.Protocol == rsh)
// copy the script to the remote machine with rcp
1169 string commandRcp = "rcp ";
1170 commandRcp += _TmpFileName;
1172 commandRcp += machine;
1174 commandRcp += _TmpFileName;
1175 status = system(commandRcp.c_str());
1178 else if (resInfo.Protocol == ssh)
// copy the script to the remote machine with scp
1181 string commandRcp = "scp ";
1182 commandRcp += _TmpFileName;
1184 commandRcp += machine;
1186 commandRcp += _TmpFileName;
1187 status = system(commandRcp.c_str());
1190 throw SALOME_Exception("Unknown protocol");
1193 throw SALOME_Exception("Error of connection on remote host");
// keep the remote-access prefix before the script path is appended
1196 _CommandForRemAccess = command;
1198 command += _TmpFileName;
1206 //=============================================================================
1207 /*! Creates a command line that the container manager uses to launch
1208 * a parallel container.
1210 //=============================================================================
// Builds the shell command line that starts a parallel container executable.
//
// exe_name: base executable name. params.parallelLib is appended to it to get
//           the real executable name, and it is searched for "Proxy".
// params:   container_name, hostname, OS, mem_mb, cpu_clock, nb_proc_per_node,
//           nb_node and isMPI are copied into a fresh MachineParameters used
//           for _NS->ContainerName(); parallelLib, hostname and
//           nb_component_nodes are also read directly.
// log:      when equal to "default", output is redirected to a file under
//           /tmp and the command is put in the background. An xterm wrapper is
//           also built below; its guarding condition is not shown here.
// Throws SALOME_Exception when the parallel library name is not handled
// (apparently: neither "Dummy" nor "Mpi").
//
// NOTE(review): several original lines (braces, else keywords, the declaration
// of `command` and `buffer`, the return) are absent from this listing; verify
// every remark marked "presumably" against the complete file.
1212 SALOME_ContainerManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1213 const Engines::MachineParameters& params,
1214 const std::string& log)
1216 // This method knows the differences between the proxy and the nodes.
1217 // nb_component_nodes is not used in the same way if it is a proxy or
// NOTE(review): CORBA::string_dup presumably returns a newly allocated copy
// that std::string copies again and nobody frees -- likely leak; confirm.
1221 string parallelLib(CORBA::string_dup(params.parallelLib));
1222 string hostname(CORBA::string_dup(params.hostname));
// find() yields std::string::npos when "Proxy" is absent; the value is
// narrowed to int here. The lines that test `par` are not shown.
1223 int par = exe_name.find("Proxy");
1224 int nbproc = params.nb_component_nodes;
// `buffer` (declared on a line not shown) receives nbproc as decimal text.
1226 sprintf(buffer,"%d",nbproc);
// Copy of selected fields of params, used only to compute the container name.
1228 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1229 rtn->container_name = params.container_name;
1230 rtn->hostname = params.hostname;
1231 rtn->OS = params.OS;
1232 rtn->mem_mb = params.mem_mb;
1233 rtn->cpu_clock = params.cpu_clock;
1234 rtn->nb_proc_per_node = params.nb_proc_per_node;
1235 rtn->nb_node = params.nb_node;
1236 rtn->isMPI = params.isMPI;
1238 string real_exe_name = exe_name + parallelLib;
// "Dummy" library: run the executable directly, with container name, library
// name and host name as arguments.
1240 if (parallelLib == "Dummy")
1242 //command = "gdb --args ";
1243 //command = "valgrind --tool=memcheck --log-file=val_log ";
1244 //command += real_exe_name;
1246 command = real_exe_name;
1248 command += " " + _NS->ContainerName(rtn);
1249 command += " " + parallelLib;
1250 command += " " + hostname;
// Called with a std::string here, so an overload other than the ofstream one
// must exist elsewhere in the file (not visible in this listing).
1252 AddOmninamesParams(command);
// "Mpi" library: the command is run through mpiexec.
1255 else if (parallelLib == "Mpi")
1257 // Step 1 : check if MPI is started
// The statement executed when MPI is not started is not shown -- presumably a
// call to startMPI(); confirm.
1258 if (_MpiStarted == false)
// First form: mpiexec starts nb_component_nodes processes. Which of the two
// forms is the node case and which the proxy case depends on a condition that
// is not shown (presumably on `par`).
1267 command = "mpiexec -np " + string(buffer) + " ";
1268 // command += "gdb --args ";
1269 command += real_exe_name;
1270 command += " " + _NS->ContainerName(rtn);
1271 command += " " + parallelLib;
1272 command += " " + hostname;
1274 AddOmninamesParams(command);
// Second form: a single process is started and the nb_component_nodes value is
// passed as an extra argument after the container name.
1279 command = "mpiexec -np 1 ";
1280 command += real_exe_name;
1281 command += " " + _NS->ContainerName(rtn);
1282 command += " " + string(buffer);
1283 command += " " + parallelLib;
1284 command += " " + hostname;
1286 AddOmninamesParams(command);
// NOTE(review): there is no space between "Unknown parallelLib" and the
// library name in the resulting message.
1291 std::string message("Unknown parallelLib" + parallelLib);
1292 throw SALOME_Exception(message.c_str());
// Default logging: redirect stdout and stderr to a file under /tmp whose name
// is built from the container name, the local host name and $USER (the
// separators are appended on lines not shown), then background the command.
1296 if (log == "default")
1298 command += " > /tmp/";
1299 command += _NS->ContainerName(rtn);
1301 command += Kernel_Utils::GetHostname();
// NOTE(review): getenv returns a null pointer when USER is unset; appending a
// null char pointer to std::string is undefined behaviour -- consider guarding.
1303 command += getenv( "USER" ) ;
1304 command += ".log 2>&1 &" ;
// Wraps the command in an xterm that re-exports LD_LIBRARY_PATH and PATH. The
// guarding condition is not shown -- presumably log == "xterm"; confirm.
1308 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1309 + command + " \" &";
1310 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
// The remaining lines shown for this function appear to be commented-out
// legacy code (their comment terminators are not among the lines shown).
1314 /* if (log == "xterm")
1316 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1319 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1320 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1321 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1322 command += "ssh cn01 sh " + fichier_commande + " &";
1323 cerr << "La commande : " << command << endl;
// Starts the MPI runtime by running the "lamboot" shell command.
// A banner is first printed on cerr stating that only LAM on localhost is
// supported; the exit status of system("lamboot") is then examined and a
// failure is reported through INFOS.
//
// NOTE(review): the lines following the status checks are not shown in this
// listing, so whether a member such as _MpiStarted is updated here cannot be
// told from these lines -- confirm against the complete file.
1327 void SALOME_ContainerManager::startMPI()
1329 cerr << "----------------------------------------------" << endl;
1330 cerr << "----------------------------------------------" << endl;
1331 cerr << "----------------------------------------------" << endl;
1332 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1333 cerr << "----------------------------------------------" << endl;
1334 cerr << "----------------------------------------------" << endl;
1335 cerr << "----------------------------------------------" << endl;
1337 int status = system("lamboot");
// The condition guarding this message is not shown -- presumably status == -1,
// as the message text says.
1340 INFOS("lamboot failed : system command status -1");
// NOTE(review): the meaning of the raw status value 217 is not explained by
// the visible code; confirm what it is meant to detect.
1342 else if (status == 217)
1344 INFOS("lamboot failed : system command status 217");
1352 string SALOME_ContainerManager::GetMPIZeroNode(string machine)
1357 string tmpFile = BuildTemporaryFileName();
1359 cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile;
1361 status = system(cmd.c_str());
1363 ifstream fp(tmpFile.c_str(),ios::in);