1 // Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
2 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License.
9 // This library is distributed in the hope that it will be useful
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "SALOME_ResourcesManager.hxx"
21 #include "Utils_ExceptHandlers.hxx"
22 #include "Utils_CorbaException.hxx"
39 #include <sys/types.h>
41 #include "utilities.h"
// Size limit associated with host names (meaning taken from the macro name; no
// use of it is visible in this part of the file).
// The replacement list must be a bare constant: a trailing ';' would be pasted
// into every expansion and break uses such as `char buf[MAX_SIZE_FOR_HOSTNAME]`.
43 #define MAX_SIZE_FOR_HOSTNAME 256
// Name under which the ResourcesManager servant is registered in (and later
// removed from) the naming service.
47 const char *SALOME_ResourcesManager::_ResourcesManagerNameInNS = "/ResourcesManager";
49 //=============================================================================
53 //=============================================================================
// Constructor taking an explicit path to the resources catalog.
// Stores xmlFilePath in _path_resources, keeps duplicated ORB and POA
// references, activates this servant on the POA and registers the resulting
// object reference in the naming service under _ResourcesManagerNameInNS.
//   orb         - ORB reference, duplicated into _orb
//   poa         - POA on which this servant is activated
//   ns          - naming service wrapper (its use is on lines not shown here)
//   xmlFilePath - path of the XML resources catalog
55 SALOME_ResourcesManager::
56 SALOME_ResourcesManager(CORBA::ORB_ptr orb,
57 PortableServer::POA_var poa,
58 SALOME_NamingService *ns,
59 const char *xmlFilePath) :
60 _path_resources(xmlFilePath)
62 MESSAGE("constructor");
64 _orb = CORBA::ORB::_duplicate(orb) ;
65 _poa = PortableServer::POA::_duplicate(poa) ;
66 PortableServer::ObjectId_var id = _poa->activate_object(this);
67 CORBA::Object_var obj = _poa->id_to_reference(id);
// Narrow to Engines::ResourcesManager, exactly as the environment-based
// constructor does. Narrowing to an interface the servant does not implement
// returns a nil reference, which would then be registered in the naming service.
68 Engines::ResourcesManager_var refContMan =
69 Engines::ResourcesManager::_narrow(obj);
71 _NS->Register(refContMan,_ResourcesManagerNameInNS);
73 MESSAGE("constructor end");
76 //=============================================================================
78 * Standard constructor, parse resource file.
79 * - if ${APPLI} exists in environment,
80 * look for ${HOME}/${APPLI}/CatalogResources.xml
81 * - else look for default:
82 * ${KERNEL_ROOT_DIR}/share/salome/resources/kernel/CatalogResources.xml
83 * - parse XML resource file.
85 //=============================================================================
// Constructor deriving the catalog path from the environment.
// Keeps duplicated ORB and POA references, activates this servant, registers it
// in the naming service under _ResourcesManagerNameInNS, then builds
// _path_resources from environment variables.
//   orb - ORB reference, duplicated into _orb
//   poa - POA on which this servant is activated
//   ns  - naming service wrapper (its use is on lines not shown here)
87 SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb,
88 PortableServer::POA_var poa,
89 SALOME_NamingService *ns)
91 MESSAGE("constructor");
93 _orb = CORBA::ORB::_duplicate(orb) ;
94 _poa = PortableServer::POA::_duplicate(poa) ;
95 PortableServer::ObjectId_var id = _poa->activate_object(this);
96 CORBA::Object_var obj = _poa->id_to_reference(id);
97 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
98 _NS->Register(refContMan,_ResourcesManagerNameInNS);
// A SALOME application is considered defined when APPLI is set in the environment.
100 _isAppliSalomeDefined = (getenv("APPLI") != 0);
103 if (_isAppliSalomeDefined)
// Application case: $HOME/$APPLI/CatalogResources.xml
// NOTE(review): getenv("HOME") is assigned to a std::string with no visible
// null check - confirm HOME is guaranteed to be set.
105 _path_resources = getenv("HOME");
106 _path_resources += "/";
107 _path_resources += getenv("APPLI");
108 _path_resources += "/CatalogResources.xml";
// Default case: catalog shipped under $KERNEL_ROOT_DIR.
// NOTE(review): same remark for getenv("KERNEL_ROOT_DIR").
113 _path_resources = getenv("KERNEL_ROOT_DIR");
114 _path_resources += "/share/salome/resources/kernel/CatalogResources.xml";
118 MESSAGE("constructor end");
121 //=============================================================================
123 * Standard Destructor
125 //=============================================================================
// Destructor. The only visible action is a trace message.
127 SALOME_ResourcesManager::~SALOME_ResourcesManager()
129 MESSAGE("destructor");
133 //=============================================================================
135 * unregister the ResourcesManager from the naming service, then deactivate its servant
137 //=============================================================================
// Removes the _ResourcesManagerNameInNS entry from the naming service, then
// looks up this servant's object id and deactivates it on the POA.
139 void SALOME_ResourcesManager::Shutdown()
142 _NS->Destroy_Name(_ResourcesManagerNameInNS);
143 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
144 _poa->deactivate_object(oid);
148 //=============================================================================
150 * get the list of names of resources fitting the specified module.
151 * If hostname specified, check it is local or known in resources catalog.
154 * - select first machines with corresponding OS (all machines if
155 * parameter OS empty),
156 * - then select the sublist of machines on which the module is known
157 * (if the result is empty, that probably means that the inventory of
158 * modules is probably not done, so give complete list from previous step)
160 //=============================================================================
// Builds the list of machine names matching the requested parameters.
//   params        - requested machine parameters; hostname, OS, nb_node,
//                   nb_proc_per_node, cpu_clock and mem_mb are read here
//   componentList - components that the selected machines should provide
// Returns a heap-allocated Engines::MachineList holding the selected host names.
// A SALOME_Exception raised while selecting hosts is caught and re-raised as a
// SALOME CORBA exception with code SALOME::BAD_PARAM.
162 Engines::MachineList *
163 SALOME_ResourcesManager::GetFittingResources(const Engines::MachineParameters& params,
164 const Engines::CompoList& componentList)
165 //throw(SALOME_Exception)
167 // MESSAGE("ResourcesManager::GetFittingResources");
168 vector <std::string> vec;
// NOTE(review): ret is allocated before the exception path below and no release
// is visible on that path - confirm it is not leaked when the function throws.
169 Engines::MachineList *ret=new Engines::MachineList;
172 // --- To be sure that we search in a correct list.
175 const char *hostname = (const char *)params.hostname;
176 MESSAGE("GetFittingResources " << hostname << " " << GetHostname().c_str());
// A non-empty hostname means the caller asked for a specific machine.
178 if (hostname[0] != '\0')
180 // MESSAGE("ResourcesManager::GetFittingResources : hostname specified" );
// "localhost" and the local host name both map to the local host name.
182 if ( strcmp(hostname, "localhost") == 0 ||
183 strcmp(hostname, GetHostname().c_str()) == 0 )
185 // MESSAGE("ResourcesManager::GetFittingResources : localhost" );
186 vec.push_back(GetHostname().c_str());
187 // MESSAGE("ResourcesManager::GetFittingResources : " << vec.size());
190 else if (_resourcesList.find(hostname) != _resourcesList.end())
192 // --- params.hostname is in the list of resources so return it.
193 vec.push_back(hostname);
198 // Cluster case: number of nodes > 1
// Every catalog entry with more than one node whose name starts with the
// requested hostname is selected (prefix comparison over strlen(hostname)).
200 for (map<string, ParserResourcesType>::const_iterator iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++){
201 if( (*iter).second.DataForSort._nbOfNodes > 1 ){
202 if( strncmp(hostname,(*iter).first.c_str(),strlen(hostname)) == 0 ){
203 vec.push_back((*iter).first.c_str());
204 //cout << "SALOME_ResourcesManager::GetFittingResources vector["
205 // << cpt << "] = " << (*iter).first.c_str() << endl ;
211 // --- user specified an unknown hostname so notify him.
212 MESSAGE("ResourcesManager::GetFittingResources : SALOME_Exception");
213 throw SALOME_Exception("unknown host");
219 // --- Search for available resources sorted by priority
// The conditions guarding these three calls are on lines not shown here; the
// header comment says the OS-only selection is the fallback when the module
// filter leaves nothing.
221 SelectOnlyResourcesWithOS(vec, params.OS);
223 KeepOnlyResourcesWithModule(vec, componentList);
226 SelectOnlyResourcesWithOS(vec, params.OS);
228 // --- set wanted parameters
// The wanted values are stored in static members of ResourceDataToSort.
229 ResourceDataToSort::_nbOfNodesWanted = params.nb_node;
231 ResourceDataToSort::_nbOfProcPerNodeWanted = params.nb_proc_per_node;
233 ResourceDataToSort::_CPUFreqMHzWanted = params.cpu_clock;
235 ResourceDataToSort::_memInMBWanted = params.mem_mb;
// Collect the sort data of every selected host, then write the host names back
// into vec in list order (any ordering step is on a line not shown here).
239 list<ResourceDataToSort> li;
241 for (vector<string>::iterator iter = vec.begin();
244 li.push_back(_resourcesList[(*iter)].DataForSort);
250 for (list<ResourceDataToSort>::iterator iter2 = li.begin();
253 vec[i++] = (*iter2)._hostName;
256 // MESSAGE("ResourcesManager::GetFittingResources : return" << ret.size());
// Copy the host names into the CORBA sequence.
257 ret->length(vec.size());
258 for(unsigned int i=0;i<vec.size();i++)
259 (*ret)[i]=(vec[i]).c_str();
262 catch(const SALOME_Exception &ex)
264 INFOS("Caught exception.");
265 THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM);
272 //=============================================================================
274 * add an entry in the resources catalog xml file.
275 * Return 0 if OK (KERNEL found in new resources modules) else throw exception
277 //=============================================================================
// Adds (or overwrites) an entry of _resourcesList for a new resource.
//   paramsOfNewResources  - hostname, OS, mem_mb, cpu_clock, nb_node and
//                           nb_proc_per_node are copied into the new entry
//   modulesOnNewResources - modules available on the resource
//   userName              - user name stored in the entry
//   prot                  - access protocol stored in the entry
// (Some parameters of the signature are on lines not shown here.)
// Throws SALOME_Exception when the module looked up in modulesOnNewResources is
// absent; per the message below that module is KERNEL (the searched value
// itself is on a line not shown).
280 SALOME_ResourcesManager::
281 AddResourceInCatalog(const Engines::MachineParameters& paramsOfNewResources,
282 const vector<string>& modulesOnNewResources,
284 const char *userName,
286 AccessProtocolType prot)
287 throw(SALOME_Exception)
289 vector<string>::const_iterator iter = find(modulesOnNewResources.begin(),
290 modulesOnNewResources.end(),
293 if (iter != modulesOnNewResources.end())
295 ParserResourcesType newElt;
296 newElt.DataForSort._hostName = paramsOfNewResources.hostname;
297 newElt.Alias = alias;
298 newElt.Protocol = prot;
300 newElt.UserName = userName;
301 newElt.ModulesList = modulesOnNewResources;
302 newElt.OS = paramsOfNewResources.OS;
303 newElt.DataForSort._memInMB = paramsOfNewResources.mem_mb;
304 newElt.DataForSort._CPUFreqMHz = paramsOfNewResources.cpu_clock;
305 newElt.DataForSort._nbOfNodes = paramsOfNewResources.nb_node;
306 newElt.DataForSort._nbOfProcPerNode =
307 paramsOfNewResources.nb_proc_per_node;
// The catalog is keyed by host name; an existing entry for the same host is replaced.
308 _resourcesList[newElt.DataForSort._hostName] = newElt;
313 throw SALOME_Exception("KERNEL is not present in this resource");
316 //=============================================================================
318 * Deletes a resource from the catalog
320 //=============================================================================
// Removes the entry keyed by hostname from the in-memory catalog _resourcesList.
//   hostname - key of the entry to erase
322 void SALOME_ResourcesManager::DeleteResourceInCatalog(const char *hostname)
324 _resourcesList.erase(hostname);
327 //=============================================================================
329 * write the current data in memory in file.
331 //=============================================================================
// Writes the in-memory catalog to the XML file designated by _path_resources.
// Failures are reported through INFOS traces.
333 void SALOME_ResourcesManager::WriteInXmlFile()
335 const char* aFilePath = _path_resources.c_str();
// The file is opened for writing here; the document itself is saved by path
// through xmlSaveFile below.
337 FILE* aFile = fopen(aFilePath, "w");
341 INFOS("Error opening file !");
// Build an XML 1.0 document carrying a "ResourcesCatalog" comment.
345 xmlDocPtr aDoc = xmlNewDoc(BAD_CAST "1.0");
346 xmlNewDocComment(aDoc, BAD_CAST "ResourcesCatalog");
// The handler fills the document from _resourcesList.
// NOTE(review): handler is allocated with new; its release is not visible on
// the lines shown - confirm it is deleted.
348 SALOME_ResourcesCatalog_Handler* handler =
349 new SALOME_ResourcesCatalog_Handler(_resourcesList);
350 handler->PrepareDocToXmlFile(aDoc);
353 int isOk = xmlSaveFile(aFilePath, aDoc);
356 INFOS("Error while XML file saving.");
361 // Free the global variables that may have been allocated by the parser
366 MESSAGE("WRITING DONE!");
369 //=============================================================================
371 * parse the data type catalog
373 //=============================================================================
// Parses the XML file designated by _path_resources into _resourcesList.
// Returns a const reference to _resourcesList. Unreadable or unparsable files
// are reported through INFOS traces.
375 const MapOfParserResourcesType& SALOME_ResourcesManager::ParseXmlFile()
// NOTE(review): handler is allocated with new; its release is not visible on
// the lines shown - confirm it is deleted.
377 SALOME_ResourcesCatalog_Handler* handler =
378 new SALOME_ResourcesCatalog_Handler(_resourcesList);
380 const char* aFilePath = _path_resources.c_str();
// The file is opened for reading here; the parsing itself goes through
// xmlReadFile by path.
381 FILE* aFile = fopen(aFilePath, "r");
385 xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0);
388 handler->ProcessXmlDocument(aDoc);
390 INFOS("ResourcesManager: could not parse file "<<aFilePath);
395 // Free the global variables that may have been allocated by the parser
400 INFOS("ResourcesManager: file "<<aFilePath<<" is not readable.");
404 return _resourcesList;
407 //=============================================================================
409 * consult the content of the list
411 //=============================================================================
// Read-only access to the in-memory resources catalog.
// Returns a const reference to _resourcesList.
413 const MapOfParserResourcesType& SALOME_ResourcesManager::GetList() const
415 return _resourcesList;
419 //=============================================================================
421 * dynamically obtains the first machines
423 //=============================================================================
// Delegates to _dynamicResourcesSelecter.FindFirst and returns a copy of the
// selected name made with CORBA::string_dup.
//   listOfMachines - candidate machines
426 SALOME_ResourcesManager::FindFirst(const Engines::MachineList& listOfMachines)
428 return CORBA::string_dup(_dynamicResourcesSelecter.FindFirst(listOfMachines).c_str());
431 //=============================================================================
433 * dynamically obtains the best machines
435 //=============================================================================
// Delegates to _dynamicResourcesSelecter.FindNext, passing the catalog and the
// naming service in addition to the candidate list.
//   listOfMachines - candidate machines
438 SALOME_ResourcesManager::FindNext(const Engines::MachineList& listOfMachines)
440 return _dynamicResourcesSelecter.FindNext(listOfMachines,_resourcesList,_NS);
442 //=============================================================================
444 * dynamically obtains the best machines
446 //=============================================================================
// Delegates to _dynamicResourcesSelecter.FindBest.
//   listOfMachines - candidate machines
449 SALOME_ResourcesManager::FindBest(const Engines::MachineList& listOfMachines)
451 return _dynamicResourcesSelecter.FindBest(listOfMachines);
454 //=============================================================================
456 * This is no longer valid (C++ container are also python containers)
458 //=============================================================================
// Checks whether a container name designates a Python container.
//   ContainerName - NUL-terminated container name
// The visible test compares the last two characters of the name with "Py".
// NOTE(review): ContainerName + len - 2 is only valid when len >= 2; the guard
// is not visible on the lines shown - confirm it exists.
460 bool isPythonContainer(const char* ContainerName)
463 int len = strlen(ContainerName);
466 if (strcmp(ContainerName + len - 2, "Py") == 0)
473 //=============================================================================
475 * Builds the script to be launched
477 * If SALOME Application not defined ($APPLI),
478 * see BuildTempFileToLaunchRemoteContainer()
480 * Else rely on distant configuration. Command is under the form (example):
481 * ssh user@machine distantPath/runRemote.sh hostNS portNS \
482 * SALOME_Container containerName &"
484 * - where user is omitted if not specified in CatalogResources,
485 * - where distant path is always relative to user@machine $HOME, and
486 * equal to $APPLI if not specified in CatalogResources,
487 * - where hostNS is the hostname of CORBA naming server (set by scripts to
488 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
489 * - where portNS is the port used by CORBA naming server (set by scripts to
490 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
492 //=============================================================================
// Builds the shell command that launches a container on a remote machine.
//   machine - key of the target machine in _resourcesList
//   params  - container parameters (nb_node and nb_proc_per_node are read here;
//             the whole structure is passed to _NS->ContainerName)
//   id      - numeric identifier formatted into the command
// Throws SALOME_Exception("Unknown protocol") when the catalog entry uses a
// protocol other than rsh or ssh.
495 SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
496 (const string& machine,
497 const Engines::MachineParameters& params, const long id)
// Scratch buffer for the decimal form of id (filled by sprintf below).
501 char idc[3*sizeof(long)];
// Without a SALOME application the command comes from the temp-file builder.
503 if ( ! _isAppliSalomeDefined )
504 command = BuildTempFileToLaunchRemoteContainer(machine, params);
508 const ParserResourcesType& resInfo = _resourcesList[machine];
// Process count: the product of nb_node and nb_proc_per_node, or whichever one
// is non-zero when the other is 0 (the value used when both are <= 0 is set on
// a line not shown here).
512 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
514 else if ( params.nb_node == 0 )
515 nbproc = params.nb_proc_per_node;
516 else if ( params.nb_proc_per_node == 0 )
517 nbproc = params.nb_node;
519 nbproc = params.nb_node * params.nb_proc_per_node;
522 // "ssh user@machine distantPath/runRemote.sh hostNS portNS \
523 // SALOME_Container containerName &"
525 if (resInfo.Protocol == rsh)
527 else if (resInfo.Protocol == ssh)
530 throw SALOME_Exception("Unknown protocol");
// The user name is only added when the catalog defines one.
532 if (resInfo.UserName != "")
534 command += resInfo.UserName;
// Application path: the catalog value when set, otherwise $APPLI.
541 if (resInfo.AppliPath != "")
542 command += resInfo.AppliPath; // path relative to user@machine $HOME
545 ASSERT(getenv("APPLI"));
546 command += getenv("APPLI"); // path relative to user@machine $HOME
549 command += "/runRemote.sh ";
551 ASSERT(getenv("NSHOST"));
552 command += getenv("NSHOST"); // hostname of CORBA name server
555 ASSERT(getenv("NSPORT"));
556 command += getenv("NSPORT"); // port of CORBA name server
// MPI variant (its guarding condition is on a line not shown here).
560 command += " mpirun -np ";
561 std::ostringstream o;
565 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
567 command += " SALOME_MPIContainer ";
570 command += " SALOME_Container ";
572 command += _NS->ContainerName(params);
574 sprintf(idc,"%ld",id);
577 AddOmninamesParams(command);
// Output is redirected to a log file under /tmp whose name is built from the
// container name, the local host name and $USER; the command runs in background.
578 command += " > /tmp/";
579 command += _NS->ContainerName(params);
581 command += GetHostname();
// NOTE(review): getenv("USER") is appended with no visible null check.
583 command += getenv( "USER" ) ;
584 command += ".log 2>&1 &" ;
586 MESSAGE("command =" << command);
592 //=============================================================================
594 * builds the command to be launched.
596 //=============================================================================
// Builds the shell command that launches a container on the local machine.
//   params - container parameters (nb_node, nb_proc_per_node and container_name
//            are read here; the whole structure is passed to _NS->ContainerName)
//   id     - numeric identifier formatted into the command
599 SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
600 (const Engines::MachineParameters& params, const long id)
// Scratch buffer for the decimal form of id (filled by sprintf below).
605 char idc[3*sizeof(long)];
// MPI variant (its guarding condition is on a line not shown here).
609 command = "mpirun -np ";
// Process count: the product of nb_node and nb_proc_per_node, or whichever one
// is non-zero when the other is 0 (the value used when both are <= 0 is set on
// a line not shown here).
611 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
613 else if ( params.nb_node == 0 )
614 nbproc = params.nb_proc_per_node;
615 else if ( params.nb_proc_per_node == 0 )
616 nbproc = params.nb_node;
618 nbproc = params.nb_node * params.nb_proc_per_node;
620 std::ostringstream o;
626 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// Executable choice for the MPI variant: Python or C++ container.
629 if (isPythonContainer(params.container_name))
630 command += "pyMPI SALOME_ContainerPy.py ";
632 command += "SALOME_MPIContainer ";
// Non-MPI variant: the command starts directly with the container executable.
637 if (isPythonContainer(params.container_name))
638 command = "SALOME_ContainerPy.py ";
640 command = "SALOME_Container ";
643 command += _NS->ContainerName(params);
645 sprintf(idc,"%ld",id);
648 AddOmninamesParams(command);
// Output is redirected to a log file under /tmp whose name is built from the
// container name, the local host name and $USER; the command runs in background.
649 command += " > /tmp/";
650 command += _NS->ContainerName(params);
652 command += GetHostname();
// NOTE(review): getenv("USER") is appended with no visible null check.
654 command += getenv( "USER" ) ;
655 command += ".log 2>&1 &" ;
656 MESSAGE("Command is ... " << command);
661 //=============================================================================
663 * removes the generated temporary file in case of a remote launch.
665 //=============================================================================
// Builds a removal command for the temporary file recorded in _TmpFileName.
// Nothing is done when _TmpFileName is empty.
667 void SALOME_ResourcesManager::RmTmpFile()
669 if (_TmpFileName != "")
// Two alternative declarations of command ("rm " / "del /F ") are visible;
// presumably a platform #ifdef on lines not shown selects one - confirm.
672 string command = "rm ";
674 string command = "del /F ";
676 command += _TmpFileName;
// Work on a C copy of the command: its last three characters are replaced by a
// single '*', turning the file name into a wildcard pattern.
// NOTE(review): what is done with temp afterwards (execution, release) is on
// lines not shown here.
677 char *temp = strdup(command.c_str());
678 int lgthTemp = strlen(temp);
679 temp[lgthTemp - 3] = '*';
680 temp[lgthTemp - 2] = '\0';
687 //=============================================================================
689 * builds the script to be launched
691 //=============================================================================
// Builds a remote-shell command line that starts a container on a machine.
//   machine       - key of the target machine in _resourcesList
//   containerName - name of the container; also selects the Python or C++
//                   executable through isPythonContainer
// Throws SALOME_Exception("Not implemented yet...") when the catalog entry uses
// a protocol other than rsh or ssh.
694 SALOME_ResourcesManager::BuildCommand
695 (const string& machine,
696 const char *containerName)
698 // rsh -n ikkyo /export/home/rahuel/SALOME_ROOT/bin/runSession SALOME_Container -ORBInitRef NameService=corbaname::dm2s0017:1515 &
699 const ParserResourcesType& resInfo = _resourcesList[machine];
700 bool pyCont = isPythonContainer(containerName);
704 if (resInfo.Protocol == rsh)
705 command = "rsh -n " ;
706 else if (resInfo.Protocol == ssh)
707 command = "ssh -f -n ";
709 throw SALOME_Exception("Not implemented yet...");
// NOTE(review): getenv("KERNEL_ROOT_DIR") initializes a std::string with no
// visible null check.
713 string path = getenv("KERNEL_ROOT_DIR");
715 command += "/bin/salome/";
718 command += "SALOME_ContainerPy.py ";
720 command += "SALOME_Container ";
722 command += containerName;
724 AddOmninamesParams(command);
// Output is redirected to a log file under /tmp; the command runs in background.
725 command += " > /tmp/";
726 command += containerName;
// NOTE(review): getenv("USER") is appended with no visible null check.
730 command += getenv( "USER" ) ;
731 command += ".log 2>&1 &" ;
737 //=============================================================================
739 * Gives a sublist of machines with matching OS.
740 * If parameter OS is empty, gives the complete list of machines
742 //=============================================================================
744 // Warning need an updated parsed list : _resourcesList
// Appends to hosts the name of every catalog entry whose OS matches.
//   hosts - output list; matching host names are pushed at the end
//   OS    - requested operating system
// An entry is selected when its OS equals base or when base is empty (base is
// declared on a line not shown here; presumably it is built from OS - confirm).
746 SALOME_ResourcesManager::SelectOnlyResourcesWithOS
747 ( vector<string>& hosts,
748 const char *OS) const
749 throw(SALOME_Exception)
753 for (map<string, ParserResourcesType>::const_iterator iter =
754 _resourcesList.begin();
755 iter != _resourcesList.end();
758 if ( (*iter).second.OS == base || base.size() == 0)
759 hosts.push_back((*iter).first);
764 //=============================================================================
766 * Gives a sublist of machines on which the module is known.
768 //=============================================================================
770 //Warning need an updated parsed list : _resourcesList
// Filters hosts against the modules each host declares in the catalog.
//   hosts         - list of host names, filtered in place
//   componentList - requested components
// Hosts with an empty ModulesList are not checked against componentList.
772 SALOME_ResourcesManager::KeepOnlyResourcesWithModule
773 ( vector<string>& hosts,
774 const Engines::CompoList& componentList) const
775 throw(SALOME_Exception)
// The loop header has no increment; iterator advance and erasure are handled on
// lines not shown here.
777 for (vector<string>::iterator iter = hosts.begin(); iter != hosts.end();)
// NOTE(review): the result of find() is dereferenced with no visible check
// against _resourcesList.end() - confirm every host is present in the catalog.
779 MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter);
780 const vector<string>& mapOfModulesOfCurrentHost = (((*it).second).ModulesList);
782 bool erasedHost = false;
783 if( mapOfModulesOfCurrentHost.size() > 0 ){
784 for(int i=0;i<componentList.length();i++){
785 const char* compoi = componentList[i];
// Look up the current component among the modules of this host.
786 vector<string>::const_iterator itt = find(mapOfModulesOfCurrentHost.begin(),
787 mapOfModulesOfCurrentHost.end(),
789 // componentList[i]);
// Reaching end() means the requested component is missing on this host.
790 if (itt == mapOfModulesOfCurrentHost.end()){
804 //=============================================================================
806 * add to command all options relative to naming service.
808 //=============================================================================
// Appends the naming-service option to a command string.
//   command - command line being built; "ORBInitRef NameService=" is appended
// The address comes from _NS->getIORaddr(); the earlier approach that read the
// OMNIORB_CONFIG file is kept below as commented-out code.
810 void SALOME_ResourcesManager::AddOmninamesParams(string& command) const
812 // If env variable OMNIORB_CONFIG is not defined or the file is more complex than one line
814 // Even if we use it we have to check if env variable exists
815 //string omniORBcfg( getenv( "OMNIORB_CONFIG" ) ) ;
816 //ifstream omniORBfile( omniORBcfg.c_str() ) ;
817 //char ORBInitRef[11] ;
819 //char nameservice[132] ;
820 //omniORBfile >> ORBInitRef ;
821 //command += "ORBInitRef " ;
822 //omniORBfile >> egal ;
823 //omniORBfile >> nameservice ;
824 //omniORBfile.close() ;
825 //char * bsn = strchr( nameservice , '\n' ) ;
829 //command += nameservice ;
// NOTE(review): how iorstr is appended and released is on lines not shown here.
831 char *iorstr = _NS->getIORaddr();
832 command += "ORBInitRef NameService=";
837 //=============================================================================
839 * add to command all options relative to naming service.
841 //=============================================================================
// Stream variant: writes "ORBInitRef NameService=" followed by the address
// returned by _NS->getIORaddr().
//   fileStream - output stream receiving the option
// NOTE(review): the char* returned by getIORaddr() is streamed directly and
// never stored; if the caller owns that buffer it is leaked here - confirm
// against the ownership convention of getIORaddr().
843 void SALOME_ResourcesManager::AddOmninamesParams(ofstream& fileStream) const
845 fileStream << "ORBInitRef NameService=";
846 fileStream << _NS->getIORaddr();
850 //=============================================================================
852 * generate a file name in /tmp directory
854 //=============================================================================
// Builds a temporary file name from the template "/tmp/commandXXXXXX".
// Returns the name as a string (the step that turns the template into an
// actual name is on lines not shown here).
856 string SALOME_ResourcesManager::BuildTemporaryFileName() const
858 //build more complex file name to support multiple salome session
// 19 bytes = 12 ("/tmp/command") + 6 ("XXXXXX") + terminating NUL: an exact fit.
// NOTE(review): the release of temp is not visible on the lines shown.
859 char *temp = new char[19];
860 strcpy(temp, "/tmp/command");
861 strcat(temp, "XXXXXX");
// Process-id based variant; presumably a platform-specific branch - confirm.
868 itoa(getpid(), aPID, 10);
872 string command(temp);
879 //=============================================================================
881 * Builds in a temporary file the script to be launched.
883 * Used if SALOME Application ($APPLI) is not defined.
884 * The command is build with data from CatalogResources, in which every path
885 * used on remote computer must be defined.
887 //=============================================================================
// Writes a launch script to a temporary file, copies it to the remote machine
// and builds the command that runs it there.
//   machine - key of the target machine in _resourcesList, also used as the
//             copy destination host
//   params  - container parameters (nb_node, nb_proc_per_node and
//             container_name are read here)
// Side effects: sets _TmpFileName and _CommandForRemAccess, creates the script
// file and runs rcp or scp through system().
// Throws SALOME_Exception("Unknown protocol") for a protocol other than rsh or
// ssh, and SALOME_Exception("Error of connection on remote host") on the copy
// failure path (its condition is on a line not shown here).
890 SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer
891 (const string& machine,
892 const Engines::MachineParameters& params) throw(SALOME_Exception)
896 _TmpFileName = BuildTemporaryFileName();
897 ofstream tempOutputFile;
898 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
899 const ParserResourcesType& resInfo = _resourcesList[machine];
// Script header.
900 tempOutputFile << "#! /bin/sh" << endl;
904 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
905 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
// MPI variant (its guarding condition is on a line not shown here).
911 tempOutputFile << "mpirun -np ";
// Process count: the product of nb_node and nb_proc_per_node, or whichever one
// is non-zero when the other is 0 (the value used when both are <= 0 is set on
// a line not shown here).
914 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
916 else if ( params.nb_node == 0 )
917 nbproc = params.nb_proc_per_node;
918 else if ( params.nb_proc_per_node == 0 )
919 nbproc = params.nb_node;
921 nbproc = params.nb_node * params.nb_proc_per_node;
923 std::ostringstream o;
925 tempOutputFile << nbproc << " ";
927 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// NOTE(review): getenv("KERNEL_ROOT_DIR") is streamed with no visible null check.
931 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
// Executable choice for the MPI variant: Python or C++ container.
935 if (isPythonContainer(params.container_name))
936 tempOutputFile << "pyMPI SALOME_ContainerPy.py ";
938 tempOutputFile << "SALOME_MPIContainer ";
// Executable choice for the non-MPI variant.
943 if (isPythonContainer(params.container_name))
944 tempOutputFile << "SALOME_ContainerPy.py ";
946 tempOutputFile << "SALOME_Container ";
949 tempOutputFile << _NS->ContainerName(params) << " -";
950 AddOmninamesParams(tempOutputFile);
951 tempOutputFile << " &" << endl;
952 tempOutputFile.flush();
953 tempOutputFile.close();
// 0x1ED == 0755: read/write/execute for the owner, read/execute for others.
954 chmod(_TmpFileName.c_str(), 0x1ED);
// Copy the script to the remote machine with the tool matching the protocol.
// NOTE(review): machine and _TmpFileName are concatenated into a shell command
// with no visible quoting.
960 if (resInfo.Protocol == rsh)
963 string commandRcp = "rcp ";
964 commandRcp += _TmpFileName;
966 commandRcp += machine;
968 commandRcp += _TmpFileName;
969 status = system(commandRcp.c_str());
972 else if (resInfo.Protocol == ssh)
975 string commandRcp = "scp ";
976 commandRcp += _TmpFileName;
978 commandRcp += machine;
980 commandRcp += _TmpFileName;
981 status = system(commandRcp.c_str());
984 throw SALOME_Exception("Unknown protocol");
987 throw SALOME_Exception("Error of connection on remote host");
// The remote-access prefix built so far is recorded before the script path and
// the log redirection are appended.
990 _CommandForRemAccess = command;
992 command += _TmpFileName;
995 command += _NS->ContainerName(params);
// NOTE(review): getenv("USER") is appended with no visible null check.
999 command += getenv( "USER" ) ;
1000 command += ".log 2>&1 &";
1007 //=============================================================================
1008 /*! Creates a command line that the container manager uses to launch
1009 * a parallel container.
1011 //=============================================================================
// Builds the command line used to launch a parallel container locally.
//   exe_name - base name of the executable; params.parallelLib is appended to
//              it, and it is searched for the substring "Proxy"
//   params   - container parameters; parallelLib, hostname, nb_component_nodes
//              and the fields copied into rtn below are read here
//   log      - logging mode; "default" redirects output to a file under /tmp
// The "Dummy" and "Mpi" values of parallelLib produce different command lines.
1013 SALOME_ResourcesManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1014 const Engines::MachineParameters& params,
1015 const std::string& log)
1017 // This method knows the differences between the proxy and the nodes.
1018 // nb_component_nodes is not used in the same way if it is a proxy or

// std::string copies its argument, so the CORBA strings are read in place.
// Wrapping them in CORBA::string_dup would allocate a duplicate that nothing
// ever releases with CORBA::string_free.
1022 string parallelLib((const char *)params.parallelLib);
1023 string hostname((const char *)params.hostname);
1024 int par = exe_name.find("Proxy");
1025 int nbproc = params.nb_component_nodes;
// Decimal form of the node count, reused in the command lines below (buffer is
// declared on a line not shown here).
1027 sprintf(buffer,"%d",nbproc);
// Copy of the parameters handed to _NS->ContainerName.
1029 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1030 rtn->container_name = params.container_name;
1031 rtn->hostname = params.hostname;
1032 rtn->OS = params.OS;
1033 rtn->mem_mb = params.mem_mb;
1034 rtn->cpu_clock = params.cpu_clock;
1035 rtn->nb_proc_per_node = params.nb_proc_per_node;
1036 rtn->nb_node = params.nb_node;
1037 rtn->isMPI = params.isMPI;
1039 string real_exe_name = exe_name + parallelLib;
// "Dummy" library: the executable is started directly.
1041 if (parallelLib == "Dummy")
1043 //command = "gdb --args ";
1044 //command = "valgrind --tool=memcheck --log-file=val_log ";
1045 //command += real_exe_name;
1047 command = real_exe_name;
1049 command += " " + _NS->ContainerName(rtn);
1050 command += " " + parallelLib;
1051 command += " " + hostname;
1053 AddOmninamesParams(command);
// "Mpi" library: the executable is started through mpiexec.
1056 if (parallelLib == "Mpi")
1058 // Step 1 : check if MPI is started
1059 if (_MpiStarted == false)
// Variant started on nb_component_nodes processes (the condition choosing
// between the two mpiexec variants is on a line not shown here).
1068 command = "mpiexec -np " + string(buffer) + " ";
1069 // command += "gdb --args ";
1070 command += real_exe_name;
1071 command += " " + _NS->ContainerName(rtn);
1072 command += " " + parallelLib;
1073 command += " " + hostname;
1075 AddOmninamesParams(command);
// Variant started on a single process; the node count is passed as an argument.
1080 command = "mpiexec -np 1 ";
1081 command += real_exe_name;
1082 command += " " + _NS->ContainerName(rtn);
1083 command += " " + string(buffer);
1084 command += " " + parallelLib;
1085 command += " " + hostname;
1087 AddOmninamesParams(command);
// Default logging: redirect output to a file under /tmp and run in background.
1092 if (log == "default")
1094 command += " > /tmp/";
1095 command += _NS->ContainerName(rtn);
1097 command += GetHostname();
// NOTE(review): getenv("USER") is appended with no visible null check.
1099 command += getenv( "USER" ) ;
1100 command += ".log 2>&1 &" ;
// Other logging mode: run the command inside an xterm (its guarding condition
// is on a line not shown here).
1104 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1105 + command + " \" &";
1106 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
1110 /* if (log == "xterm")
1112 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1115 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1116 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1117 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1118 command += "ssh cn01 sh " + fichier_commande + " &";
1119 cerr << "La commande : " << command << endl;
// Starts the MPI runtime by running "lamboot" through system(), after printing
// a banner on cerr stating that only LAM on localhost is supported.
// Failures are reported through INFOS traces.
1123 void SALOME_ResourcesManager::startMPI()
1125 cerr << "----------------------------------------------" << endl;
1126 cerr << "----------------------------------------------" << endl;
1127 cerr << "----------------------------------------------" << endl;
1128 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1129 cerr << "----------------------------------------------" << endl;
1130 cerr << "----------------------------------------------" << endl;
1131 cerr << "----------------------------------------------" << endl;
1133 int status = system("lamboot");
// The first failure case is status -1 according to the message below (its
// condition is on a line not shown here).
1136 INFOS("lamboot failed : system command status -1");
1138 else if (status == 217)
1140 INFOS("lamboot failed : system command status 217");
// Builds a heap-allocated Engines::MachineParameters describing one machine of
// the catalog.
//   hostname - key of the machine in _resourcesList
// Every field is copied from the catalog entry, except container_name (left
// empty) and hostname (copied from the argument).
// NOTE(review): _resourcesList is a map indexed with operator[], so an unknown
// hostname inserts a default entry into the catalog - confirm this is intended.
1148 Engines::MachineParameters* SALOME_ResourcesManager::GetMachineParameters(const char *hostname)
1150 ParserResourcesType resource = _resourcesList[string(hostname)];
1151 Engines::MachineParameters *p_ptr = new Engines::MachineParameters;
1152 p_ptr->container_name = CORBA::string_dup("");
// Report the requested host name itself, not the string literal "hostname".
1153 p_ptr->hostname = CORBA::string_dup(hostname);
1154 p_ptr->alias = CORBA::string_dup(resource.Alias.c_str());
// Access protocol, as text.
1155 if( resource.Protocol == rsh )
1156 p_ptr->protocol = "rsh";
1157 else if( resource.Protocol == ssh )
1158 p_ptr->protocol = "ssh";
1159 p_ptr->username = CORBA::string_dup(resource.UserName.c_str());
1160 p_ptr->applipath = CORBA::string_dup(resource.AppliPath.c_str());
// Module list, copied element by element into the CORBA sequence.
1161 p_ptr->modList.length(resource.ModulesList.size());
1162 for(int i=0;i<resource.ModulesList.size();i++)
1163 p_ptr->modList[i] = CORBA::string_dup(resource.ModulesList[i].c_str());
1164 p_ptr->OS = CORBA::string_dup(resource.OS.c_str());
1165 p_ptr->mem_mb = resource.DataForSort._memInMB;
1166 p_ptr->cpu_clock = resource.DataForSort._CPUFreqMHz;
1167 p_ptr->nb_proc_per_node = resource.DataForSort._nbOfProcPerNode;
1168 p_ptr->nb_node = resource.DataForSort._nbOfNodes;
// MPI implementation, as text.
1169 if( resource.mpi == indif )
1170 p_ptr->mpiImpl = "indif";
1171 else if( resource.mpi == lam )
1172 p_ptr->mpiImpl = "lam";
1173 else if( resource.mpi == mpich1 )
1174 p_ptr->mpiImpl = "mpich1";
1175 else if( resource.mpi == mpich2 )
1176 p_ptr->mpiImpl = "mpich2";
1177 else if( resource.mpi == openmpi )
1178 p_ptr->mpiImpl = "openmpi";
// Batch system, as text.
1179 if( resource.Batch == pbs )
1180 p_ptr->batch = "pbs";
1181 else if( resource.Batch == lsf )
1182 p_ptr->batch = "lsf";
1183 else if( resource.Batch == slurm )
1184 p_ptr->batch = "slurm";