1 // Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
2 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License.
9 // This library is distributed in the hope that it will be useful
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "SALOME_ResourcesManager.hxx"
21 #include "Utils_ExceptHandlers.hxx"
22 #include "Utils_CorbaException.hxx"
39 #include <sys/types.h>
41 #include "utilities.h"
// NOTE(review): the replacement text of this macro includes the trailing ';',
// so a use inside an expression or an array bound would expand to "256;".
// Confirm whether the semicolon is intended.
43 #define MAX_SIZE_FOR_HOSTNAME 256;
// Name passed to _NS->Register() by the constructors and to
// _NS->Destroy_Name() by Shutdown().
47 const char *SALOME_ResourcesManager::_ResourcesManagerNameInNS = "/ResourcesManager";
49 //=============================================================================
53 //=============================================================================
// Constructor taking an explicit path to the resource catalog.
//   orb, poa    : duplicated into _orb and _poa.
//   ns          : not referenced in the statements visible here; _NS is what
//                 the registration below uses (presumably set from ns, TODO
//                 confirm).
//   xmlFilePath : initializes _path_resources.
// The servant is activated in the POA and its object reference is registered
// in the naming service under _ResourcesManagerNameInNS.
55 SALOME_ResourcesManager::
56 SALOME_ResourcesManager(CORBA::ORB_ptr orb,
57 PortableServer::POA_var poa,
58 SALOME_NamingService *ns,
59 const char *xmlFilePath) :
60 _path_resources(xmlFilePath)
62 MESSAGE("constructor");
64 _orb = CORBA::ORB::_duplicate(orb) ;
65 _poa = PortableServer::POA::_duplicate(poa) ;
// Activate this servant and obtain the object reference to publish.
66 PortableServer::ObjectId_var id = _poa->activate_object(this);
67 CORBA::Object_var obj = _poa->id_to_reference(id);
// NOTE(review): this narrows to Engines::SalomeLauncher, whereas the other
// constructor narrows to Engines::ResourcesManager. Confirm which interface
// is intended here.
68 Engines::SalomeLauncher_var refContMan =
69 Engines::SalomeLauncher::_narrow(obj);
71 _NS->Register(refContMan,_ResourcesManagerNameInNS);
73 MESSAGE("constructor end");
76 //=============================================================================
78 * Standard constructor, parse resource file.
79 * - if ${APPLI} exists in environment,
80 * look for ${HOME}/${APPLI}/CatalogResources.xml
81 * - else look for default:
82 * ${KERNEL_ROOT_DIR}/share/salome/resources/kernel/CatalogResources.xml
83 * - parse XML resource file.
85 //=============================================================================
// Constructor that derives the resource-catalog path from the environment.
//   orb, poa : duplicated into _orb and _poa.
//   ns       : not referenced in the statements visible here (presumably
//              stored in _NS, TODO confirm).
// The servant is activated, narrowed to Engines::ResourcesManager and
// registered under _ResourcesManagerNameInNS.
87 SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb,
88 PortableServer::POA_var poa,
89 SALOME_NamingService *ns)
91 MESSAGE("constructor");
93 _orb = CORBA::ORB::_duplicate(orb) ;
94 _poa = PortableServer::POA::_duplicate(poa) ;
95 PortableServer::ObjectId_var id = _poa->activate_object(this);
96 CORBA::Object_var obj = _poa->id_to_reference(id);
97 Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
98 _NS->Register(refContMan,_ResourcesManagerNameInNS);
// The application mode is decided solely by the presence of $APPLI.
100 _isAppliSalomeDefined = (getenv("APPLI") != 0);
103 if (_isAppliSalomeDefined)
// Application case: $HOME/$APPLI/CatalogResources.xml
// NOTE(review): getenv("HOME") is used without a null check; confirm it is
// always set in this mode.
105 _path_resources = getenv("HOME");
106 _path_resources += "/";
107 _path_resources += getenv("APPLI");
108 _path_resources += "/CatalogResources.xml";
// Default case described in the header comment:
// $KERNEL_ROOT_DIR/share/salome/resources/kernel/CatalogResources.xml
// NOTE(review): getenv("KERNEL_ROOT_DIR") is used without a null check.
113 _path_resources = getenv("KERNEL_ROOT_DIR");
114 _path_resources += "/share/salome/resources/kernel/CatalogResources.xml";
118 MESSAGE("constructor end");
121 //=============================================================================
123 * Standard Destructor
125 //=============================================================================
// Destructor. The only statement visible here emits a trace message.
127 SALOME_ResourcesManager::~SALOME_ResourcesManager()
129 MESSAGE("destructor");
133 //=============================================================================
135 * Unregisters the ResourcesManager from the naming service, then deactivates its servant
137 //=============================================================================
// Removes the _ResourcesManagerNameInNS entry from the naming service, then
// deactivates this servant in _poa.
139 void SALOME_ResourcesManager::Shutdown()
142 _NS->Destroy_Name(_ResourcesManagerNameInNS);
// Look up the object id under which this servant was activated.
143 PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
144 _poa->deactivate_object(oid);
148 //=============================================================================
150 * get the list of names of resources fitting the specified module.
151 * If hostname specified, check it is local or known in resources catalog.
154 * - select first machines with corresponding OS (all machines if
155 * parameter OS empty),
156 * - then select the sublist of machines on which the module is known
157 * (if the result is empty, that probably means that the inventory of
158 * modules has not been done, so give complete list from previous step)
160 //=============================================================================
// Builds the list of machine names that fit the requested parameters.
//   params        : requested characteristics; hostname, OS, nb_node,
//                   nb_proc_per_node, cpu_clock and mem_mb are read below.
//   componentList : forwarded to KeepOnlyResourcesWithModule().
// The names collected in vec are copied into a heap-allocated
// Engines::MachineList (ret). A SALOME_Exception raised on the way is
// converted with THROW_SALOME_CORBA_EXCEPTION(..., SALOME::BAD_PARAM).
162 Engines::MachineList *
163 SALOME_ResourcesManager::GetFittingResources(const Engines::MachineParameters& params,
164 const Engines::CompoList& componentList)
165 //throw(SALOME_Exception)
167 // MESSAGE("ResourcesManager::GetFittingResources");
168 vector <std::string> vec;
169 Engines::MachineList *ret=new Engines::MachineList;
172 // --- To be sure that we search in a correct list.
175 const char *hostname = (const char *)params.hostname;
176 MESSAGE("GetFittingResources " << hostname << " " << GetHostname().c_str());
// A non-empty hostname restricts the answer to that host (or cluster).
178 if (hostname[0] != '\0')
180 // MESSAGE("ResourcesManager::GetFittingResources : hostname specified" );
// "localhost" and the local host name both resolve to GetHostname().
182 if ( strcmp(hostname, "localhost") == 0 ||
183 strcmp(hostname, GetHostname().c_str()) == 0 )
185 // MESSAGE("ResourcesManager::GetFittingResources : localhost" );
186 vec.push_back(GetHostname().c_str());
187 // MESSAGE("ResourcesManager::GetFittingResources : " << vec.size());
190 else if (_resourcesList.find(hostname) != _resourcesList.end())
192 // --- params.hostname is in the list of resources so return it.
193 vec.push_back(hostname);
198 // Cluster case: number of nodes > 1
// Every multi-node resource whose name starts with hostname is kept; the
// comparison is a prefix match limited to strlen(hostname) characters.
200 for (map<string, ParserResourcesType>::const_iterator iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++){
201 if( (*iter).second.DataForSort._nbOfNodes > 1 ){
202 if( strncmp(hostname,(*iter).first.c_str(),strlen(hostname)) == 0 ){
203 vec.push_back((*iter).first.c_str());
204 //cout << "SALOME_ResourcesManager::GetFittingResources vector["
205 // << cpt << "] = " << (*iter).first.c_str() << endl ;
211 // --- user specified an unknown hostname so notify him.
212 MESSAGE("ResourcesManager::GetFittingResources : SALOME_Exception");
213 throw SALOME_Exception("unknown host");
219 // --- Search for available resources sorted by priority
// NOTE(review): SelectOnlyResourcesWithOS() appears twice below; the
// condition separating the two calls is not visible here. Per the header
// comment the second call restores the OS-only list when the module filter
// leaves nothing (TODO confirm).
221 SelectOnlyResourcesWithOS(vec, params.OS);
223 KeepOnlyResourcesWithModule(vec, componentList);
226 SelectOnlyResourcesWithOS(vec, params.OS);
228 // --- set wanted parameters
// NOTE(review): these are class-level members of ResourceDataToSort, so the
// wanted values are shared by all callers; confirm calls are serialized.
229 ResourceDataToSort::_nbOfNodesWanted = params.nb_node;
231 ResourceDataToSort::_nbOfProcPerNodeWanted = params.nb_proc_per_node;
233 ResourceDataToSort::_CPUFreqMHzWanted = params.cpu_clock;
235 ResourceDataToSort::_memInMBWanted = params.mem_mb;
// Collect the sortable data of every candidate host.
239 list<ResourceDataToSort> li;
241 for (vector<string>::iterator iter = vec.begin();
// NOTE(review): operator[] on _resourcesList creates a default entry when the
// name is absent (std::map behaviour); the local host name pushed above is
// not checked against the catalog.
244 li.push_back(_resourcesList[(*iter)].DataForSort);
// Write the host names back into vec in the order of li (presumably after li
// has been sorted; the sort call is not visible here).
250 for (list<ResourceDataToSort>::iterator iter2 = li.begin();
253 vec[i++] = (*iter2)._hostName;
256 // MESSAGE("ResourcesManager::GetFittingResources : return" << ret.size());
// Copy the resulting names into the CORBA sequence.
257 ret->length(vec.size());
258 for(unsigned int i=0;i<vec.size();i++)
259 (*ret)[i]=(vec[i]).c_str();
262 catch(const SALOME_Exception &ex)
264 INFOS("Caught exception.");
265 THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM);
272 //=============================================================================
274 * add an entry in the resources catalog xml file.
275 * Return 0 if OK (KERNEL found in new resources modules) else throw exception
277 //=============================================================================
// Adds (or overwrites) the _resourcesList entry keyed by
// paramsOfNewResources.hostname.
//   paramsOfNewResources  : hostname, OS, mem_mb, cpu_clock, nb_node and
//                           nb_proc_per_node are copied into the new entry.
//   modulesOnNewResources : stored as ModulesList; it is also searched for a
//                           required module first.
//   userName, prot        : stored as UserName and Protocol.
// Throws SALOME_Exception when the searched module is not in
// modulesOnNewResources.
280 SALOME_ResourcesManager::
281 AddResourceInCatalog(const Engines::MachineParameters& paramsOfNewResources,
282 const vector<string>& modulesOnNewResources,
284 const char *userName,
286 AccessProtocolType prot)
287 throw(SALOME_Exception)
// The value searched for is not visible here; the exception message below
// indicates it is the KERNEL module (TODO confirm).
289 vector<string>::const_iterator iter = find(modulesOnNewResources.begin(),
290 modulesOnNewResources.end(),
293 if (iter != modulesOnNewResources.end())
295 ParserResourcesType newElt;
296 newElt.DataForSort._hostName = paramsOfNewResources.hostname;
297 newElt.Alias = alias;
298 newElt.Protocol = prot;
300 newElt.UserName = userName;
301 newElt.ModulesList = modulesOnNewResources;
302 newElt.OS = paramsOfNewResources.OS;
303 newElt.DataForSort._memInMB = paramsOfNewResources.mem_mb;
304 newElt.DataForSort._CPUFreqMHz = paramsOfNewResources.cpu_clock;
305 newElt.DataForSort._nbOfNodes = paramsOfNewResources.nb_node;
306 newElt.DataForSort._nbOfProcPerNode =
307 paramsOfNewResources.nb_proc_per_node;
// An existing entry with the same host name is replaced.
308 _resourcesList[newElt.DataForSort._hostName] = newElt;
313 throw SALOME_Exception("KERNEL is not present in this resource");
316 //=============================================================================
318 * Deletes a resource from the catalog
320 //=============================================================================
// Erases the _resourcesList entry whose key is hostname. Only the in-memory
// list is touched by the statement visible here.
322 void SALOME_ResourcesManager::DeleteResourceInCatalog(const char *hostname)
324 _resourcesList.erase(hostname);
327 //=============================================================================
329 * write the current data in memory in file.
331 //=============================================================================
// Serializes _resourcesList to the XML file named by _path_resources.
// Failures to open or to save are reported through INFOS.
333 void SALOME_ResourcesManager::WriteInXmlFile()
335 const char* aFilePath = _path_resources.c_str();
// The file is opened for writing first; the document itself is written by
// xmlSaveFile() using the path, not this FILE handle.
337 FILE* aFile = fopen(aFilePath, "w");
341 INFOS("Error opening file !");
345 xmlDocPtr aDoc = xmlNewDoc(BAD_CAST "1.0");
346 xmlNewDocComment(aDoc, BAD_CAST "ResourcesCatalog");
// The handler fills aDoc from the resource list.
// NOTE(review): no release of handler, aFile or aDoc is visible here;
// confirm they are released on every path.
348 SALOME_ResourcesCatalog_Handler* handler =
349 new SALOME_ResourcesCatalog_Handler(_resourcesList);
350 handler->PrepareDocToXmlFile(aDoc);
353 int isOk = xmlSaveFile(aFilePath, aDoc);
356 INFOS("Error while XML file saving.");
363 MESSAGE("WRITING DONE!");
366 //=============================================================================
368 * parse the data type catalog
370 //=============================================================================
// Reads the XML file named by _path_resources through a
// SALOME_ResourcesCatalog_Handler constructed on _resourcesList, and returns
// a reference to _resourcesList. Unreadable or unparsable files are reported
// through INFOS.
372 const MapOfParserResourcesType& SALOME_ResourcesManager::ParseXmlFile()
// NOTE(review): no release of handler is visible here; confirm it is deleted.
374 SALOME_ResourcesCatalog_Handler* handler =
375 new SALOME_ResourcesCatalog_Handler(_resourcesList);
377 const char* aFilePath = _path_resources.c_str();
// fopen() serves as a readability probe; parsing goes through xmlReadFile()
// with the path.
378 FILE* aFile = fopen(aFilePath, "r");
382 xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0);
385 handler->ProcessXmlDocument(aDoc);
387 INFOS("ResourcesManager: could not parse file "<<aFilePath);
395 INFOS("ResourcesManager: file "<<aFilePath<<" is not readable.");
399 return _resourcesList;
402 //=============================================================================
404 * consult the content of the list
406 //=============================================================================
// Returns a const reference to the in-memory resource list; nothing is parsed
// or copied here.
408 const MapOfParserResourcesType& SALOME_ResourcesManager::GetList() const
410 return _resourcesList;
414 //=============================================================================
416 * dynamically obtains the first machines
418 //=============================================================================
// Delegates to _dynamicResourcesSelecter.FindFirst() and returns a
// CORBA::string_dup copy of the resulting name.
421 SALOME_ResourcesManager::FindFirst(const Engines::MachineList& listOfMachines)
423 return CORBA::string_dup(_dynamicResourcesSelecter.FindFirst(listOfMachines).c_str());
426 //=============================================================================
428 * dynamically obtains the next machine
430 //=============================================================================
// Delegates to _dynamicResourcesSelecter.FindNext(), passing the candidate
// list together with _resourcesList and _NS, and returns its result.
433 SALOME_ResourcesManager::FindNext(const Engines::MachineList& listOfMachines)
435 return _dynamicResourcesSelecter.FindNext(listOfMachines,_resourcesList,_NS);
437 //=============================================================================
439 * dynamically obtains the best machines
441 //=============================================================================
// Delegates to _dynamicResourcesSelecter.FindBest() and returns its result
// unchanged.
444 SALOME_ResourcesManager::FindBest(const Engines::MachineList& listOfMachines)
446 return _dynamicResourcesSelecter.FindBest(listOfMachines);
449 //=============================================================================
451 * This is no longer valid (C++ containers are also Python containers)
453 //=============================================================================
// Tests whether ContainerName ends with the two characters "Py".
455 bool isPythonContainer(const char* ContainerName)
458 int len = strlen(ContainerName);
// NOTE(review): ContainerName + len - 2 points before the start of the string
// when len < 2; no length guard is visible here (TODO confirm one exists).
461 if (strcmp(ContainerName + len - 2, "Py") == 0)
468 //=============================================================================
470 * Builds the script to be launched
472 * If SALOME Application not defined ($APPLI),
473 * see BuildTempFileToLaunchRemoteContainer()
475 * Else rely on distant configuration. Command is under the form (example):
476 * ssh user@machine distantPath/runRemote.sh hostNS portNS \
477 * SALOME_Container containerName &"
479 * - where user is omitted if not specified in CatalogResources,
480 * - where distant path is always relative to user@machine $HOME, and
481 * equal to $APPLI if not specified in CatalogResources,
482 * - where hostNS is the hostname of CORBA naming server (set by scripts to
483 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
484 * - where portNS is the port used by CORBA naming server (set by scripts to
485 * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh)
487 //=============================================================================
// Assembles the shell command used to start a container on a remote machine.
//   machine : key into _resourcesList giving protocol, user name and
//             application path.
//   params  : nb_node and nb_proc_per_node give the process count; params is
//             also passed to _NS->ContainerName().
//   id      : formatted in decimal into idc.
// When $APPLI was not defined at construction the command comes from
// BuildTempFileToLaunchRemoteContainer() instead.
// Throws SALOME_Exception("Unknown protocol") for a protocol other than rsh
// or ssh.
// NOTE(review): the command is built from catalog fields, environment
// variables and the container name without visible quoting; confirm these
// inputs are trusted.
490 SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
491 (const string& machine,
492 const Engines::MachineParameters& params, const long id)
// 3 bytes per byte of long is enough for sign, decimal digits and terminator
// with a 4- or 8-byte long.
496 char idc[3*sizeof(long)];
498 if ( ! _isAppliSalomeDefined )
499 command = BuildTempFileToLaunchRemoteContainer(machine, params);
// NOTE(review): operator[] creates a default entry when machine is not in
// _resourcesList (std::map behaviour).
503 const ParserResourcesType& resInfo = _resourcesList[machine];
// Process count: a zero factor is ignored, otherwise nodes * procs per node.
// The value used when both are <= 0 is not visible here.
507 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
509 else if ( params.nb_node == 0 )
510 nbproc = params.nb_proc_per_node;
511 else if ( params.nb_proc_per_node == 0 )
512 nbproc = params.nb_node;
514 nbproc = params.nb_node * params.nb_proc_per_node;
517 // "ssh user@machine distantPath/runRemote.sh hostNS portNS \
518 // SALOME_Container containerName &"
520 if (resInfo.Protocol == rsh)
522 else if (resInfo.Protocol == ssh)
525 throw SALOME_Exception("Unknown protocol");
// The user name is added only when the catalog provides one.
527 if (resInfo.UserName != "")
529 command += resInfo.UserName;
// Application path: the catalog value when present, $APPLI otherwise.
536 if (resInfo.AppliPath != "")
537 command += resInfo.AppliPath; // path relative to user@machine $HOME
540 ASSERT(getenv("APPLI"));
541 command += getenv("APPLI"); // path relative to user@machine $HOME
544 command += "/runRemote.sh ";
546 ASSERT(getenv("NSHOST"));
547 command += getenv("NSHOST"); // hostname of CORBA name server
550 ASSERT(getenv("NSPORT"));
551 command += getenv("NSPORT"); // port of CORBA name server
// MPI launch prefix; the condition selecting it is not visible here
// (presumably params.isMPI, TODO confirm).
555 command += " mpirun -np ";
556 std::ostringstream o;
560 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
562 command += " SALOME_MPIContainer ";
565 command += " SALOME_Container ";
567 command += _NS->ContainerName(params);
569 sprintf(idc,"%ld",id);
572 AddOmninamesParams(command);
// Output goes to a log file under /tmp whose name includes the container
// name, the local host name and $USER.
573 command += " > /tmp/";
574 command += _NS->ContainerName(params);
576 command += GetHostname();
// NOTE(review): unlike APPLI/NSHOST/NSPORT above, getenv("USER") is appended
// without an ASSERT or null check.
578 command += getenv( "USER" ) ;
579 command += ".log 2>&1 &" ;
581 MESSAGE("command =" << command);
587 //=============================================================================
589 * builds the command to be launched.
591 //=============================================================================
// Assembles the shell command used to start a container on the local machine.
//   params : nb_node and nb_proc_per_node give the MPI process count;
//            container_name selects the Python or C++ container executable;
//            params is also passed to _NS->ContainerName().
//   id     : formatted in decimal into idc.
594 SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
595 (const Engines::MachineParameters& params, const long id)
// 3 bytes per byte of long is enough for sign, decimal digits and terminator
// with a 4- or 8-byte long.
600 char idc[3*sizeof(long)];
// MPI variant; the enclosing condition is not visible here (presumably
// params.isMPI, TODO confirm).
604 command = "mpirun -np ";
// Process count: a zero factor is ignored, otherwise nodes * procs per node.
// The value used when both are <= 0 is not visible here.
606 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
608 else if ( params.nb_node == 0 )
609 nbproc = params.nb_proc_per_node;
610 else if ( params.nb_proc_per_node == 0 )
611 nbproc = params.nb_node;
613 nbproc = params.nb_node * params.nb_proc_per_node;
615 std::ostringstream o;
621 command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
624 if (isPythonContainer(params.container_name))
625 command += "pyMPI SALOME_ContainerPy.py ";
627 command += "SALOME_MPIContainer ";
// Non-MPI variant: the executable name starts the command (plain assignment).
632 if (isPythonContainer(params.container_name))
633 command = "SALOME_ContainerPy.py ";
635 command = "SALOME_Container ";
638 command += _NS->ContainerName(params);
640 sprintf(idc,"%ld",id);
643 AddOmninamesParams(command);
// Output goes to a log file under /tmp whose name includes the container
// name, the local host name and $USER.
644 command += " > /tmp/";
645 command += _NS->ContainerName(params);
647 command += GetHostname();
// NOTE(review): getenv("USER") is appended without a null check.
649 command += getenv( "USER" ) ;
650 command += ".log 2>&1 &" ;
651 MESSAGE("Command is ... " << command);
656 //=============================================================================
658 * removes the generated temporary file in case of a remote launch.
660 //=============================================================================
// Builds a delete command for the temporary file recorded in _TmpFileName;
// nothing is done when _TmpFileName is empty.
662 void SALOME_ResourcesManager::RmTmpFile()
664 if (_TmpFileName != "")
// Two alternative declarations of command: presumably selected by a
// platform-specific preprocessor conditional that is not visible here.
667 string command = "rm ";
669 string command = "del /F ";
671 command += _TmpFileName;
// Work on a writable copy: the last three characters are replaced by "*", so
// the command matches every file sharing the remaining prefix.
// NOTE(review): no free() of the strdup() result is visible here.
672 char *temp = strdup(command.c_str());
673 int lgthTemp = strlen(temp);
674 temp[lgthTemp - 3] = '*';
675 temp[lgthTemp - 2] = '\0';
682 //=============================================================================
684 * builds the script to be launched
686 //=============================================================================
// Assembles a remote launch command from the catalog entry of machine.
//   machine       : key into _resourcesList giving the protocol.
//   containerName : selects the Python or C++ executable and is used both as
//                   argument and in the log file name.
// Throws SALOME_Exception("Not implemented yet...") for a protocol other than
// rsh or ssh.
689 SALOME_ResourcesManager::BuildCommand
690 (const string& machine,
691 const char *containerName)
693 // rsh -n ikkyo /export/home/rahuel/SALOME_ROOT/bin/runSession SALOME_Container -ORBInitRef NameService=corbaname::dm2s0017:1515 &
// NOTE(review): operator[] creates a default entry when machine is not in
// _resourcesList (std::map behaviour).
694 const ParserResourcesType& resInfo = _resourcesList[machine];
695 bool pyCont = isPythonContainer(containerName);
699 if (resInfo.Protocol == rsh)
700 command = "rsh -n " ;
701 else if (resInfo.Protocol == ssh)
702 command = "ssh -f -n ";
704 throw SALOME_Exception("Not implemented yet...");
// NOTE(review): getenv("KERNEL_ROOT_DIR") initializes a std::string without a
// null check.
708 string path = getenv("KERNEL_ROOT_DIR");
710 command += "/bin/salome/";
713 command += "SALOME_ContainerPy.py ";
715 command += "SALOME_Container ";
717 command += containerName;
719 AddOmninamesParams(command);
720 command += " > /tmp/";
721 command += containerName;
// NOTE(review): getenv("USER") is appended without a null check.
725 command += getenv( "USER" ) ;
726 command += ".log 2>&1 &" ;
732 //=============================================================================
734 * Gives a sublist of machines with matching OS.
735 * If parameter OS is empty, gives the complete list of machines
737 //=============================================================================
739 // Warning need an updated parsed list : _resourcesList
// Appends to hosts the name of every _resourcesList entry whose OS equals
// base, or of every entry when base is empty.
//   hosts : output vector; names are appended with push_back.
//   OS    : requested operating system (base is presumably built from it; its
//           declaration is not visible here).
741 SALOME_ResourcesManager::SelectOnlyResourcesWithOS
742 ( vector<string>& hosts,
743 const char *OS) const
744 throw(SALOME_Exception)
748 for (map<string, ParserResourcesType>::const_iterator iter =
749 _resourcesList.begin();
750 iter != _resourcesList.end();
753 if ( (*iter).second.OS == base || base.size() == 0)
754 hosts.push_back((*iter).first);
759 //=============================================================================
761 * Gives a sublist of machines on which the module is known.
763 //=============================================================================
765 //Warning need an updated parsed list : _resourcesList
// Filters hosts against componentList using each host's ModulesList from
// _resourcesList.
//   hosts         : in/out vector of host names.
//   componentList : components looked up in each host's module list.
// A host with an empty ModulesList skips the per-component search. The
// statements acting on a missing component are not visible here (presumably
// the host is erased, given erasedHost, TODO confirm).
767 SALOME_ResourcesManager::KeepOnlyResourcesWithModule
768 ( vector<string>& hosts,
769 const Engines::CompoList& componentList) const
770 throw(SALOME_Exception)
// The loop header has no increment: the iterator is advanced in the body.
772 for (vector<string>::iterator iter = hosts.begin(); iter != hosts.end();)
// NOTE(review): the find() result is dereferenced without comparing it to
// _resourcesList.end(); a host name absent from the catalog would be invalid.
774 MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter);
775 const vector<string>& mapOfModulesOfCurrentHost = (((*it).second).ModulesList);
777 bool erasedHost = false;
778 if( mapOfModulesOfCurrentHost.size() > 0 ){
779 for(int i=0;i<componentList.length();i++){
780 const char* compoi = componentList[i];
781 vector<string>::const_iterator itt = find(mapOfModulesOfCurrentHost.begin(),
782 mapOfModulesOfCurrentHost.end(),
784 // componentList[i]);
// The component is not among this host's modules.
785 if (itt == mapOfModulesOfCurrentHost.end()){
799 //=============================================================================
801 * add to command all options relative to naming service.
803 //=============================================================================
// Appends the naming-service option text "ORBInitRef NameService=" to
// command, using the address obtained from _NS->getIORaddr(). The statement
// appending iorstr is not visible here.
// The commented-out code below is an earlier variant that read the value from
// the file named by $OMNIORB_CONFIG.
805 void SALOME_ResourcesManager::AddOmninamesParams(string& command) const
807 // If env variable OMNIORB_CONFIG is not defined or the file is more complex than one line
809 // Even if we use it we have to check if env variable exists
810 //string omniORBcfg( getenv( "OMNIORB_CONFIG" ) ) ;
811 //ifstream omniORBfile( omniORBcfg.c_str() ) ;
812 //char ORBInitRef[11] ;
814 //char nameservice[132] ;
815 //omniORBfile >> ORBInitRef ;
816 //command += "ORBInitRef " ;
817 //omniORBfile >> egal ;
818 //omniORBfile >> nameservice ;
819 //omniORBfile.close() ;
820 //char * bsn = strchr( nameservice , '\n' ) ;
824 //command += nameservice ;
826 char *iorstr = _NS->getIORaddr();
// No leading '-' is added here; the caller is expected to have appended it.
827 command += "ORBInitRef NameService=";
829 //It's in fact a CORBA::String allocated with new [] !!!
834 //=============================================================================
836 * add to command all options relative to naming service.
838 //=============================================================================
// Stream variant: writes "ORBInitRef NameService=" followed by the address
// returned by _NS->getIORaddr() to fileStream.
840 void SALOME_ResourcesManager::AddOmninamesParams(ofstream& fileStream) const
842 fileStream << "ORBInitRef NameService=";
// NOTE(review): the string overload's comment says getIORaddr() returns
// memory allocated with new []; here the pointer is not kept, so it cannot be
// released afterwards (TODO confirm ownership).
843 fileStream << _NS->getIORaddr();
847 //=============================================================================
849 * generate a file name in /tmp directory
851 //=============================================================================
// Produces a temporary file name based on the template "/tmp/commandXXXXXX".
// The statement that fills in the XXXXXX placeholder is not visible here.
853 string SALOME_ResourcesManager::BuildTemporaryFileName() const
855 //build more complex file name to support multiple salome session
// 19 bytes = 12 characters of "/tmp/command" + 6 of "XXXXXX" + terminator.
// NOTE(review): no delete [] of temp is visible here.
856 char *temp = new char[19];
857 strcpy(temp, "/tmp/command");
858 strcat(temp, "XXXXXX");
// Alternative based on the process id; presumably inside a platform-specific
// preprocessor branch that is not visible here.
865 itoa(getpid(), aPID, 10);
869 string command(temp);
876 //=============================================================================
878 * Builds in a temporary file the script to be launched.
880 * Used if SALOME Application ($APPLI) is not defined.
881 * The command is built with data from CatalogResources, in which every path
882 * used on remote computer must be defined.
884 //=============================================================================
// Writes a launch script to a temporary file, copies it to machine with rcp
// or scp according to the catalog protocol, and builds the command that runs
// it remotely.
//   machine : key into _resourcesList; also the copy destination host.
//   params  : nb_node / nb_proc_per_node give the MPI process count;
//             container_name selects the executable.
// Side effects visible here: sets _TmpFileName and _CommandForRemAccess,
// creates the script file and runs the copy command through system().
// Throws SALOME_Exception for an unknown protocol or a failed connection.
887 SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer
888 (const string& machine,
889 const Engines::MachineParameters& params) throw(SALOME_Exception)
893 _TmpFileName = BuildTemporaryFileName();
894 ofstream tempOutputFile;
895 tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
// NOTE(review): operator[] creates a default entry when machine is not in
// _resourcesList (std::map behaviour).
896 const ParserResourcesType& resInfo = _resourcesList[machine];
897 tempOutputFile << "#! /bin/sh" << endl;
901 tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
902 //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
// MPI prefix; the enclosing condition is not visible here (presumably
// params.isMPI, TODO confirm).
908 tempOutputFile << "mpirun -np ";
// Process count: a zero factor is ignored, otherwise nodes * procs per node.
// The value used when both are <= 0 is not visible here.
911 if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
913 else if ( params.nb_node == 0 )
914 nbproc = params.nb_proc_per_node;
915 else if ( params.nb_proc_per_node == 0 )
916 nbproc = params.nb_node;
918 nbproc = params.nb_node * params.nb_proc_per_node;
920 std::ostringstream o;
922 tempOutputFile << nbproc << " ";
924 tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
// NOTE(review): streaming a null getenv() result is invalid; no check is
// visible here.
928 tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
932 if (isPythonContainer(params.container_name))
933 tempOutputFile << "pyMPI SALOME_ContainerPy.py ";
935 tempOutputFile << "SALOME_MPIContainer ";
940 if (isPythonContainer(params.container_name))
941 tempOutputFile << "SALOME_ContainerPy.py ";
943 tempOutputFile << "SALOME_Container ";
// The trailing " -" joins with the text written by AddOmninamesParams() to
// form "-ORBInitRef NameService=...".
946 tempOutputFile << _NS->ContainerName(params) << " -";
947 AddOmninamesParams(tempOutputFile);
948 tempOutputFile << " &" << endl;
949 tempOutputFile.flush();
950 tempOutputFile.close();
// 0x1ED is octal 0755: rwx for the owner, r-x for group and others.
951 chmod(_TmpFileName.c_str(), 0x1ED);
// Copy the script to the remote machine with the tool matching the protocol.
957 if (resInfo.Protocol == rsh)
960 string commandRcp = "rcp ";
961 commandRcp += _TmpFileName;
963 commandRcp += machine;
965 commandRcp += _TmpFileName;
966 status = system(commandRcp.c_str());
969 else if (resInfo.Protocol == ssh)
972 string commandRcp = "scp ";
973 commandRcp += _TmpFileName;
975 commandRcp += machine;
977 commandRcp += _TmpFileName;
978 status = system(commandRcp.c_str());
981 throw SALOME_Exception("Unknown protocol");
984 throw SALOME_Exception("Error of connection on remote host");
// Remember the remote-access prefix before the script name is appended.
987 _CommandForRemAccess = command;
989 command += _TmpFileName;
992 command += _NS->ContainerName(params);
// NOTE(review): getenv("USER") is appended without a null check.
996 command += getenv( "USER" ) ;
997 command += ".log 2>&1 &";
1004 //=============================================================================
1005 /*! Creates a command line that the container manager uses to launch
1006 * a parallel container.
1008 //=============================================================================
// Assembles the command line that starts a parallel container locally.
//   exe_name : executable base name; params.parallelLib is appended to form
//              the real executable name, and the position of "Proxy" in it
//              is recorded in par.
//   params   : parallelLib selects the "Dummy" or "Mpi" branch;
//              nb_component_nodes is formatted into buffer; several fields
//              are copied into rtn for _NS->ContainerName().
//   log      : "default" redirects output to a log file under /tmp; the
//              xterm wrapper below belongs to another value whose test is
//              not visible here.
// Throws SALOME_Exception when parallelLib is neither "Dummy" nor "Mpi".
1010 SALOME_ResourcesManager::BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name,
1011 const Engines::MachineParameters& params,
1012 const std::string& log)
1014 // This method knows the differences between the proxy and the nodes.
1015 // nb_component_nodes is not used in the same way if it is a proxy or
// NOTE(review): CORBA::string_dup() returns a newly allocated copy that
// std::string copies again; the duplicates are never released with
// CORBA::string_free(), so both lines leak (TODO confirm and drop the dup).
1019 string parallelLib(CORBA::string_dup(params.parallelLib));
1020 string hostname(CORBA::string_dup(params.hostname));
// NOTE(review): find() returns std::string::size_type; storing it in an int
// relies on npos converting to -1.
1021 int par = exe_name.find("Proxy");
1022 int nbproc = params.nb_component_nodes;
1024 sprintf(buffer,"%d",nbproc);
// Copy of the fields of params listed below, handed to ContainerName().
1026 Engines::MachineParameters_var rtn = new Engines::MachineParameters();
1027 rtn->container_name = params.container_name;
1028 rtn->hostname = params.hostname;
1029 rtn->OS = params.OS;
1030 rtn->mem_mb = params.mem_mb;
1031 rtn->cpu_clock = params.cpu_clock;
1032 rtn->nb_proc_per_node = params.nb_proc_per_node;
1033 rtn->nb_node = params.nb_node;
1034 rtn->isMPI = params.isMPI;
1036 string real_exe_name = exe_name + parallelLib;
1038 if (parallelLib == "Dummy")
1040 //command = "gdb --args ";
1041 //command = "valgrind --tool=memcheck --log-file=val_log ";
1042 //command += real_exe_name;
1044 command = real_exe_name;
1046 command += " " + _NS->ContainerName(rtn);
1047 command += " " + parallelLib;
1048 command += " " + hostname;
1050 AddOmninamesParams(command);
1053 else if (parallelLib == "Mpi")
1055 // Step 1 : check if MPI is started
1056 if (_MpiStarted == false)
// Two mpiexec forms follow: one started with nbproc processes, one started
// with a single process that receives nbproc as an argument. The condition
// choosing between them is not visible here (presumably based on par).
1065 command = "mpiexec -np " + string(buffer) + " ";
1066 // command += "gdb --args ";
1067 command += real_exe_name;
1068 command += " " + _NS->ContainerName(rtn);
1069 command += " " + parallelLib;
1070 command += " " + hostname;
1072 AddOmninamesParams(command);
1077 command = "mpiexec -np 1 ";
1078 command += real_exe_name;
1079 command += " " + _NS->ContainerName(rtn);
1080 command += " " + string(buffer);
1081 command += " " + parallelLib;
1082 command += " " + hostname;
1084 AddOmninamesParams(command);
// NOTE(review): there is no separator between the message text and the
// library name.
1089 std::string message("Unknown parallelLib" + parallelLib);
1090 throw SALOME_Exception(message.c_str());
1094 if (log == "default")
1096 command += " > /tmp/";
1097 command += _NS->ContainerName(rtn);
1099 command += GetHostname();
// NOTE(review): getenv("USER") is appended without a null check.
1101 command += getenv( "USER" ) ;
1102 command += ".log 2>&1 &" ;
// Wraps the command in an xterm started from a fixed path.
1106 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; "
1107 + command + " \" &";
1108 // + command + "; echo $LD_LIBRARY_PATH; cat \" &";
1112 /* if (log == "xterm")
1114 command = "/usr/X11R6/bin/xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH; echo $LD_LIBRARY_PATH; echo $PATH; " + command + "; cat \" &";
1117 /* command = "cd ; rm " + fichier_commande + "; touch " + \
1118 fichier_commande + "; echo \" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; " + \
1119 command + " >& /tmp/ribes_" + fichier_commande + " & \" > " + fichier_commande + ";";
1120 command += "ssh cn01 sh " + fichier_commande + " &";
1121 cerr << "La commande : " << command << endl;
// Prints a banner on cerr stating that only LAM on localhost is supported,
// runs "lamboot" through system() and reports two failure statuses (-1 and
// 217) through INFOS. What happens for other statuses is not visible here.
1125 void SALOME_ResourcesManager::startMPI()
1127 cerr << "----------------------------------------------" << endl;
1128 cerr << "----------------------------------------------" << endl;
1129 cerr << "----------------------------------------------" << endl;
1130 cerr << "-Only Lam on Localhost is currently supported-" << endl;
1131 cerr << "----------------------------------------------" << endl;
1132 cerr << "----------------------------------------------" << endl;
1133 cerr << "----------------------------------------------" << endl;
1135 int status = system("lamboot");
1138 INFOS("lamboot failed : system command status -1");
1140 else if (status == 217)
1142 INFOS("lamboot failed : system command status 217");
// Builds a heap-allocated Engines::MachineParameters from the _resourcesList
// entry keyed by hostname. Enum-valued catalog fields (Protocol, mpi, Batch)
// are translated to their string names. The end of this function is not
// visible here.
1150 Engines::MachineParameters* SALOME_ResourcesManager::GetMachineParameters(const char *hostname)
// NOTE(review): operator[] creates a default entry when hostname is not in
// _resourcesList (std::map behaviour), and the entry is copied by value.
1152 ParserResourcesType resource = _resourcesList[string(hostname)];
1153 Engines::MachineParameters *p_ptr = new Engines::MachineParameters;
1154 p_ptr->container_name = CORBA::string_dup("");
// NOTE(review): this duplicates the literal text "hostname", not the hostname
// argument. It looks like a bug: every result carries the same fixed name.
// Confirm and pass the parameter instead.
1155 p_ptr->hostname = CORBA::string_dup("hostname");
1156 p_ptr->alias = CORBA::string_dup(resource.Alias.c_str());
// protocol is left unassigned by the visible code for any other value.
1157 if( resource.Protocol == rsh )
1158 p_ptr->protocol = "rsh";
1159 else if( resource.Protocol == ssh )
1160 p_ptr->protocol = "ssh";
1161 p_ptr->username = CORBA::string_dup(resource.UserName.c_str());
1162 p_ptr->applipath = CORBA::string_dup(resource.AppliPath.c_str());
// Copy the module names into the CORBA sequence.
1163 p_ptr->modList.length(resource.ModulesList.size());
1164 for(int i=0;i<resource.ModulesList.size();i++)
1165 p_ptr->modList[i] = CORBA::string_dup(resource.ModulesList[i].c_str());
1166 p_ptr->OS = CORBA::string_dup(resource.OS.c_str());
1167 p_ptr->mem_mb = resource.DataForSort._memInMB;
1168 p_ptr->cpu_clock = resource.DataForSort._CPUFreqMHz;
1169 p_ptr->nb_proc_per_node = resource.DataForSort._nbOfProcPerNode;
1170 p_ptr->nb_node = resource.DataForSort._nbOfNodes;
// MPI implementation name.
1171 if( resource.mpi == indif )
1172 p_ptr->mpiImpl = "indif";
1173 else if( resource.mpi == lam )
1174 p_ptr->mpiImpl = "lam";
1175 else if( resource.mpi == mpich1 )
1176 p_ptr->mpiImpl = "mpich1";
1177 else if( resource.mpi == mpich2 )
1178 p_ptr->mpiImpl = "mpich2";
1179 else if( resource.mpi == openmpi )
1180 p_ptr->mpiImpl = "openmpi";
// Batch system name.
1181 if( resource.Batch == pbs )
1182 p_ptr->batch = "pbs";
1183 else if( resource.Batch == lsf )
1184 p_ptr->batch = "lsf";
1185 else if( resource.Batch == slurm )
1186 p_ptr->batch = "slurm";