From 22803b58efd3f49d804a7fb3611d652498a65317 Mon Sep 17 00:00:00 2001 From: secher Date: Thu, 13 Sep 2007 13:48:24 +0000 Subject: [PATCH] add PBS batch manager --- src/Batch/BatchLight_BatchManager.cxx | 33 +- src/Batch/BatchLight_BatchManager.hxx | 7 +- src/Batch/BatchLight_BatchManager_PBS.cxx | 423 ++++++++++++++++++ src/Batch/BatchLight_BatchManager_PBS.hxx | 63 +++ src/Batch/BatchLight_BatchManager_SLURM.cxx | 32 -- src/Batch/BatchLight_BatchManager_SLURM.hxx | 11 +- src/Batch/Makefile.am | 2 + .../SALOME_ResourcesCatalog_Handler.cxx | 34 +- .../SALOME_ResourcesCatalog_Handler.hxx | 1 + .../SALOME_ResourcesCatalog_Parser.cxx | 2 + .../SALOME_ResourcesCatalog_Parser.hxx | 3 + .../SALOME_ResourcesManager.cxx | 16 + .../SALOME_ResourcesManager.hxx | 2 +- 13 files changed, 587 insertions(+), 42 deletions(-) create mode 100644 src/Batch/BatchLight_BatchManager_PBS.cxx create mode 100644 src/Batch/BatchLight_BatchManager_PBS.hxx diff --git a/src/Batch/BatchLight_BatchManager.cxx b/src/Batch/BatchLight_BatchManager.cxx index c69bcfbd9..4047c47a4 100644 --- a/src/Batch/BatchLight_BatchManager.cxx +++ b/src/Batch/BatchLight_BatchManager.cxx @@ -55,7 +55,38 @@ namespace BatchLight { // Destructeur BatchManager::~BatchManager() { - // Nothing to do + MESSAGE("BatchManager destructor "<<_params.hostname); + std::map < int, const BatchLight::Job * >::const_iterator it; + for(it=_jobmap.begin();it!=_jobmap.end();it++) + delete it->second; + } + + // Methode pour le controle des jobs : soumet un job au gestionnaire + const int BatchManager::submitJob(Job* job) + { + BEGIN_OF("BatchManager::submitJob"); + int id; + + // temporary directory on cluster to put input files for job + setDirForTmpFiles(); + SCRUTE(_dirForTmpFiles); + + // export input files on cluster + exportInputFiles(job->getFileToExecute(),job->getFilesToExportList()); + + // build salome coupling script for job + buildSalomeCouplingScript(job->getFileToExecute()); + + // build batch script for job + 
buildSalomeBatchScript(job->getNbProc()); + + // submit job on cluster + id = submit(); + + // register job on map + _jobmap[id] = job; + END_OF("BatchManager::submitJob"); + return id; } void BatchManager::setDirForTmpFiles() diff --git a/src/Batch/BatchLight_BatchManager.hxx b/src/Batch/BatchLight_BatchManager.hxx index 43f69f6b2..73bf92571 100644 --- a/src/Batch/BatchLight_BatchManager.hxx +++ b/src/Batch/BatchLight_BatchManager.hxx @@ -34,6 +34,7 @@ #include #include "Utils_SALOME_Exception.hxx" #include +#include #include CORBA_CLIENT_HEADER(SALOME_ContainerManager) namespace BatchLight { @@ -46,6 +47,9 @@ namespace BatchLight { std::string username; // username d'acces au serveur std::string applipath; // path of apllication directory on server std::vector modulesList; // list of Salome modules installed on server + unsigned int nbnodes; // number of nodes on cluster + unsigned int nbprocpernode; // number of processors on each node + std::string mpiImpl; // mpi implementation }; class BatchManager @@ -56,7 +60,7 @@ namespace BatchLight { virtual ~BatchManager(); // Methodes pour le controle des jobs : virtuelles pures - virtual const int submitJob(BatchLight::Job* job) = 0; // soumet un job au gestionnaire + const int submitJob(BatchLight::Job* job); // soumet un job au gestionnaire virtual void deleteJob(const int & jobid) = 0; // retire un job du gestionnaire virtual std::string queryJob(const int & jobid) = 0; // renvoie l'etat du job void importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception); @@ -69,6 +73,7 @@ namespace BatchLight { std::string _TmpFileName; std::string _fileNameToExecute; + virtual int submit() throw(SALOME_Exception) = 0; void setDirForTmpFiles(); void exportInputFiles( const char *fileToExecute, const Engines::FilesList filesToExportList ) throw(SALOME_Exception); virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) = 0; diff --git 
a/src/Batch/BatchLight_BatchManager_PBS.cxx b/src/Batch/BatchLight_BatchManager_PBS.cxx new file mode 100644 index 000000000..e008699da --- /dev/null +++ b/src/Batch/BatchLight_BatchManager_PBS.cxx @@ -0,0 +1,423 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include "BatchLight_BatchManager_PBS.hxx" +#include "utilities.h" +#include "BatchLight_Job.hxx" +#include +#include +#include +#include + +using namespace std; + +namespace BatchLight { + + // Constructeur + BatchManager_PBS::BatchManager_PBS(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) + { + } + + // Destructeur + BatchManager_PBS::~BatchManager_PBS() + { + MESSAGE("BatchManager_PBS destructor "<<_params.hostname); + } + + // Methode pour le controle des jobs : retire un job du gestionnaire + void BatchManager_PBS::deleteJob(const int & jobid) + { + BEGIN_OF("BatchManager_PBS::deleteJob"); + string command; + int status; + ostringstream oss; + oss << jobid; + + // define command to submit batch + if( 
_params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qdel " ; + command += oss.str(); + command += "\""; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + MESSAGE("jobId = " << jobid << "killed"); + END_OF("BatchManager_PBS::deleteJob"); + } + + // Methode pour le controle des jobs : renvoie l'etat du job + string BatchManager_PBS::queryJob(const int & jobid) + { + BEGIN_OF("BatchManager_PBS::queryJob"); + // define name of log file + string jstatus; + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qstat -f " ; + ostringstream oss2; + oss2 << jobid; + command += oss2.str(); + command += "\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status && status != 153 && status != 256*153){ + MESSAGE("status="<> jstatus; + iss >> jstatus; + iss >> jstatus; + } + else + jstatus = "U"; + } + + MESSAGE("jobId = " << jobid << " " << jstatus); + END_OF("BatchManager_PBS::queryJob"); + return jstatus; + } + + void BatchManager_PBS::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::buildSalomeCouplingScript"); + 
int status; + int lenf = strlen( fileToExecute ) ; + int i = lenf-1 ; + while ( i >= 0 && fileToExecute[i] != '/' ) { + i -= 1 ; + } + char * FileNameToExecute = new char[lenf-4-i] ; + strncpy(FileNameToExecute , &fileToExecute[i+1] , lenf-4-i) ; /* copies the base name without its 3-character extension; the buffer is filled exactly, so no NUL is written */ + _fileNameToExecute = string( FileNameToExecute , lenf-4-i ) ; /* pass the length explicitly: the buffer is not NUL-terminated */ + delete [] FileNameToExecute ; /* array form: the buffer was allocated with new[] */ + SCRUTE(_fileNameToExecute) ; + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "cd " ; + tempOutputFile << _params.applipath << endl ; + tempOutputFile << "export PYTHONPATH=~/" ; + tempOutputFile << _dirForTmpFiles ; + tempOutputFile << ":$PYTHONPATH" << endl ; + tempOutputFile << "if test " ; + tempOutputFile << mpiRank() ; + tempOutputFile << " = 0; then" << endl ; + tempOutputFile << " ./runAppli --terminal --batch --modules=" ; + for ( i = 0 ; i < _params.modulesList.size() ; i++ ) { + tempOutputFile << _params.modulesList[i] ; + if ( i != _params.modulesList.size()-1 ) + tempOutputFile << "," ; + } + tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; + tempOutputFile << " for ((ip=1; ip < "; + tempOutputFile << mpiSize(); + tempOutputFile << " ; ip++))" << endl; + tempOutputFile << " do" << endl ; + tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; + tempOutputFile << " done" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; + tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; + tempOutputFile << " ./runSession killCurrentPort" << endl; + tempOutputFile << "else" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitNS.py" << endl ; + tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'"; + tempOutputFile << mpiRank() << endl ; + tempOutputFile << "fi" << endl 
; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/runSalome_" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + RmTmpFile(); + + END_OF("BatchManager_PBS::buildSalomeCouplingScript"); + } + + void BatchManager_PBS::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::buildSalomeBatchScript"); + int status; + + int nbmaxproc = _params.nbnodes * _params.nbprocpernode; + if( nbproc > nbmaxproc ){ + MESSAGE(nbproc << " processors asked on a cluster of " << nbmaxproc << " processors"); + throw SALOME_Exception("Too much processors asked for that cluster"); + } + + int nbnodes; + if( nbproc < _params.nbnodes ) + nbnodes = nbproc; + else + nbnodes = _params.nbnodes; + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + + tempOutputFile << "#! 
/bin/sh -f" << endl ; + tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; + tempOutputFile << "#PBS -o runSalome.log" << endl ; + tempOutputFile << mpiBoot() << endl ; + tempOutputFile << "mpirun -np " << nbproc << " ~/" ; + tempOutputFile << _dirForTmpFiles ; + tempOutputFile << "/runSalome_" ; + tempOutputFile << _fileNameToExecute ; + tempOutputFile << "_Batch.sh" << endl ; + tempOutputFile << mpiHalt() << endl ; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + RmTmpFile(); + END_OF("BatchManager_PBS::buildSalomeBatchScript"); + + } + + int BatchManager_PBS::submit() throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::submit"); + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qsub " ; + command += 
_dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + // read id of submitted job in log file + char line[128] = ""; /* stays an empty string if nothing can be read */ + FILE *fp = fopen(logFile.c_str(),"r"); + if( fp == NULL ) throw SALOME_Exception("Unable to open log file of submitted job"); /* never hand a NULL stream to fgets/fclose */ + fgets( line, 128, fp); + fclose(fp); + string sline(line); + string::size_type pos = sline.find("."); /* find() returns size_type; an int cannot portably hold npos */ + string strjob; + if(pos == string::npos) + strjob = sline; + else + strjob = sline.substr(0,pos); + + int id; + istringstream iss(strjob); + iss >> id; + + END_OF("BatchManager_PBS::submit"); + return id; + } + + std::string BatchManager_PBS::mpiRank() throw(SALOME_Exception) + { + if(_params.mpiImpl == "indif") + throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); + else if(_params.mpiImpl == "lam") + return "${LAMRANK}"; + else + throw SALOME_Exception("not yet implemented"); + } + + std::string BatchManager_PBS::mpiSize() throw(SALOME_Exception) + { + if(_params.mpiImpl == "indif") + throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); + else if(_params.mpiImpl == "lam") + return "${LAMWORLD}"; + else + throw SALOME_Exception("not yet implemented"); + } + + std::string BatchManager_PBS::mpiBoot() throw(SALOME_Exception) + { + if(_params.mpiImpl == "indif") + throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); + else if(_params.mpiImpl == "lam") + return "lamboot ${PBS_NODEFILE}"; + else if(_params.mpiImpl == "mpich1") + return ""; + else + throw SALOME_Exception("not yet implemented"); + } + + std::string BatchManager_PBS::mpiHalt() throw(SALOME_Exception) + { + if(_params.mpiImpl == "indif") + throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); + else if(_params.mpiImpl == "lam") + return "lamhalt"; + else if(_params.mpiImpl == "mpich1") + 
return ""; + else + throw SALOME_Exception("not yet implemented"); + } + +} diff --git a/src/Batch/BatchLight_BatchManager_PBS.hxx b/src/Batch/BatchLight_BatchManager_PBS.hxx new file mode 100644 index 000000000..661f110aa --- /dev/null +++ b/src/Batch/BatchLight_BatchManager_PBS.hxx @@ -0,0 +1,63 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_BATCHMANAGER_PBS_H_ +#define _BL_BATCHMANAGER_PBS_H_ + +#include +#include "Utils_SALOME_Exception.hxx" +#include "BatchLight_BatchManager.hxx" + +namespace BatchLight { + + class Job; + + class BatchManager_PBS : public BatchManager + { + public: + // Constructeur et destructeur + BatchManager_PBS(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + virtual ~BatchManager_PBS(); + + // Methodes pour le controle des jobs : virtuelles pures + void deleteJob(const int & jobid); // retire un job du gestionnaire + std::string queryJob(const int & jobid); // renvoie l'etat du job + + private: + void 
buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); + void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); + int submit() throw(SALOME_Exception); + std::string mpiRank() throw(SALOME_Exception); + std::string mpiSize() throw(SALOME_Exception); + std::string mpiBoot() throw(SALOME_Exception); + std::string mpiHalt() throw(SALOME_Exception); + }; + +} + +#endif diff --git a/src/Batch/BatchLight_BatchManager_SLURM.cxx b/src/Batch/BatchLight_BatchManager_SLURM.cxx index 096325666..3e0fc0fbf 100644 --- a/src/Batch/BatchLight_BatchManager_SLURM.cxx +++ b/src/Batch/BatchLight_BatchManager_SLURM.cxx @@ -47,38 +47,6 @@ namespace BatchLight { BatchManager_SLURM::~BatchManager_SLURM() { MESSAGE("BatchManager_SLURM destructor "<<_params.hostname); - std::map < int, const BatchLight::Job * >::const_iterator it; - for(it=_jobmap.begin();it!=_jobmap.end();it++) - delete it->second; - - } - - // Methode pour le controle des jobs : soumet un job au gestionnaire - const int BatchManager_SLURM::submitJob(Job* job) - { - BEGIN_OF("BatchManager_SLURM::submitJob"); - int id; - - // temporary directory on cluster to put input files for job - setDirForTmpFiles(); - SCRUTE(_dirForTmpFiles); - - // export input files on cluster - exportInputFiles(job->getFileToExecute(),job->getFilesToExportList()); - - // build salome coupling script for job - buildSalomeCouplingScript(job->getFileToExecute()); - - // build batch script for job - buildSalomeBatchScript(job->getNbProc()); - - // submit job on cluster - id = submit(); - - // register job on map - _jobmap[id] = job; - END_OF("BatchManager_SLURM::submitJob"); - return id; } // Methode pour le controle des jobs : retire un job du gestionnaire diff --git a/src/Batch/BatchLight_BatchManager_SLURM.hxx b/src/Batch/BatchLight_BatchManager_SLURM.hxx index 7fda6e3d1..ed21624dd 100644 --- a/src/Batch/BatchLight_BatchManager_SLURM.hxx +++ b/src/Batch/BatchLight_BatchManager_SLURM.hxx @@ -45,14 +45,13 @@ 
namespace BatchLight { virtual ~BatchManager_SLURM(); // Methodes pour le controle des jobs : virtuelles pures - virtual const int submitJob(BatchLight::Job* job); // soumet un job au gestionnaire - virtual void deleteJob(const int & jobid); // retire un job du gestionnaire - virtual std::string queryJob(const int & jobid); // renvoie l'etat du job + void deleteJob(const int & jobid); // retire un job du gestionnaire + std::string queryJob(const int & jobid); // renvoie l'etat du job protected: - virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); - virtual void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); - virtual int submit() throw(SALOME_Exception); + void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); + void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); + int submit() throw(SALOME_Exception); private: diff --git a/src/Batch/Makefile.am b/src/Batch/Makefile.am index 26916b3a9..8e971b740 100644 --- a/src/Batch/Makefile.am +++ b/src/Batch/Makefile.am @@ -61,6 +61,7 @@ LIB_INCLUDES = \ Batch_StringType.hxx \ Batch_TypeMismatchException.hxx \ BatchLight_BatchManager.hxx \ + BatchLight_BatchManager_PBS.hxx \ BatchLight_BatchManager_SLURM.hxx \ BatchLight_Job.hxx @@ -97,6 +98,7 @@ LIB_SRC = \ Batch_TypeMismatchException.cxx \ BatchLight_BatchManager.cxx \ BatchLight_BatchManager_SLURM.cxx \ + BatchLight_BatchManager_PBS.cxx \ BatchLight_Job.cxx diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx index 1fa7967bc..93c114de0 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx @@ -55,6 +55,7 @@ SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& listOfResources): test_protocol = "protocol"; test_mode = "mode"; test_batch = "batch"; + test_mpi = "mpi"; test_user_name = "userName"; 
test_appli_path = "appliPath"; test_modules = "modules"; @@ -191,7 +192,19 @@ startElement( const QString&, _resource.Batch = none; } - if ((qName.compare(QString(test_user_name)) == 0)) + if ((qName.compare(QString(test_mpi)) == 0)) + { + if( content.compare("lam") == 0 ) + _resource.mpi = lam; + else if( content.compare("mpich1") == 0 ) + _resource.mpi = mpich1; + else if( content.compare("mpich2") == 0 ) + _resource.mpi = mpich2; + else + _resource.mpi = indif; + } + + if ((qName.compare(QString(test_user_name)) == 0)) _resource.UserName = content; if ((qName.compare(QString(test_appli_path)) == 0)) @@ -397,6 +410,25 @@ void SALOME_ResourcesCatalog_Handler::PrepareDocToXmlFile(QDomDocument& doc) eltRoot.setAttribute((char *)test_batch, ""); } + switch ((*iter).second.mpi) + { + + case lam: /* labels are mpiImpl enumerators; the BatchType names used before only matched by numeric coincidence */ + eltRoot.setAttribute((char *)test_mpi, "lam"); + break; + + case mpich1: + eltRoot.setAttribute((char *)test_mpi, "mpich1"); + break; + + case mpich2: + eltRoot.setAttribute((char *)test_mpi, "mpich2"); + break; + + default: + eltRoot.setAttribute((char *)test_mpi, ""); + } + eltRoot.setAttribute((char *)test_user_name, (*iter).second.UserName.c_str()); diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx index 9bd4e2a5c..aad6a2932 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx @@ -83,6 +83,7 @@ class SALOME_ResourcesCatalog_Handler : public QXmlDefaultHandler const char *test_protocol; const char *test_mode; const char *test_batch; + const char *test_mpi; const char *test_user_name; const char *test_appli_path; const char *test_modules; diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx index 57154b3f0..e20dbfc6a 100644 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx @@ 
-125,6 +125,7 @@ void ParserResourcesType::Print() const "Protocol : " << Protocol << endl << "Mode : " << Mode << endl << "Batch : " << Batch << endl << + "mpi : " << mpi << endl << "UserName : " << UserName << endl << "AppliPath : " << AppliPath << endl << "OS : " << OS << endl << @@ -148,6 +149,7 @@ void ParserResourcesType::Clear() Protocol = rsh; Mode = interactive; Batch = none; + mpi = indif; UserName = ""; AppliPath = ""; ModulesList.clear(); diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx index 5c49924eb..7bc9ec686 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx @@ -40,6 +40,8 @@ enum AccessModeType {interactive, batch}; enum BatchType {none, pbs, lsf, slurm}; +enum mpiImpl {indif, lam, mpich1, mpich2}; + class ResourceDataToSort { @@ -75,6 +77,7 @@ struct ParserResourcesType AccessProtocolType Protocol; AccessModeType Mode; BatchType Batch; + mpiImpl mpi; std::string UserName; std::string AppliPath; std::vector ModulesList; diff --git a/src/ResourcesManager/SALOME_ResourcesManager.cxx b/src/ResourcesManager/SALOME_ResourcesManager.cxx index c93b8847d..a98144a68 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.cxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.cxx @@ -17,6 +17,8 @@ // // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // +#include "BatchLight_BatchManager_PBS.hxx" +#include "BatchLight_BatchManager_SLURM.hxx" #include "SALOME_ResourcesManager.hxx" #include "BatchLight_Job.hxx" #include "Utils_ExceptHandlers.hxx" @@ -570,8 +572,22 @@ BatchLight::BatchManager *SALOME_ResourcesManager::FactoryBatchManager( const Pa p.username = resInfo.UserName; p.applipath = resInfo.AppliPath; p.modulesList = resInfo.ModulesList; + p.nbnodes = resInfo.DataForSort._nbOfNodes; + p.nbprocpernode = resInfo.DataForSort._nbOfProcPerNode; + if( resInfo.mpi == 
indif ) + p.mpiImpl = "indif"; + else if( resInfo.mpi == lam ) + p.mpiImpl = "lam"; + else if( resInfo.mpi == mpich1 ) + p.mpiImpl = "mpich1"; + else if( resInfo.mpi == mpich2 ) + p.mpiImpl = "mpich2"; + else + throw SALOME_Exception("Unknown mpi implementation"); switch( resInfo.Batch ){ + case pbs: + return new BatchLight::BatchManager_PBS(p); case slurm: return new BatchLight::BatchManager_SLURM(p); default: diff --git a/src/ResourcesManager/SALOME_ResourcesManager.hxx b/src/ResourcesManager/SALOME_ResourcesManager.hxx index ec751ec75..45e370ff8 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.hxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.hxx @@ -23,7 +23,7 @@ #include "Utils_SALOME_Exception.hxx" #include "utilities.h" #include -#include "BatchLight_BatchManager_SLURM.hxx" +#include "BatchLight_BatchManager.hxx" #include "SALOME_ResourcesCatalog_Handler.hxx" #include "SALOME_LoadRateManager.hxx" #include "SALOME_NamingService.hxx" -- 2.39.2