From b7af1b2cd7ad08d9b43cdb3e8cc9f4c820293a13 Mon Sep 17 00:00:00 2001 From: secher Date: Wed, 14 May 2008 13:42:35 +0000 Subject: [PATCH] first version of launcher with use of inified batchmanager --- idl/SALOME_ContainerManager.idl | 7 +- src/Batch/Batch_BatchManager_eClient.cxx | 58 +++- src/Batch/Batch_BatchManager_eClient.hxx | 10 +- src/Batch/Batch_BatchManager_eLSF.cxx | 3 +- src/Batch/Batch_BatchManager_eLSF.hxx | 2 +- src/Batch/Batch_BatchManager_ePBS.cxx | 2 +- src/Batch/Batch_BatchManager_ePBS.hxx | 2 +- src/Batch/Batch_FactBatchManager_eClient.cxx | 48 +++ .../Batch_FactBatchManager_eClient.hxx} | 34 +- src/Batch/Batch_FactBatchManager_eLSF.cxx | 7 +- src/Batch/Batch_FactBatchManager_eLSF.hxx | 6 +- src/Batch/Batch_FactBatchManager_ePBS.cxx | 8 +- src/Batch/Batch_FactBatchManager_ePBS.hxx | 6 +- src/Batch/Makefile.am | 2 + src/Launcher/BatchLight_BatchManager.cxx | 264 -------------- src/Launcher/BatchLight_BatchManager.hxx | 103 ------ src/Launcher/BatchLight_BatchManager_PBS.cxx | 323 ------------------ src/Launcher/BatchLight_BatchManager_PBS.hxx | 62 ---- .../BatchLight_BatchManager_SLURM.cxx | 276 --------------- src/Launcher/BatchLight_Job.cxx | 179 ---------- src/Launcher/BatchLight_Job.hxx | 73 ---- src/Launcher/Launcher.cxx | 169 ++++++--- src/Launcher/Launcher.hxx | 20 +- src/Launcher/Makefile.am | 10 +- src/Launcher/SALOME_Launcher.cxx | 2 +- 25 files changed, 280 insertions(+), 1396 deletions(-) create mode 100644 src/Batch/Batch_FactBatchManager_eClient.cxx rename src/{Launcher/BatchLight_BatchManager_SLURM.hxx => Batch/Batch_FactBatchManager_eClient.hxx} (57%) delete mode 100644 src/Launcher/BatchLight_BatchManager.cxx delete mode 100644 src/Launcher/BatchLight_BatchManager.hxx delete mode 100644 src/Launcher/BatchLight_BatchManager_PBS.cxx delete mode 100644 src/Launcher/BatchLight_BatchManager_PBS.hxx delete mode 100644 src/Launcher/BatchLight_BatchManager_SLURM.cxx delete mode 100644 src/Launcher/BatchLight_Job.cxx delete mode 100644 src/Launcher/BatchLight_Job.hxx diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index 8d2258cf5..c62d784a3 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -76,11 +76,8 @@ struct BatchParameters { string batch_directory; // Where batch command will be launched // and log files will be created - string expected_during_time; // Time for the batch - // has to be like this : hh:mm - string mem; // Minimum of memory needed - // has to be like : 32gb or 512mb - + long expected_during_time; // Time for the batch + long mem; // Minimum of memory needed long nb_proc; // Number of processors requested }; diff --git a/src/Batch/Batch_BatchManager_eClient.cxx b/src/Batch/Batch_BatchManager_eClient.cxx index 474288ad4..41ce47c2e 100644 --- a/src/Batch/Batch_BatchManager_eClient.cxx +++ b/src/Batch/Batch_BatchManager_eClient.cxx @@ -35,7 +35,7 @@ namespace Batch { - BatchManager_eClient::BatchManager_eClient(const char* host, const char* protocol, const char* mpiImpl) : _host(host), _protocol(protocol), _username("") + BatchManager_eClient::BatchManager_eClient(const Batch::FactBatchManager * parent, const char* host, const char* protocol, const char* mpiImpl) : BatchManager(parent, host), _protocol(protocol), _username("") { // instanciation of mpi implementation needed to launch executable in batch script _mpiImpl = FactoryMpiImpl(mpiImpl); @@ -54,8 +54,8 @@ namespace Batch { Parametre params = job.getParametre(); Versatile V = params[INFILE]; Versatile::iterator Vit; - std::string command; - std::string copy_command; + string command; + string copy_command; _username = string(params[USER]); // Test protocol @@ -73,7 +73,7 @@ namespace Batch { command += _username; command += "@"; } - command += _host; + command += _hostname; command += " \"mkdir -p "; command += string(params[TMPDIR]); command += "\"" ; @@ -96,7 +96,7 @@ namespace Batch { command += _username; command += "@"; } - command += _host; + command += _hostname; command += ":"; command += string(params[TMPDIR]); cerr << command.c_str() << endl; @@ -121,10 +121,8 @@ namespace Batch { command += _username; command += "@"; } - command += _host; + command += _hostname; command += ":"; - command += string(params[TMPDIR]); - command += "/"; command += inputFile.getRemote(); cerr << command.c_str() << endl; status = system(command.c_str()); @@ -139,6 +137,50 @@ namespace Batch { } + void BatchManager_eClient::importOutputFiles( const Job & job, const string directory ) throw(EmulationException) + { + string command; + int status; + + Parametre params = job.getParametre(); + Versatile V = params[OUTFILE]; + Versatile::iterator Vit; + + for(Vit=V.begin(); Vit!=V.end(); Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple outputFile = cpt; + if( _protocol == "rsh" ) + command = "rcp "; + else if( _protocol == "ssh" ) + command = "scp "; + else + throw EmulationException("Unknown protocol"); + + if (_username != ""){ + command += _username; + command += "@"; + } + command += _hostname; + command += ":"; + command += outputFile.getRemote(); + command += " "; + command += directory; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + { + // Try to get what we can (logs files) + // throw BatchException("Error of connection on remote host"); + std::string mess("Copy command failed ! status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + } + + } + MpiImpl *BatchManager_eClient::FactoryMpiImpl(string mpiImpl) throw(EmulationException) { if(mpiImpl == "lam") diff --git a/src/Batch/Batch_BatchManager_eClient.hxx b/src/Batch/Batch_BatchManager_eClient.hxx index 369155db1..717eae6a3 100644 --- a/src/Batch/Batch_BatchManager_eClient.hxx +++ b/src/Batch/Batch_BatchManager_eClient.hxx @@ -18,7 +18,7 @@ // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // /* - * BatchManager_eLSF.hxx : emulation of LSF client + * BatchManager_eLSF.hxx : emulation of client * * Auteur : Bernard SECHER - CEA DEN * Mail : mailto:bernard.secher@cea.fr @@ -32,7 +32,7 @@ #include "MpiImpl.hxx" -#include "Batch_Job.hxx" +#include "Batch_BatchManager.hxx" namespace Batch { @@ -46,15 +46,15 @@ namespace Batch { EmulationException(const std::string m) : msg(m) {} }; - class BatchManager_eClient + class BatchManager_eClient : public BatchManager { public: // Constructeur et destructeur - BatchManager_eClient(const char* host="localhost", const char* protocol="ssh", const char* mpiImpl="indif"); + BatchManager_eClient(const Batch::FactBatchManager * parent, const char* host="localhost", const char* protocol="ssh", const char* mpiImpl="indif"); virtual ~BatchManager_eClient(); + void importOutputFiles( const Job & job, const std::string directory ) throw(EmulationException); protected: - std::string _host; // serveur ou tourne le BatchManager std::string _protocol; // protocol to access _hostname std::string _username; // username to access _hostname MpiImpl *_mpiImpl; // Mpi implementation to launch executable in batch script diff --git a/src/Batch/Batch_BatchManager_eLSF.cxx b/src/Batch/Batch_BatchManager_eLSF.cxx index 653fd3874..b157e0547 100644 --- a/src/Batch/Batch_BatchManager_eLSF.cxx +++ b/src/Batch/Batch_BatchManager_eLSF.cxx @@ -35,7 +35,7 @@ namespace Batch { - BatchManager_eLSF::BatchManager_eLSF(const FactBatchManager * parent, const char * host, const char * protocol, const char * mpiImpl) throw(InvalidArgumentException,ConnexionFailureException) : BatchManager(parent, host), BatchManager_eClient(host,protocol,mpiImpl) + BatchManager_eLSF::BatchManager_eLSF(const FactBatchManager * parent, const char * host, const char * protocol, const char * mpiImpl) throw(InvalidArgumentException,ConnexionFailureException) : BatchManager_eClient(parent,host,protocol,mpiImpl) { // Nothing to do } @@ -197,6 +197,7 @@ namespace Batch { // define command to submit batch command = _protocol; + command += " "; if (_username != ""){ command += _username; diff --git a/src/Batch/Batch_BatchManager_eLSF.hxx b/src/Batch/Batch_BatchManager_eLSF.hxx index 6713aa70b..c978d64a9 100644 --- a/src/Batch/Batch_BatchManager_eLSF.hxx +++ b/src/Batch/Batch_BatchManager_eLSF.hxx @@ -48,7 +48,7 @@ namespace Batch { class JobInfo; class FactBatchManager; - class BatchManager_eLSF : public BatchManager, public BatchManager_eClient + class BatchManager_eLSF : public BatchManager_eClient { public: // Constructeur et destructeur diff --git a/src/Batch/Batch_BatchManager_ePBS.cxx b/src/Batch/Batch_BatchManager_ePBS.cxx index 6073cf9ee..0546c783c 100644 --- a/src/Batch/Batch_BatchManager_ePBS.cxx +++ b/src/Batch/Batch_BatchManager_ePBS.cxx @@ -35,7 +35,7 @@ namespace Batch { - BatchManager_ePBS::BatchManager_ePBS(const FactBatchManager * parent, const char * host, const char * protocol, const char * mpiImpl) throw(InvalidArgumentException,ConnexionFailureException) : BatchManager(parent, host), BatchManager_eClient(host,protocol,mpiImpl) + BatchManager_ePBS::BatchManager_ePBS(const FactBatchManager * parent, const char * host, const char * protocol, const char * mpiImpl) throw(InvalidArgumentException,ConnexionFailureException) : BatchManager_eClient(parent,host,protocol,mpiImpl) { // Nothing to do } diff --git a/src/Batch/Batch_BatchManager_ePBS.hxx b/src/Batch/Batch_BatchManager_ePBS.hxx index c704ccda3..42f4b1b69 100644 --- a/src/Batch/Batch_BatchManager_ePBS.hxx +++ b/src/Batch/Batch_BatchManager_ePBS.hxx @@ -47,7 +47,7 @@ namespace Batch { class JobInfo; class FactBatchManager; - class BatchManager_ePBS : public BatchManager, public BatchManager_eClient + class BatchManager_ePBS : public BatchManager_eClient { public: // Constructeur et destructeur diff --git a/src/Batch/Batch_FactBatchManager_eClient.cxx b/src/Batch/Batch_FactBatchManager_eClient.cxx new file mode 100644 index 000000000..6673879de --- /dev/null +++ b/src/Batch/Batch_FactBatchManager_eClient.cxx @@ -0,0 +1,48 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * FactBatchManager_eClient.cxx : emulation of client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#include +#include +#include "Batch_FactBatchManager_eClient.hxx" +using namespace std; + +namespace Batch { + + // Constructeur + FactBatchManager_eClient::FactBatchManager_eClient(const string & _t) : FactBatchManager(_t) + { + } + + // Destructeur + FactBatchManager_eClient::~FactBatchManager_eClient() + { + // Nothing to do + } + +} diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.hxx b/src/Batch/Batch_FactBatchManager_eClient.hxx similarity index 57% rename from src/Launcher/BatchLight_BatchManager_SLURM.hxx rename to src/Batch/Batch_FactBatchManager_eClient.hxx index 97d2df190..616a6626d 100644 --- a/src/Launcher/BatchLight_BatchManager_SLURM.hxx +++ b/src/Batch/Batch_FactBatchManager_eClient.hxx @@ -18,38 +18,36 @@ // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // /* - * BatchManager.hxx : + * FactBatchManager_eClient.hxx : emulation of client * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome * */ -#ifndef _BL_BATCHMANAGER_SLURM_H_ -#define _BL_BATCHMANAGER_SLURM_H_ +#ifndef _FACTBATCHMANAGER_eClient_H_ +#define _FACTBATCHMANAGER_eClient_H_ #include -#include "BatchLight_BatchManager.hxx" +#include +#include "Batch_FactBatchManager.hxx" -namespace BatchLight { +namespace Batch { + + class BatchManager_eClient; - class Job; - - class BatchManager_SLURM : public BatchManager + class FactBatchManager_eClient : public FactBatchManager { public: // Constructeur et destructeur - BatchManager_SLURM(const clusterParams& p) throw(BatchException); // connexion a la machine host - virtual ~BatchManager_SLURM(); + FactBatchManager_eClient(const std::string & type); + virtual ~FactBatchManager_eClient(); - // Methodes pour le controle des jobs : virtuelles pures - void deleteJob(const int & jobid); // retire un job du gestionnaire - std::string queryJob(const int & jobid); // renvoie l'etat du job + virtual Batch::BatchManager_eClient * operator() (const char * hostname,const char * protocol, const char * mpi) const = 0; protected: - void buildBatchScript(BatchLight::Job* job) throw(BatchException); - int submit(BatchLight::Job* job) throw(BatchException); private: diff --git a/src/Batch/Batch_FactBatchManager_eLSF.cxx b/src/Batch/Batch_FactBatchManager_eLSF.cxx index 5496deb45..227bffa32 100644 --- a/src/Batch/Batch_FactBatchManager_eLSF.cxx +++ b/src/Batch/Batch_FactBatchManager_eLSF.cxx @@ -36,7 +36,7 @@ namespace Batch { static FactBatchManager_eLSF sFBM_eLSF; // Constructeur - FactBatchManager_eLSF::FactBatchManager_eLSF() : FactBatchManager("eLSF") + FactBatchManager_eLSF::FactBatchManager_eLSF() : FactBatchManager_eClient("eLSF") { // Nothing to do } @@ -54,5 +54,10 @@ namespace Batch { return new BatchManager_eLSF(this, hostname); } + BatchManager_eClient * FactBatchManager_eLSF::operator() (const char * hostname, const char * protocol, const char * mpiImpl) const + { + // MESSAGE("Building new BatchManager_LSF on host '" << hostname << "'"); + return new BatchManager_eLSF(this, hostname, protocol, mpiImpl); + } } diff --git a/src/Batch/Batch_FactBatchManager_eLSF.hxx b/src/Batch/Batch_FactBatchManager_eLSF.hxx index 9b54913d3..e1660aaaa 100644 --- a/src/Batch/Batch_FactBatchManager_eLSF.hxx +++ b/src/Batch/Batch_FactBatchManager_eLSF.hxx @@ -32,13 +32,14 @@ using namespace std; #include #include -#include "Batch_FactBatchManager.hxx" +#include "Batch_BatchManager_eClient.hxx" +#include "Batch_FactBatchManager_eClient.hxx" namespace Batch { class BatchManager_eLSF; - class FactBatchManager_eLSF : public FactBatchManager + class FactBatchManager_eLSF : public FactBatchManager_eClient { public: // Constructeur et destructeur @@ -46,6 +47,7 @@ namespace Batch { virtual ~FactBatchManager_eLSF(); virtual BatchManager * operator() (const char * hostname) const; + virtual BatchManager_eClient * operator() (const char * hostname, const char * protocol, const char * mpiImpl) const; protected: diff --git a/src/Batch/Batch_FactBatchManager_ePBS.cxx b/src/Batch/Batch_FactBatchManager_ePBS.cxx index 62642327b..3bcbda530 100644 --- a/src/Batch/Batch_FactBatchManager_ePBS.cxx +++ b/src/Batch/Batch_FactBatchManager_ePBS.cxx @@ -36,7 +36,7 @@ namespace Batch { static FactBatchManager_ePBS sFBM_ePBS; // Constructeur - FactBatchManager_ePBS::FactBatchManager_ePBS() : FactBatchManager("ePBS") + FactBatchManager_ePBS::FactBatchManager_ePBS() : FactBatchManager_eClient("ePBS") { // Nothing to do } @@ -54,5 +54,11 @@ namespace Batch { return new BatchManager_ePBS(this, hostname); } + BatchManager_eClient * FactBatchManager_ePBS::operator() (const char * hostname, const char * protocol, const char * mpiImpl) const + { + // MESSAGE("Building new BatchManager_PBS on host '" << hostname << "'"); + return new BatchManager_ePBS(this, hostname, protocol, mpiImpl); + } + } diff --git a/src/Batch/Batch_FactBatchManager_ePBS.hxx b/src/Batch/Batch_FactBatchManager_ePBS.hxx index 1ec238d8a..69fdf322a 100644 --- a/src/Batch/Batch_FactBatchManager_ePBS.hxx +++ b/src/Batch/Batch_FactBatchManager_ePBS.hxx @@ -32,13 +32,14 @@ using namespace std; #include #include -#include "Batch_FactBatchManager.hxx" +#include "Batch_BatchManager_eClient.hxx" +#include "Batch_FactBatchManager_eClient.hxx" namespace Batch { class BatchManager_ePBS; - class FactBatchManager_ePBS : public FactBatchManager + class FactBatchManager_ePBS : public FactBatchManager_eClient { public: // Constructeur et destructeur @@ -46,6 +47,7 @@ namespace Batch { virtual ~FactBatchManager_ePBS(); virtual BatchManager * operator() (const char * hostname) const; + virtual BatchManager_eClient * operator() (const char * hostname, const char * protocol, const char * mpiImpl) const; protected: diff --git a/src/Batch/Makefile.am b/src/Batch/Makefile.am index d744a071e..b936e299a 100644 --- a/src/Batch/Makefile.am +++ b/src/Batch/Makefile.am @@ -61,6 +61,7 @@ LIB_INCLUDES = \ Batch_StringType.hxx \ Batch_TypeMismatchException.hxx \ Batch_BatchManager_eClient.hxx \ + Batch_FactBatchManager_eClient.hxx \ Batch_BatchManager_eLSF.hxx \ Batch_FactBatchManager_eLSF.hxx \ Batch_JobInfo_eLSF.hxx \ @@ -101,6 +102,7 @@ LIB_SRC = \ Batch_StringType.cxx \ Batch_TypeMismatchException.cxx \ Batch_BatchManager_eClient.cxx \ + Batch_FactBatchManager_eClient.cxx \ Batch_BatchManager_eLSF.cxx \ Batch_FactBatchManager_eLSF.cxx \ Batch_JobInfo_eLSF.cxx \ diff --git a/src/Launcher/BatchLight_BatchManager.cxx b/src/Launcher/BatchLight_BatchManager.cxx deleted file mode 100644 index c1948bbc0..000000000 --- a/src/Launcher/BatchLight_BatchManager.cxx +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include -#include -#include -#include -#include "BatchLight_Job.hxx" -#include "BatchLight_BatchManager.hxx" -#include "Batch_Date.hxx" -using namespace std; - -namespace BatchLight { - - // Constructeur - BatchManager::BatchManager(const clusterParams& p) throw(BatchException) : _params(p) - { - cerr << _params.hostname << endl; - cerr << _params.protocol << endl; - cerr << _params.username << endl; - // On verifie que le hostname est correct - if (!gethostbyname(_params.hostname.c_str())) { // hostname unknown from network - string msg = "hostname \""; - msg += _params.hostname; - msg += "\" unknown from the network"; - throw BatchException(msg.c_str()); - } - _mpiImpl = NULL; - } - - // Destructeur - BatchManager::~BatchManager() - { - cerr << "BatchManager destructor "<<_params.hostname << endl; - std::map < int, const BatchLight::Job * >::const_iterator it; - for(it=_jobmap.begin();it!=_jobmap.end();it++) - delete it->second; - if(_mpiImpl) delete _mpiImpl; - } - - // Methode pour le controle des jobs : soumet un job au gestionnaire - const int BatchManager::submitJob(Job* job) - { - int id; - - // export input files on cluster - exportInputFiles(job); - - // build batch script for job - buildBatchScript(job); - - // submit job on cluster - id = submit(job); - - // register job on map - _jobmap[id] = job; - return id; - } - - void BatchManager::exportInputFiles(BatchLight::Job* job) throw(BatchException) - { - int status; - const string fileToExecute = job->getFileToExecute(); - const vector filesToExportList = job->getFilesToExportList(); - const std::string dirForTmpFiles = job->getDirForTmpFiles(); - std::string command; - std::string copy_command; - - // Test protocol - if( _params.protocol == "rsh" ) - copy_command = "rcp "; - else if( _params.protocol == "ssh" ) - copy_command = "scp "; - else - throw BatchException("Unknown protocol : only rsh and ssh are known !"); - - // First step : creating batch tmp files directory - command = _params.protocol; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += " \"mkdir -p "; - command += dirForTmpFiles; - command += "\"" ; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) { - std::ostringstream oss; - oss << status; - std::string ex_mess("Error of connection on remote host ! status = "); - ex_mess += oss.str(); - throw BatchException(ex_mess.c_str()); - } - - // Second step : copy fileToExecute into - // batch tmp files directory - command = copy_command; - command += fileToExecute.c_str(); - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += dirForTmpFiles; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) { - std::ostringstream oss; - oss << status; - std::string ex_mess("Error of connection on remote host ! status = "); - ex_mess += oss.str(); - throw BatchException(ex_mess.c_str()); - } - - // Third step : copy filesToExportList into - // batch tmp files directory - for (int i = 0 ; i < filesToExportList.size() ; i++ ) { - command = copy_command; - command += filesToExportList[i] ; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += dirForTmpFiles ; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) { - std::ostringstream oss; - oss << status; - std::string ex_mess("Error of connection on remote host ! status = "); - ex_mess += oss.str(); - throw BatchException(ex_mess.c_str()); - } - } - - } - - void BatchManager::importOutputFiles( const string directory, const int &jobId ) throw(BatchException) - { - string command; - int status; - - const BatchLight::Job* myJob = _jobmap[jobId]; - vector filesToImportList = myJob->getFilesToImportList(); - - for ( int i = 0 ; i < filesToImportList.size() ; i++ ) { - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw BatchException("Unknown protocol"); - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += filesToImportList[i] ; - command += " "; - command += directory; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - { - // Try to get what we can (logs files) - // throw BatchException("Error of connection on remote host"); - std::string mess("Copy command failed ! status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } - } - - } - - string BatchManager::BuildTemporaryFileName() const - { - //build more complex file name to support multiple salome session - char *temp = new char[19]; - strcpy(temp, "/tmp/command"); - strcat(temp, "XXXXXX"); -#ifndef WNT - mkstemp(temp); -#else - char aPID[80]; - itoa(getpid(), aPID, 10); - strcat(temp, aPID); -#endif - - string command(temp); - delete [] temp; - command += ".sh"; - return command; - } - - void BatchManager::RmTmpFile(std::string & TemporaryFileName) - { - string command = "rm "; - command += TemporaryFileName; - char *temp = strdup(command.c_str()); - int lgthTemp = strlen(temp); - temp[lgthTemp - 3] = '*'; - temp[lgthTemp - 2] = '\0'; - system(temp); - free(temp); - } - - MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(BatchException) - { - if(mpiImpl == "lam") - return new MpiImpl_LAM(); - else if(mpiImpl == "mpich1") - return new MpiImpl_MPICH1(); - else if(mpiImpl == "mpich2") - return new MpiImpl_MPICH2(); - else if(mpiImpl == "openmpi") - return new MpiImpl_OPENMPI(); - else if(mpiImpl == "slurm") - return new MpiImpl_SLURM(); - else if(mpiImpl == "indif") - throw BatchException("you must specify a mpi implementation in CatalogResources.xml file"); - else{ - ostringstream oss; - oss << mpiImpl << " : not yet implemented"; - throw BatchException(oss.str().c_str()); - } - } - -} diff --git a/src/Launcher/BatchLight_BatchManager.hxx b/src/Launcher/BatchLight_BatchManager.hxx deleted file mode 100644 index 753b06b56..000000000 --- a/src/Launcher/BatchLight_BatchManager.hxx +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_BATCHMANAGER_H_ -#define _BL_BATCHMANAGER_H_ - -#include -#include -#include -#include -#include "MpiImpl.hxx" - -namespace BatchLight { - - class Job; - - struct batchParams{ - std::string batch_directory; // Where batch command will be launched - // and log files will be created - std::string expected_during_time; // Time for the batch - // has to be like this : hh:mm - std::string mem; // Minimum of memory needed - // has to be like : 32gb or 512mb - - long nb_proc; // Number of processors requested - }; - - struct clusterParams{ - std::string hostname; // serveur ou tourne le BatchManager - std::string protocol; // protocole d'acces au serveur: ssh ou rsh - std::string username; // username d'acces au serveur - std::string applipath; // path of apllication directory on server - std::vector modulesList; // list of Salome modules installed on server - unsigned int nbnodes; // number of nodes on cluster - unsigned int nbprocpernode; // number of processors on each node - std::string mpiImpl; // mpi implementation - }; - - class BatchException - { - public: - const std::string msg; - - BatchException(const std::string m) : msg(m) {} - }; - - class BatchManager - { - public: - // Constructeur et destructeur - BatchManager(const clusterParams& p) throw(BatchException); // connexion a la machine host - virtual ~BatchManager(); - - // Methodes pour le controle des jobs : virtuelles pures - const int submitJob(BatchLight::Job* job); // soumet un job au gestionnaire - virtual void deleteJob(const int & jobid) = 0; // retire un job du gestionnaire - virtual std::string queryJob(const int & jobid) = 0; // renvoie l'etat du job - void importOutputFiles( const std::string directory, const int & jobId ) throw(BatchException); - - protected: - clusterParams _params; - MpiImpl *_mpiImpl; - std::map _jobmap; - - virtual int submit(BatchLight::Job* job) throw(BatchException) = 0; - void exportInputFiles(BatchLight::Job* job) throw(BatchException); - virtual void buildBatchScript(BatchLight::Job* job) throw(BatchException) = 0; - - std::string BuildTemporaryFileName() const; - void RmTmpFile(std::string & TemporaryFileName); - MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(BatchException); - private: - - }; - -} - -#endif diff --git a/src/Launcher/BatchLight_BatchManager_PBS.cxx b/src/Launcher/BatchLight_BatchManager_PBS.cxx deleted file mode 100644 index 39e6e9fd6..000000000 --- a/src/Launcher/BatchLight_BatchManager_PBS.cxx +++ /dev/null @@ -1,323 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include "BatchLight_BatchManager_PBS.hxx" -#include "BatchLight_Job.hxx" -#include -#include -#include -#include - -using namespace std; - -namespace BatchLight { - - // Constructeur - BatchManager_PBS::BatchManager_PBS(const clusterParams& p) throw(BatchException) : BatchManager(p) - { - // pbs batch system needs to know mpi implementation - _mpiImpl = FactoryMpiImpl(_params.mpiImpl); - } - - // Destructeur - BatchManager_PBS::~BatchManager_PBS() - { - cerr << "BatchManager_PBS destructor " << _params.hostname << endl; - } - - // Methode pour le controle des jobs : retire un job du gestionnaire - void BatchManager_PBS::deleteJob(const int & jobid) - { - string command; - int status; - ostringstream oss; - oss << jobid; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw BatchException("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"qdel " ; - command += oss.str(); - command += "\""; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - throw BatchException("Error of connection on remote host"); - - cerr << "jobId = " << jobid << "killed" << endl; - } - - // Methode pour le controle des jobs : renvoie l'etat du job - string BatchManager_PBS::queryJob(const int & jobid) - { - // define name of log file - string jstatus; - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - //srand ( time(NULL) ); - //int ir = rand(); - ostringstream oss; - //oss << ir; - oss << this << "_" << jobid; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw BatchException("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"qstat -f " ; - //ostringstream oss2; - //oss2 << jobid; - //command += oss2.str(); - command += _pbs_job_name[jobid]; - command += "\" > "; - command += logFile; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status && status != 153 && status != 256*153){ - cerr << "status="<> jstatus; - iss >> jstatus; - iss >> jstatus; - } - else - jstatus = "U"; - } - - cerr << "jobId = " << jobid << " " << jstatus << endl; - return jstatus; - } - - void BatchManager_PBS::buildBatchScript(BatchLight::Job* job) throw(BatchException) - { - int status; - const int nbproc = job->getNbProc(); - std::string edt = job->getExpectedDuringTime(); - std::string mem = job->getMemory(); - const std::string dirForTmpFiles = job->getDirForTmpFiles(); - const string fileToExecute = job->getFileToExecute(); - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); - std::string fileNameToExecute = "~/" + dirForTmpFiles + "/" + string(basename(fileToExecute.c_str())); - - int idx = dirForTmpFiles.find("Batch/"); - std::string filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); - - int nbmaxproc = _params.nbnodes * _params.nbprocpernode; - if( nbproc > nbmaxproc ){ - cerr << nbproc << " processors asked on a cluster of " << nbmaxproc << " processors" << endl; - throw BatchException("Too much processors asked for that cluster"); - } - - int nbnodes; - if( nbproc < _params.nbnodes ) - nbnodes = nbproc; - else - nbnodes = _params.nbnodes; - - std::string TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); - - tempOutputFile << "#! /bin/sh -f" << endl ; - tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; - if (edt != "") - tempOutputFile << "#PBS -l walltime=" << edt << ":00" << endl ; - if (mem != "") - tempOutputFile << "#PBS -l mem=" << mem << endl ; - // In some systems qsub does not correctly expand env variables - // like PBS_O_HOME for #PBS directives.... - //tempOutputFile << "#PBS -o /$PBS_O_HOME/" << dirForTmpFiles << "/runSalome.output.log.${PBS_JOBID}" << endl ; - //tempOutputFile << "#PBS -e /$PBS_O_HOME/" << dirForTmpFiles << "/runSalome.error.log.${PBS_JOBID}" << endl ; - tempOutputFile << "#PBS -o runSalome.output.log." << filelogtemp << endl ; - tempOutputFile << "#PBS -e runSalome.error.log." << filelogtemp << endl ; - tempOutputFile << _mpiImpl->boot("${PBS_NODEFILE}",nbnodes); - tempOutputFile << _mpiImpl->run("${PBS_NODEFILE}",nbproc,fileNameToExecute); - tempOutputFile << _mpiImpl->halt(); - tempOutputFile.flush(); - tempOutputFile.close(); - chmod(TmpFileName.c_str(), 0x1ED); - cerr << TmpFileName.c_str() << endl; - - string command; - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw BatchException("Unknown protocol"); - command += TmpFileName; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += dirForTmpFiles ; - command += "/" ; - command += rootNameToExecute ; - command += "_Batch.sh" ; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - throw BatchException("Error of connection on remote host"); - - // Adding log files into import list files - ostringstream file_name_output; - file_name_output << "~/" << dirForTmpFiles << "/" << "runSalome.output.log*"; - ostringstream file_name_error; - file_name_error << "~/" << dirForTmpFiles << "/" << "runSalome.error.log*"; - ostringstream file_container_log; - file_container_log << "~/" << dirForTmpFiles << "/" << "YACS_Server*"; - job->addFileToImportList(file_name_output.str()); - job->addFileToImportList(file_name_error.str()); - job->addFileToImportList(file_container_log.str()); - RmTmpFile(TmpFileName); - } - - int BatchManager_PBS::submit(BatchLight::Job* job) throw(BatchException) - { - const std::string dirForTmpFiles = job->getDirForTmpFiles(); - const string fileToExecute = job->getFileToExecute(); - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); - - // define name of log file - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - srand ( time(NULL) ); - int ir = rand(); - ostringstream oss; - oss << ir; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw BatchException("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"cd " ; - command += dirForTmpFiles; - command += "; qsub " ; - command += fileNameToExecute ; - command += "_Batch.sh\" > "; - command += logFile; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - throw BatchException("Error of connection on remote host"); - - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); - - string sline(line); - int pos = sline.find("."); - string strjob; - if(pos == string::npos) - strjob = sline; - else - strjob = sline.substr(0,pos); - - int id; - istringstream iss(strjob); - iss >> id; - - // Ajout dans la map - _pbs_job_name[id] = sline; - return id; - } - -} diff --git a/src/Launcher/BatchLight_BatchManager_PBS.hxx b/src/Launcher/BatchLight_BatchManager_PBS.hxx deleted file mode 100644 index 28c62580e..000000000 --- a/src/Launcher/BatchLight_BatchManager_PBS.hxx +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_BATCHMANAGER_PBS_H_ -#define _BL_BATCHMANAGER_PBS_H_ - -#include -#include "BatchLight_BatchManager.hxx" - -namespace BatchLight { - - class Job; - - class BatchManager_PBS : public BatchManager - { - public: - // Constructeur et destructeur - BatchManager_PBS(const clusterParams& p) throw(BatchException); // connexion a la machine host - virtual ~BatchManager_PBS(); - - // Methodes pour le controle des jobs : virtuelles pures - void deleteJob(const int & jobid); // retire un job du gestionnaire - std::string queryJob(const int & jobid); // renvoie l'etat du job - - private: - void buildBatchScript(BatchLight::Job* job) throw(BatchException); - int submit(BatchLight::Job* job) throw(BatchException); - - // Permet d'avoir la chaîne complête pour demander - // le statut du job - typedef std::map _pbs_job_name_t; - _pbs_job_name_t _pbs_job_name; - }; - -} - -#endif diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.cxx b/src/Launcher/BatchLight_BatchManager_SLURM.cxx deleted file mode 100644 index 3aefaee9a..000000000 --- a/src/Launcher/BatchLight_BatchManager_SLURM.cxx +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include "BatchLight_BatchManager_SLURM.hxx" -#include "BatchLight_Job.hxx" -#include -#include -#include -#include - -using namespace std; - -namespace BatchLight { - - // Constructeur - BatchManager_SLURM::BatchManager_SLURM(const clusterParams& p) throw(BatchException) : BatchManager(p) - { - _mpiImpl = FactoryMpiImpl(_params.mpiImpl); - } - - // Destructeur - BatchManager_SLURM::~BatchManager_SLURM() - { - cerr << "BatchManager_SLURM destructor "<<_params.hostname << endl; - } - - // Methode pour le controle des jobs : retire un job du gestionnaire - void BatchManager_SLURM::deleteJob(const int & jobid) - { - string command; - int status; - ostringstream oss; - oss << jobid; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw BatchException("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"bkill " ; - command += oss.str(); - command += "\""; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - throw BatchException("Error of connection on remote host"); - - cerr << "jobId = " << jobid << "killed" << endl; - } - - // Methode pour le controle des jobs : renvoie l'etat du job - string BatchManager_SLURM::queryJob(const int & jobid) - { - // define name of log file - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - srand ( time(NULL) ); - int ir = rand(); - ostringstream oss; - oss << ir; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw BatchException("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"bjobs " ; - ostringstream oss2; - oss2 << jobid; - command += oss2.str(); - command += "\" > "; - command += logFile; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - throw BatchException("Error of connection on remote host"); - - // read staus of job in log file - char line[128]; - ifstream fp(logFile.c_str(),ios::in); - fp.getline(line,80,'\n'); - - string sjobid, username, jstatus; - fp >> sjobid; - fp >> username; - fp >> jstatus; - - cerr << "jobId = " << jobid << " " << jstatus << endl; - return jstatus; - } - - void BatchManager_SLURM::buildBatchScript(BatchLight::Job* job) throw(BatchException) - { - int status; - const int nbproc = job->getNbProc(); - std::string edt = job->getExpectedDuringTime(); - std::string mem = job->getMemory(); - const std::string dirForTmpFiles = job->getDirForTmpFiles(); - const string fileToExecute = job->getFileToExecute(); - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); - std::string fileNameToExecute = "~/" + dirForTmpFiles + "/" + string(basename(fileToExecute.c_str())); - - int nbmaxproc = _params.nbnodes * _params.nbprocpernode; - if( nbproc > nbmaxproc ){ - cerr << nbproc << " processors asked on a cluster of " << nbmaxproc << " processors" << endl; - throw BatchException("Too much processors asked for that cluster"); - } - - int nbnodes; - if( nbproc < _params.nbnodes ) - nbnodes = nbproc; - else - nbnodes = _params.nbnodes; - - std::string TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); - - tempOutputFile << "#! /bin/sh -f" << endl ; - tempOutputFile << "#BSUB -n " << nbproc << endl ; - tempOutputFile << "#BSUB -o " << dirForTmpFiles << "/runSalome.log%J" << endl ; - tempOutputFile << _mpiImpl->boot("",nbproc); - tempOutputFile << _mpiImpl->run("",nbproc,fileNameToExecute); - tempOutputFile << _mpiImpl->halt(); - tempOutputFile.flush(); - tempOutputFile.close(); - chmod(TmpFileName.c_str(), 0x1ED); - cerr << TmpFileName.c_str() << endl; - - string command; - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw BatchException("Unknown protocol"); - command += TmpFileName; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += dirForTmpFiles ; - command += "/" ; - command += rootNameToExecute ; - command += "_Batch.sh" ; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - throw BatchException("Error of connection on remote host"); - - RmTmpFile(TmpFileName); - - } - - int BatchManager_SLURM::submit(BatchLight::Job* job) throw(BatchException) - { - const std::string dirForTmpFiles = job->getDirForTmpFiles(); - const string fileToExecute = job->getFileToExecute(); - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); - - // define name of log file - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - srand ( time(NULL) ); - int ir = rand(); - ostringstream oss; - oss << ir; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw BatchException("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"bsub < " ; - command += dirForTmpFiles ; - command += "/" ; - command += fileNameToExecute ; - command += "_Batch.sh\" > "; - command += logFile; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - throw BatchException("Error of connection on remote host"); - - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); - - string sline(line); - int p10 = sline.find("<"); - int p20 = sline.find(">"); - string strjob = sline.substr(p10+1,p20-p10-1); - - int id; - istringstream iss(strjob); - iss >> id; - - return id; - } - -} diff --git a/src/Launcher/BatchLight_Job.cxx b/src/Launcher/BatchLight_Job.cxx deleted file mode 100644 index b2750d892..000000000 --- a/src/Launcher/BatchLight_Job.cxx +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * Job.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include "Batch_Date.hxx" -#include "BatchLight_Job.hxx" -#include - -using namespace std; -using namespace BatchLight; - -Job::Job(const string fileToExecute, - const vector& filesToExport, - const vector& filesToImport, - const batchParams& batch_params) : _fileToExecute(fileToExecute), - _filesToExport(filesToExport), - _filesToImport(filesToImport), - _batch_params(batch_params) -{ - std::string thedate; - - // Adding date to the directory name - Batch::Date date = Batch::Date(time(0)); - thedate = date.str(); - int lend = thedate.size() ; - int i = 0 ; - while ( i < lend ) { - if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { - thedate[i] = '_' ; - } - i++ ; - } - - _dirForTmpFiles = string("Batch/"); - _dirForTmpFiles += thedate ; -} - -Job::~Job() -{ - cerr << "Job destructor" << endl; -} - -void -Job::addFileToExportList(std::string file_name) -{ - _filesToExport.push_back(file_name); -} - -void -Job::addFileToImportList(std::string file_name) -{ - _filesToImport.push_back(file_name); -} - -const std::string -Job::getExpectedDuringTime() -{ - std::string str(_batch_params.expected_during_time); - return str; -} - -const std::string -Job::getMemory() -{ - std::string str(_batch_params.mem); - return str; -} - -bool -Job::check() { - bool rtn = true; - cerr << "Warning : batch_directory option is not currently implemented" << endl; - cerr << "Warning : currently these informations are only in the PBS batch manager" << endl; - cerr << "Job parameters are :" <> value)) { - edt_info = "Error on definition ! : " + edt_value; - rtn = false; - } - else if (value < 0) { - edt_info = "Error on definition time is negative ! : " + value; - rtn = false; - } - std::istringstream iss_2(end_edt_value); - if (!(iss_2 >> value)) { - edt_info = "Error on definition ! : " + edt_value; - rtn = false; - } - else if (value < 0) { - edt_info = "Error on definition time is negative ! : " + value; - rtn = false; - } - if (mid_edt_value != ":") { - edt_info = "Error on definition ! :" + edt_value; - rtn = false; - } - } - else { - edt_info = "No value given"; - } - cerr << "Expected during time : " << edt_info << endl; - - // check memory (check the format) - std::string mem_info; - std::string mem_value = _batch_params.mem.c_str(); - if (mem_value != "") { - std::string begin_mem_value = mem_value.substr(0, mem_value.length()-2); - long re_mem_value; - std::istringstream iss(begin_mem_value); - if (!(iss >> re_mem_value)) { - mem_info = "Error on definition ! : " + mem_value; - rtn = false; - } - else if (re_mem_value <= 0) { - mem_info = "Error on definition memory is negative ! : " + mem_value; - rtn = false; - } - std::string end_mem_value = mem_value.substr(mem_value.length()-2); - if (end_mem_value != "gb" and end_mem_value != "mb") { - mem_info = "Error on definition, type is bad ! " + mem_value; - rtn = false; - } - } - else { - mem_info = "No value given"; - } - cerr << "Memory : " << mem_info << endl; - - // check nb_proc - std::string nb_proc_info; - ostringstream nb_proc_value; - nb_proc_value << _batch_params.nb_proc; - if(_batch_params.nb_proc <= 0) { - nb_proc_info = "Bad value ! nb_proc = "; - nb_proc_info += nb_proc_value.str(); - rtn = false; - } - else { - nb_proc_info = nb_proc_value.str(); - } - cerr << "Nb of processors : " << nb_proc_info << endl; - - return rtn; -} diff --git a/src/Launcher/BatchLight_Job.hxx b/src/Launcher/BatchLight_Job.hxx deleted file mode 100644 index 3287adc66..000000000 --- a/src/Launcher/BatchLight_Job.hxx +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * Job.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_JOB_H_ -#define _BL_JOB_H_ - -#include -#include "BatchLight_BatchManager.hxx" - -namespace BatchLight { - - class Job - { - public: - // Constructeurs et destructeur - Job(const std::string fileToExecute, - const std::vector& filesToExport, - const std::vector& filesToImport, - const batchParams& batch_params); - virtual ~Job(); - - const std::string getFileToExecute() const { return _fileToExecute; } - void setFileToExecute(const std::string fileToExecute) { _fileToExecute=fileToExecute; } - const std::vector getFilesToExportList() const { return _filesToExport; } - const std::vector getFilesToImportList() const { return _filesToImport; } - void addFileToExportList(std::string file_name); - void addFileToImportList(std::string file_name); - const long getNbProc() const { return _batch_params.nb_proc; } - const std::string getExpectedDuringTime(); - const std::string getMemory(); - - const std::string getDirForTmpFiles() const { return _dirForTmpFiles;} - void setDirForTmpFiles(std::string dirForTmpFiles) { _dirForTmpFiles = dirForTmpFiles; - std::cerr << _dirForTmpFiles << std::endl;} - bool check(); - protected: - std::string _fileToExecute; - std::vector _filesToExport; - std::vector _filesToImport; - batchParams _batch_params; - std::string _dirForTmpFiles; // Tmp directory on the server - private: - - }; - -} - -#endif diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index 527c43282..9757b0fe7 100644 --- a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -17,9 +17,9 @@ // // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // -#include "BatchLight_BatchManager_PBS.hxx" -#include "BatchLight_BatchManager_SLURM.hxx" -#include "BatchLight_Job.hxx" +#include "Batch_Date.hxx" +#include "Batch_FactBatchManager_eLSF.hxx" +#include "Batch_FactBatchManager_ePBS.hxx" #include "Launcher.hxx" #include #include @@ -50,9 +50,12 @@ Launcher_cpp::Launcher_cpp() Launcher_cpp::~Launcher_cpp() { cerr << "Launcher_cpp destructor" << endl; - std::map < string, BatchLight::BatchManager * >::const_iterator it; - for(it=_batchmap.begin();it!=_batchmap.end();it++) - delete it->second; + std::map < string, Batch::BatchManager_eClient * >::const_iterator it1; + for(it1=_batchmap.begin();it1!=_batchmap.end();it1++) + delete it1->second; + std::map < std::pair , Batch::Job* >::const_iterator it2; + for(it2=_jobmap.begin();it2!=_jobmap.end();it2++) + delete it2->second; } //============================================================================= @@ -67,7 +70,7 @@ Launcher_cpp::~Launcher_cpp() long Launcher_cpp::submitSalomeJob( const string fileToExecute , const vector& filesToExport , const vector& filesToImport , - const BatchLight::batchParams& batch_params, + const batchParams& batch_params, const machineParams& params) throw(LauncherException) { cerr << "BEGIN OF Launcher_cpp::submitSalomeJob" << endl; @@ -90,7 +93,7 @@ long Launcher_cpp::submitSalomeJob( const string fileToExecute , cerr << "Choose cluster: " << clustername << endl; // search batch manager for that cluster in map or instanciate one - map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); if(it == _batchmap.end()) { _batchmap[clustername] = FactoryBatchManager(p); @@ -98,21 +101,41 @@ long Launcher_cpp::submitSalomeJob( const string fileToExecute , } try{ + // tmp directory on cluster to put files to execute + string tmpdir = getTmpDirForBatchFiles(); + // create and submit job on cluster - BatchLight::Job* job = new BatchLight::Job(fileToExecute, filesToExport, filesToImport, batch_params); - bool res = job->check(); - if (!res) { - delete job; - throw LauncherException("Job parameters are bad (see informations above)"); + Batch::Parametre param; + param[USER] = p.UserName; + param[EXECUTABLE] = buildSalomeCouplingScript(fileToExecute,tmpdir,p); + param[INFILE] = Batch::Couple( fileToExecute, getRemoteFile(tmpdir,fileToExecute) ); + for(int i=0;i 0 ){ + param[OUTFILE] = Batch::Couple( "", filesToImport[0] ); + for(int i=1;isubmitJob(job); + Batch::JobId jid = _batchmap[clustername]->submitJob(*job); + + // get job id in long + istringstream iss(jid.getReference()); + iss >> jobId; + + _jobmap[ pair(clustername,jobId) ] = job; } - catch(const BatchLight::BatchException &ex){ + catch(const Batch::EmulationException &ex){ throw LauncherException(ex.msg.c_str()); } @@ -126,8 +149,8 @@ long Launcher_cpp::submitSalomeJob( const string fileToExecute , * \param params : Constraints for the choice of the batch cluster */ //============================================================================= -string Launcher_cpp::querySalomeJob( long jobId, - const machineParams& params) throw(LauncherException) +string Launcher_cpp::querySalomeJob( long id, + const machineParams& params) throw(LauncherException) { // find a cluster matching params structure vector aCompoList ; @@ -136,11 +159,17 @@ string Launcher_cpp::querySalomeJob( long jobId, string clustername(p.Alias); // search batch manager for that cluster in map - std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + std::map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); if(it == _batchmap.end()) throw LauncherException("no batchmanager for that cluster"); - return _batchmap[clustername]->queryJob(jobId); + ostringstream oss; + oss << id; + Batch::JobId jobId( _batchmap[clustername], oss.str() ); + + Batch::JobInfo jinfo = jobId.queryJob(); + Batch::Parametre par = jinfo.getParametre(); + return par[STATE]; } //============================================================================= @@ -150,7 +179,7 @@ string Launcher_cpp::querySalomeJob( long jobId, * \param params : Constraints for the choice of the batch cluster */ //============================================================================= -void Launcher_cpp::deleteSalomeJob( const long jobId, +void Launcher_cpp::deleteSalomeJob( const long id, const machineParams& params) throw(LauncherException) { // find a cluster matching params structure @@ -160,11 +189,15 @@ void Launcher_cpp::deleteSalomeJob( const long jobId, string clustername(p.Alias); // search batch manager for that cluster in map - map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); if(it == _batchmap.end()) throw LauncherException("no batchmanager for that cluster"); - _batchmap[clustername]->deleteJob(jobId); + ostringstream oss; + oss << id; + Batch::JobId jobId( _batchmap[clustername], oss.str() ); + + jobId.deleteJob(); } //============================================================================= @@ -175,7 +208,7 @@ void Launcher_cpp::deleteSalomeJob( const long jobId, */ //============================================================================= void Launcher_cpp::getResultSalomeJob( const string directory, - const long jobId, + const long id, const machineParams& params) throw(LauncherException) { vector aCompoList ; @@ -184,11 +217,13 @@ void Launcher_cpp::getResultSalomeJob( const string directory, string clustername(p.Alias); // search batch manager for that cluster in map - map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); if(it == _batchmap.end()) throw LauncherException("no batchmanager for that cluster"); - _batchmap[clustername]->importOutputFiles( directory, jobId ); + Batch::Job* job = _jobmap[ pair(clustername,id) ]; + + _batchmap[clustername]->importOutputFiles( *job, directory ); } //============================================================================= @@ -197,68 +232,63 @@ void Launcher_cpp::getResultSalomeJob( const string directory, */ //============================================================================= -BatchLight::BatchManager *Launcher_cpp::FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException) +Batch::BatchManager_eClient *Launcher_cpp::FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException) { - cerr << "Begin of Launcher_cpp::FactoryBatchManager" << endl; - // Fill structure for batch manager - BatchLight::clusterParams p; - p.hostname = params.Alias; + std::string hostname, protocol, mpi; + Batch::FactBatchManager_eClient* fact; + + hostname = params.Alias; switch(params.Protocol){ case rsh: - p.protocol = "rsh"; + protocol = "rsh"; break; case ssh: - p.protocol = "ssh"; + protocol = "ssh"; break; default: throw LauncherException("unknown protocol"); break; } - p.username = params.UserName; - p.applipath = params.AppliPath; - p.modulesList = params.ModulesList; - p.nbnodes = params.DataForSort._nbOfNodes; - p.nbprocpernode = params.DataForSort._nbOfProcPerNode; switch(params.mpi){ case lam: - p.mpiImpl = "lam"; + mpi = "lam"; break; case mpich1: - p.mpiImpl = "mpich1"; + mpi = "mpich1"; break; case mpich2: - p.mpiImpl = "mpich2"; + mpi = "mpich2"; break; case openmpi: - p.mpiImpl = "openmpi"; + mpi = "openmpi"; break; case slurm: - p.mpiImpl = "slurm"; + mpi = "slurm"; break; default: - p.mpiImpl = "indif"; + mpi = "indif"; break; } - cerr << "Instanciation of batch manager" << endl; switch( params.Batch ){ case pbs: cerr << "Instantiation of PBS batch manager" << endl; - return new BatchLight::BatchManager_PBS(p); + fact = new Batch::FactBatchManager_ePBS; + break; case lsf: - cerr << "Instantiation of SLURM batch manager" << endl; - return new BatchLight::BatchManager_SLURM(p); + cerr << "Instantiation of LSF batch manager" << endl; + fact = new Batch::FactBatchManager_eLSF; + break; default: cerr << "BATCH = " << params.Batch << endl; throw LauncherException("no batchmanager for that cluster"); } + return (*fact)(hostname.c_str(),protocol.c_str(),mpi.c_str()); } -void Launcher_cpp::buildSalomeCouplingScript(BatchLight::Job* job, const ParserResourcesType& params) +string Launcher_cpp::buildSalomeCouplingScript(const string fileToExecute, const string dirForTmpFiles, const ParserResourcesType& params) { - const string fileToExecute = job->getFileToExecute(); - const std::string dirForTmpFiles = job->getDirForTmpFiles(); int idx = dirForTmpFiles.find("Batch/"); std::string filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); @@ -364,10 +394,9 @@ void Launcher_cpp::buildSalomeCouplingScript(BatchLight::Job* job, const ParserR chmod(TmpFileName.c_str(), 0x1ED); cerr << TmpFileName.c_str() << endl; - job->addFileToExportList(fileToExecute); - job->setFileToExecute(TmpFileName); - delete mpiImpl; + + return TmpFileName; } @@ -393,3 +422,33 @@ MpiImpl *Launcher_cpp::FactoryMpiImpl(MpiImplType mpi) throw(LauncherException) } } + +string Launcher_cpp::getTmpDirForBatchFiles() +{ + string ret; + string thedate; + + // Adding date to the directory name + Batch::Date date = Batch::Date(time(0)); + thedate = date.str(); + int lend = thedate.size() ; + int i = 0 ; + while ( i < lend ) { + if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { + thedate[i] = '_' ; + } + i++ ; + } + + ret = string("Batch/"); + ret += thedate; + return ret; +} + +string Launcher_cpp::getRemoteFile( std::string remoteDir, std::string localFile ) +{ + string::size_type pos = localFile.find_last_of("/") + 1; + int ln = localFile.length() - pos; + string remoteFile = remoteDir + "/" + localFile.substr(pos,ln); + return remoteFile; +} diff --git a/src/Launcher/Launcher.hxx b/src/Launcher/Launcher.hxx index dc88799d8..daa9163e2 100644 --- a/src/Launcher/Launcher.hxx +++ b/src/Launcher/Launcher.hxx @@ -20,11 +20,18 @@ #ifndef __LAUNCHER_HXX__ #define __LAUNCHER_HXX__ -#include "BatchLight_BatchManager.hxx" +#include "Batch_BatchManager_eClient.hxx" #include "ResourcesManager.hxx" #include +struct batchParams{ + std::string batch_directory; + unsigned long expected_during_time; + unsigned long mem; + unsigned long nb_proc; +}; + class LauncherException { public: @@ -43,7 +50,7 @@ public: long submitSalomeJob(const std::string fileToExecute , const std::vector& filesToExport , const std::vector& filesToImport , - const BatchLight::batchParams& batch_params, + const batchParams& batch_params, const machineParams& params) throw(LauncherException); std::string querySalomeJob( const long jobId, const machineParams& params) throw(LauncherException); @@ -54,11 +61,14 @@ public: protected: - void buildSalomeCouplingScript(BatchLight::Job* job, const ParserResourcesType& params); + std::string buildSalomeCouplingScript(const string fileToExecute, const string dirForTmpFiles, const ParserResourcesType& params); MpiImpl *FactoryMpiImpl(MpiImplType mpiImpl) throw(LauncherException); - BatchLight::BatchManager *FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException); + Batch::BatchManager_eClient *FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException); + std::string getTmpDirForBatchFiles(); + std::string getRemoteFile( std::string remoteDir, std::string localFile ); - std::map _batchmap; + std::map _batchmap; + std::map < std::pair , Batch::Job* > _jobmap; ResourcesManager_cpp *_ResManager; }; diff --git a/src/Launcher/Makefile.am b/src/Launcher/Makefile.am index 7b6986fe1..e6996d932 100644 --- a/src/Launcher/Makefile.am +++ b/src/Launcher/Makefile.am @@ -36,10 +36,6 @@ include $(top_srcdir)/salome_adm/unix/make_common_starter.am # # header files salomeinclude_HEADERS = \ - BatchLight_BatchManager.hxx \ - BatchLight_BatchManager_PBS.hxx \ - BatchLight_BatchManager_SLURM.hxx \ - BatchLight_Job.hxx \ SALOME_Launcher.hxx \ Launcher.hxx @@ -110,11 +106,7 @@ libSalomeLauncher_la_LIBADD =\ $(COMMON_LIBS) libLauncher.la libLauncher_la_SOURCES=\ - Launcher.cxx \ - BatchLight_BatchManager.cxx \ - BatchLight_BatchManager_SLURM.cxx \ - BatchLight_BatchManager_PBS.cxx \ - BatchLight_Job.cxx + Launcher.cxx libLauncher_la_CPPFLAGS =\ -I$(srcdir)/../Batch \ diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index ed58dbf7f..f051cb32c 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -126,7 +126,7 @@ CORBA::Long SALOME_Launcher::submitSalomeJob( const char * fileToExecute , p.cpu_clock = params.cpu_clock; p.mem_mb = params.mem_mb; - BatchLight::batchParams bp; + batchParams bp; bp.batch_directory = batch_params.batch_directory; bp.expected_during_time = batch_params.expected_during_time; bp.mem = batch_params.mem; -- 2.39.2