From 2a1f7e0299153211505140f22fb5b7a6918bda04 Mon Sep 17 00:00:00 2001 From: secher Date: Fri, 18 Apr 2008 10:00:11 +0000 Subject: [PATCH] create a pure c++ launcher implementation --- src/Launcher/BatchLight_BatchManager.cxx | 62 ++--- src/Launcher/BatchLight_BatchManager.hxx | 38 ++- src/Launcher/BatchLight_BatchManager_PBS.cxx | 91 +++---- src/Launcher/BatchLight_BatchManager_PBS.hxx | 9 +- .../BatchLight_BatchManager_SLURM.cxx | 83 +++--- .../BatchLight_BatchManager_SLURM.hxx | 9 +- src/Launcher/BatchLight_Job.cxx | 32 ++- src/Launcher/BatchLight_Job.hxx | 33 ++- src/Launcher/Launcher.cxx | 246 ++++++++++++++++++ src/Launcher/Launcher.hxx | 62 +++++ src/Launcher/Makefile.am | 32 ++- src/Launcher/MpiImpl.cxx | 17 +- src/Launcher/MpiImpl.hxx | 10 +- src/Launcher/SALOME_Launcher.cxx | 178 +++++-------- src/Launcher/SALOME_Launcher.hxx | 7 +- 15 files changed, 585 insertions(+), 324 deletions(-) create mode 100644 src/Launcher/Launcher.cxx create mode 100644 src/Launcher/Launcher.hxx diff --git a/src/Launcher/BatchLight_BatchManager.cxx b/src/Launcher/BatchLight_BatchManager.cxx index 37def271e..d615126bd 100644 --- a/src/Launcher/BatchLight_BatchManager.cxx +++ b/src/Launcher/BatchLight_BatchManager.cxx @@ -38,17 +38,17 @@ using namespace std; namespace BatchLight { // Constructeur - BatchManager::BatchManager(const batchParams& p) throw(SALOME_Exception) : _params(p) + BatchManager::BatchManager(const clusterParams& p) throw(BatchException) : _params(p) { - SCRUTE(_params.hostname); - SCRUTE(_params.protocol); - SCRUTE(_params.username); + cerr << _params.hostname << endl; + cerr << _params.protocol << endl; + cerr << _params.username << endl; // On verifie que le hostname est correct if (!gethostbyname(_params.hostname.c_str())) { // hostname unknown from network string msg = "hostname \""; msg += _params.hostname; msg += "\" unknown from the network"; - throw SALOME_Exception(msg.c_str()); + throw BatchException(msg.c_str()); } _mpiImpl = NULL; } @@ -56,7 +56,7 @@ namespace BatchLight { // Destructeur BatchManager::~BatchManager() { - MESSAGE("BatchManager destructor "<<_params.hostname); + cerr << "BatchManager destructor "<<_params.hostname << endl; std::map < int, const BatchLight::Job * >::const_iterator it; for(it=_jobmap.begin();it!=_jobmap.end();it++) delete it->second; @@ -66,7 +66,6 @@ namespace BatchLight { // Methode pour le controle des jobs : soumet un job au gestionnaire const int BatchManager::submitJob(Job* job) { - BEGIN_OF("BatchManager::submitJob"); int id; // temporary directory on cluster to put input files for job @@ -86,7 +85,6 @@ namespace BatchLight { // register job on map _jobmap[id] = job; - END_OF("BatchManager::submitJob"); return id; } @@ -112,12 +110,11 @@ namespace BatchLight { job->setDirForTmpFiles(dirForTmpFiles); } - void BatchManager::exportInputFiles(BatchLight::Job* job) throw(SALOME_Exception) + void BatchManager::exportInputFiles(BatchLight::Job* job) throw(BatchException) { - BEGIN_OF("BatchManager::exportInFiles"); int status; - const char * fileToExecute = job->getFileToExecute(); - const Engines::FilesList filesToExportList = job->getFilesToExportList(); + const string fileToExecute = job->getFileToExecute(); + const vector filesToExportList = job->getFilesToExportList(); const std::string dirForTmpFiles = job->getDirForTmpFiles(); std::string command; std::string copy_command; @@ -128,7 +125,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) copy_command = "scp "; else - throw SALOME_Exception("Unknown protocol : only rsh and ssh are known !"); + throw BatchException("Unknown protocol : only rsh and ssh are known !"); // First step : creating batch tmp files directory command = _params.protocol; @@ -141,20 +138,20 @@ namespace BatchLight { command += " \"mkdir -p "; command += dirForTmpFiles; command += "\"" ; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) { std::ostringstream oss; oss << status; std::string ex_mess("Error of connection on remote host ! status = "); ex_mess += oss.str(); - throw SALOME_Exception(ex_mess.c_str()); + throw BatchException(ex_mess.c_str()); } // Second step : copy fileToExecute into // batch tmp files directory command = copy_command; - command += fileToExecute; + command += fileToExecute.c_str(); command += " "; if (_params.username != ""){ command += _params.username; @@ -163,19 +160,19 @@ namespace BatchLight { command += _params.hostname; command += ":"; command += dirForTmpFiles; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) { std::ostringstream oss; oss << status; std::string ex_mess("Error of connection on remote host ! status = "); ex_mess += oss.str(); - throw SALOME_Exception(ex_mess.c_str()); + throw BatchException(ex_mess.c_str()); } // Third step : copy filesToExportList into // batch tmp files directory - for (int i = 0 ; i < filesToExportList.length() ; i++ ) { + for (int i = 0 ; i < filesToExportList.size() ; i++ ) { command = copy_command; command += filesToExportList[i] ; command += " "; @@ -186,36 +183,34 @@ namespace BatchLight { command += _params.hostname; command += ":"; command += dirForTmpFiles ; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) { std::ostringstream oss; oss << status; std::string ex_mess("Error of connection on remote host ! status = "); ex_mess += oss.str(); - throw SALOME_Exception(ex_mess.c_str()); + throw BatchException(ex_mess.c_str()); } } - END_OF("BatchManager::exportInFiles"); } - void BatchManager::importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception) + void BatchManager::importOutputFiles( const string directory, const int &jobId ) throw(BatchException) { - BEGIN_OF("BatchManager::importOutputFiles"); string command; int status; const BatchLight::Job* myJob = _jobmap[jobId]; - Engines::FilesList filesToImportList = myJob->getFilesToImportList(); + vector filesToImportList = myJob->getFilesToImportList(); - for ( int i = 0 ; i < filesToImportList.length() ; i++ ) { + for ( int i = 0 ; i < filesToImportList.size() ; i++ ) { if( _params.protocol == "rsh" ) command = "rcp "; else if( _params.protocol == "ssh" ) command = "scp "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); if (_params.username != ""){ command += _params.username; command += "@"; @@ -225,21 +220,20 @@ namespace BatchLight { command += filesToImportList[i] ; command += " "; command += directory; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) { // Try to get what we can (logs files) - // throw SALOME_Exception("Error of connection on remote host"); + // throw BatchException("Error of connection on remote host"); std::string mess("Copy command failed ! status is :"); ostringstream status_str; status_str << status; mess += status_str.str(); - INFOS(mess); + cerr << mess << endl; } } - END_OF("BatchManager::importOutputFiles"); } string BatchManager::BuildTemporaryFileName() const @@ -274,7 +268,7 @@ namespace BatchLight { free(temp); } - MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(SALOME_Exception) + MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(BatchException) { if(mpiImpl == "lam") return new MpiImpl_LAM(); @@ -285,11 +279,11 @@ namespace BatchLight { else if(mpiImpl == "openmpi") return new MpiImpl_OPENMPI(); else if(mpiImpl == "indif") - throw SALOME_Exception("you must specify a mpi implementation in CatalogResources.xml file"); + throw BatchException("you must specify a mpi implementation in CatalogResources.xml file"); else{ ostringstream oss; oss << mpiImpl << " : not yet implemented"; - throw SALOME_Exception(oss.str().c_str()); + throw BatchException(oss.str().c_str()); } } diff --git a/src/Launcher/BatchLight_BatchManager.hxx b/src/Launcher/BatchLight_BatchManager.hxx index 7183c1d1b..65eb3d44f 100644 --- a/src/Launcher/BatchLight_BatchManager.hxx +++ b/src/Launcher/BatchLight_BatchManager.hxx @@ -32,10 +32,7 @@ #include #include #include -#include "Utils_SALOME_Exception.hxx" -#include #include -#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) #include "MpiImpl.hxx" namespace BatchLight { @@ -43,6 +40,17 @@ namespace BatchLight { class Job; struct batchParams{ + std::string batch_directory; // Where batch command will be launched + // and log files will be created + std::string expected_during_time; // Time for the batch + // has to be like this : hh:mm + std::string mem; // Minimum of memory needed + // has to be like : 32gb or 512mb + + long nb_proc; // Number of processors requested + }; + + struct clusterParams{ std::string hostname; // serveur ou tourne le BatchManager std::string protocol; // protocole d'acces au serveur: ssh ou rsh std::string username; // username d'acces au serveur @@ -53,33 +61,41 @@ namespace BatchLight { std::string mpiImpl; // mpi implementation }; + class BatchException + { + public: + const std::string msg; + + BatchException(const std::string m) : msg(m) {} + }; + class BatchManager { public: // Constructeur et destructeur - BatchManager(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + BatchManager(const clusterParams& p) throw(BatchException); // connexion a la machine host virtual ~BatchManager(); // Methodes pour le controle des jobs : virtuelles pures const int submitJob(BatchLight::Job* job); // soumet un job au gestionnaire virtual void deleteJob(const int & jobid) = 0; // retire un job du gestionnaire virtual std::string queryJob(const int & jobid) = 0; // renvoie l'etat du job - void importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception); + void importOutputFiles( const std::string directory, const int & jobId ) throw(BatchException); protected: - batchParams _params; + clusterParams _params; MpiImpl *_mpiImpl; std::map _jobmap; - virtual int submit(BatchLight::Job* job) throw(SALOME_Exception) = 0; + virtual int submit(BatchLight::Job* job) throw(BatchException) = 0; void setDirForTmpFiles(BatchLight::Job* job); - void exportInputFiles(BatchLight::Job* job) throw(SALOME_Exception); - virtual void buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception) = 0; - virtual void buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception) = 0; + void exportInputFiles(BatchLight::Job* job) throw(BatchException); + virtual void buildSalomeCouplingScript(BatchLight::Job* job) throw(BatchException) = 0; + virtual void buildSalomeBatchScript(BatchLight::Job* job) throw(BatchException) = 0; std::string BuildTemporaryFileName() const; void RmTmpFile(std::string & TemporaryFileName); - MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(SALOME_Exception); + MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(BatchException); private: }; diff --git a/src/Launcher/BatchLight_BatchManager_PBS.cxx b/src/Launcher/BatchLight_BatchManager_PBS.cxx index eec063a02..3f2f4bc49 100644 --- a/src/Launcher/BatchLight_BatchManager_PBS.cxx +++ b/src/Launcher/BatchLight_BatchManager_PBS.cxx @@ -27,7 +27,6 @@ */ #include "BatchLight_BatchManager_PBS.hxx" -#include "utilities.h" #include "BatchLight_Job.hxx" #include #include @@ -39,7 +38,7 @@ using namespace std; namespace BatchLight { // Constructeur - BatchManager_PBS::BatchManager_PBS(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) + BatchManager_PBS::BatchManager_PBS(const clusterParams& p) throw(BatchException) : BatchManager(p) { // pbs batch system needs to know mpi implementation _mpiImpl = FactoryMpiImpl(_params.mpiImpl); @@ -48,13 +47,12 @@ namespace BatchLight { // Destructeur BatchManager_PBS::~BatchManager_PBS() { - MESSAGE("BatchManager_PBS destructor "<<_params.hostname); + cerr << "BatchManager_PBS destructor " << _params.hostname << endl; } // Methode pour le controle des jobs : retire un job du gestionnaire void BatchManager_PBS::deleteJob(const int & jobid) { - BEGIN_OF("BatchManager_PBS::deleteJob"); string command; int status; ostringstream oss; @@ -66,7 +64,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "ssh "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); if (_params.username != ""){ command += _params.username; @@ -77,19 +75,17 @@ namespace BatchLight { command += " \"qdel " ; command += oss.str(); command += "\""; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); - MESSAGE("jobId = " << jobid << "killed"); - END_OF("BatchManager_PBS::deleteJob"); + cerr << "jobId = " << jobid << "killed" << endl; } // Methode pour le controle des jobs : renvoie l'etat du job string BatchManager_PBS::queryJob(const int & jobid) { - BEGIN_OF("BatchManager_PBS::queryJob"); // define name of log file string jstatus; string logFile="/tmp/logs/"; @@ -113,7 +109,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "ssh "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); if (_params.username != ""){ command += _params.username; @@ -128,11 +124,11 @@ namespace BatchLight { command += _pbs_job_name[jobid]; command += "\" > "; command += logFile; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status && status != 153 && status != 256*153){ - MESSAGE("status="<getFileToExecute(); + const string fileToExecute = job->getFileToExecute(); const std::string dirForTmpFiles = job->getDirForTmpFiles(); int idx = dirForTmpFiles.find("Batch/"); std::string filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); std::string TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; @@ -275,7 +269,7 @@ namespace BatchLight { tempOutputFile.flush(); tempOutputFile.close(); chmod(TmpFileName.c_str(), 0x1ED); - SCRUTE(TmpFileName.c_str()) ; + cerr << TmpFileName.c_str() << endl; string command; if( _params.protocol == "rsh" ) @@ -283,7 +277,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "scp "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); command += TmpFileName; command += " "; @@ -297,35 +291,33 @@ namespace BatchLight { command += "/runSalome_" ; command += fileNameToExecute ; command += "_Batch.sh" ; - SCRUTE(fileNameToExecute) ; - SCRUTE(command.c_str()); + cerr << fileNameToExecute << endl; + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); RmTmpFile(TmpFileName); - END_OF("BatchManager_PBS::buildSalomeCouplingScript"); } - void BatchManager_PBS::buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception) + void BatchManager_PBS::buildSalomeBatchScript(BatchLight::Job* job) throw(BatchException) { - BEGIN_OF("BatchManager_PBS::buildSalomeBatchScript"); int status; const int nbproc = job->getNbProc(); std::string edt = job->getExpectedDuringTime(); std::string mem = job->getMemory(); const std::string dirForTmpFiles = job->getDirForTmpFiles(); - const char *fileToExecute = job->getFileToExecute(); - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + const string fileToExecute = job->getFileToExecute(); + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); int idx = dirForTmpFiles.find("Batch/"); std::string filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); int nbmaxproc = _params.nbnodes * _params.nbprocpernode; if( nbproc > nbmaxproc ){ - MESSAGE(nbproc << " processors asked on a cluster of " << nbmaxproc << " processors"); - throw SALOME_Exception("Too much processors asked for that cluster"); + cerr << nbproc << " processors asked on a cluster of " << nbmaxproc << " processors" << endl; + throw BatchException("Too much processors asked for that cluster"); } int nbnodes; @@ -359,7 +351,7 @@ namespace BatchLight { tempOutputFile.flush(); tempOutputFile.close(); chmod(TmpFileName.c_str(), 0x1ED); - SCRUTE(TmpFileName.c_str()) ; + cerr << TmpFileName.c_str() << endl; string command; if( _params.protocol == "rsh" ) @@ -367,7 +359,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "scp "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); command += TmpFileName; command += " "; if (_params.username != ""){ @@ -380,10 +372,10 @@ namespace BatchLight { command += "/" ; command += fileNameToExecute ; command += "_Batch.sh" ; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); // Adding log files into import list files ostringstream file_name_output; @@ -396,17 +388,15 @@ namespace BatchLight { job->addFileToImportList(file_name_error.str()); job->addFileToImportList(file_container_log.str()); RmTmpFile(TmpFileName); - END_OF("BatchManager_PBS::buildSalomeBatchScript"); } - int BatchManager_PBS::submit(BatchLight::Job* job) throw(SALOME_Exception) + int BatchManager_PBS::submit(BatchLight::Job* job) throw(BatchException) { - BEGIN_OF("BatchManager_PBS::submit"); const std::string dirForTmpFiles = job->getDirForTmpFiles(); - const char *fileToExecute = job->getFileToExecute(); - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + const string fileToExecute = job->getFileToExecute(); + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // define name of log file string logFile="/tmp/logs/"; @@ -429,7 +419,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "ssh "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); if (_params.username != ""){ command += _params.username; @@ -443,10 +433,10 @@ namespace BatchLight { command += fileNameToExecute ; command += "_Batch.sh\" > "; command += logFile; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); // read id of submitted job in log file char line[128]; @@ -468,7 +458,6 @@ namespace BatchLight { // Ajout dans la map _pbs_job_name[id] = sline; - END_OF("BatchManager_PBS::submit"); return id; } diff --git a/src/Launcher/BatchLight_BatchManager_PBS.hxx b/src/Launcher/BatchLight_BatchManager_PBS.hxx index e0c21651b..3f23f21f7 100644 --- a/src/Launcher/BatchLight_BatchManager_PBS.hxx +++ b/src/Launcher/BatchLight_BatchManager_PBS.hxx @@ -30,7 +30,6 @@ #define _BL_BATCHMANAGER_PBS_H_ #include -#include "Utils_SALOME_Exception.hxx" #include "BatchLight_BatchManager.hxx" namespace BatchLight { @@ -41,7 +40,7 @@ namespace BatchLight { { public: // Constructeur et destructeur - BatchManager_PBS(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + BatchManager_PBS(const clusterParams& p) throw(BatchException); // connexion a la machine host virtual ~BatchManager_PBS(); // Methodes pour le controle des jobs : virtuelles pures @@ -49,9 +48,9 @@ namespace BatchLight { std::string queryJob(const int & jobid); // renvoie l'etat du job private: - void buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception); - void buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception); - int submit(BatchLight::Job* job) throw(SALOME_Exception); + void buildSalomeCouplingScript(BatchLight::Job* job) throw(BatchException); + void buildSalomeBatchScript(BatchLight::Job* job) throw(BatchException); + int submit(BatchLight::Job* job) throw(BatchException); // Permet d'avoir la chaîne complête pour demander // le statut du job diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.cxx b/src/Launcher/BatchLight_BatchManager_SLURM.cxx index d184ca6bb..0c72d8b6b 100644 --- a/src/Launcher/BatchLight_BatchManager_SLURM.cxx +++ b/src/Launcher/BatchLight_BatchManager_SLURM.cxx @@ -27,7 +27,6 @@ */ #include "BatchLight_BatchManager_SLURM.hxx" -#include "utilities.h" #include "BatchLight_Job.hxx" #include #include @@ -39,20 +38,19 @@ using namespace std; namespace BatchLight { // Constructeur - BatchManager_SLURM::BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) + BatchManager_SLURM::BatchManager_SLURM(const clusterParams& p) throw(BatchException) : BatchManager(p) { } // Destructeur BatchManager_SLURM::~BatchManager_SLURM() { - MESSAGE("BatchManager_SLURM destructor "<<_params.hostname); + cerr << "BatchManager_SLURM destructor "<<_params.hostname << endl; } // Methode pour le controle des jobs : retire un job du gestionnaire void BatchManager_SLURM::deleteJob(const int & jobid) { - BEGIN_OF("BatchManager_SLURM::deleteJob"); string command; int status; ostringstream oss; @@ -64,7 +62,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "ssh "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); if (_params.username != ""){ command += _params.username; @@ -75,19 +73,17 @@ namespace BatchLight { command += " \"bkill " ; command += oss.str(); command += "\""; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); - MESSAGE("jobId = " << jobid << "killed"); - END_OF("BatchManager_SLURM::deleteJob"); + cerr << "jobId = " << jobid << "killed" << endl; } // Methode pour le controle des jobs : renvoie l'etat du job string BatchManager_SLURM::queryJob(const int & jobid) { - BEGIN_OF("BatchManager_SLURM::queryJob"); // define name of log file string logFile="/tmp/logs/"; logFile += getenv("USER"); @@ -109,7 +105,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "ssh "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); if (_params.username != ""){ command += _params.username; @@ -123,10 +119,10 @@ namespace BatchLight { command += oss2.str(); command += "\" > "; command += logFile; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); // read staus of job in log file char line[128]; @@ -138,21 +134,19 @@ namespace BatchLight { fp >> username; fp >> jstatus; - MESSAGE("jobId = " << jobid << " " << jstatus); - END_OF("BatchManager_SLURM::queryJob"); + cerr << "jobId = " << jobid << " " << jstatus << endl; return jstatus; } - void BatchManager_SLURM::buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception) + void BatchManager_SLURM::buildSalomeCouplingScript(BatchLight::Job* job) throw(BatchException) { - BEGIN_OF("BatchManager_SLURM::buildSalomeCouplingScript"); int status; - const char *fileToExecute = job->getFileToExecute(); + const string fileToExecute = job->getFileToExecute(); const std::string dirForTmpFiles = job->getDirForTmpFiles(); - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); std::string TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; @@ -186,7 +180,7 @@ namespace BatchLight { tempOutputFile.flush(); tempOutputFile.close(); chmod(TmpFileName.c_str(), 0x1ED); - SCRUTE(TmpFileName.c_str()) ; + cerr << TmpFileName.c_str() << endl; string command; if( _params.protocol == "rsh" ) @@ -194,7 +188,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "scp "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); command += TmpFileName; command += " "; @@ -208,28 +202,26 @@ namespace BatchLight { command += "/runSalome_" ; command += fileNameToExecute ; command += "_Batch.sh" ; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); RmTmpFile(TmpFileName); - END_OF("BatchManager_SLURM::buildSalomeCouplingScript"); } - void BatchManager_SLURM::buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception) + void BatchManager_SLURM::buildSalomeBatchScript(BatchLight::Job* job) throw(BatchException) { - BEGIN_OF("BatchManager_SLURM::buildSalomeBatchScript"); int status; const int nbproc = job->getNbProc(); const std::string dirForTmpFiles = job->getDirForTmpFiles(); std::string TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); - const char *fileToExecute = job->getFileToExecute(); - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + const string fileToExecute = job->getFileToExecute(); + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); tempOutputFile << "#! /bin/sh -f" << endl ; tempOutputFile << "#BSUB -n " << nbproc << endl ; @@ -238,7 +230,7 @@ namespace BatchLight { tempOutputFile.flush(); tempOutputFile.close(); chmod(TmpFileName.c_str(), 0x1ED); - SCRUTE(TmpFileName.c_str()) ; + cerr << TmpFileName.c_str() << endl; string command; if( _params.protocol == "rsh" ) @@ -246,7 +238,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "scp "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); command += TmpFileName; command += " "; if (_params.username != ""){ @@ -259,24 +251,22 @@ namespace BatchLight { command += "/" ; command += fileNameToExecute ; command += "_Batch.sh" ; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); RmTmpFile(TmpFileName); - END_OF("BatchManager_SLURM::buildSalomeBatchScript"); } - int BatchManager_SLURM::submit(BatchLight::Job* job) throw(SALOME_Exception) + int BatchManager_SLURM::submit(BatchLight::Job* job) throw(BatchException) { - BEGIN_OF("BatchManager_SLURM::submit"); const std::string dirForTmpFiles = job->getDirForTmpFiles(); - const char *fileToExecute = job->getFileToExecute(); - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + const string fileToExecute = job->getFileToExecute(); + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // define name of log file string logFile="/tmp/logs/"; @@ -299,7 +289,7 @@ namespace BatchLight { else if( _params.protocol == "ssh" ) command = "ssh "; else - throw SALOME_Exception("Unknown protocol"); + throw BatchException("Unknown protocol"); if (_params.username != ""){ command += _params.username; @@ -313,10 +303,10 @@ namespace BatchLight { command += fileNameToExecute ; command += "_Batch.sh\" > "; command += logFile; - SCRUTE(command.c_str()); + cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw SALOME_Exception("Error of connection on remote host"); + throw BatchException("Error of connection on remote host"); // read id of submitted job in log file char line[128]; @@ -333,7 +323,6 @@ namespace BatchLight { istringstream iss(strjob); iss >> id; - END_OF("BatchManager_SLURM::submit"); return id; } diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.hxx b/src/Launcher/BatchLight_BatchManager_SLURM.hxx index 6024b28de..72ce92624 100644 --- a/src/Launcher/BatchLight_BatchManager_SLURM.hxx +++ b/src/Launcher/BatchLight_BatchManager_SLURM.hxx @@ -30,7 +30,6 @@ #define _BL_BATCHMANAGER_SLURM_H_ #include -#include "Utils_SALOME_Exception.hxx" #include "BatchLight_BatchManager.hxx" namespace BatchLight { @@ -41,7 +40,7 @@ namespace BatchLight { { public: // Constructeur et destructeur - BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + BatchManager_SLURM(const clusterParams& p) throw(BatchException); // connexion a la machine host virtual ~BatchManager_SLURM(); // Methodes pour le controle des jobs : virtuelles pures @@ -49,9 +48,9 @@ namespace BatchLight { std::string queryJob(const int & jobid); // renvoie l'etat du job protected: - void buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception); - void buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception); - int submit(BatchLight::Job* job) throw(SALOME_Exception); + void buildSalomeCouplingScript(BatchLight::Job* job) throw(BatchException); + void buildSalomeBatchScript(BatchLight::Job* job) throw(BatchException); + int submit(BatchLight::Job* job) throw(BatchException); private: diff --git a/src/Launcher/BatchLight_Job.cxx b/src/Launcher/BatchLight_Job.cxx index 1980054a9..494693e8a 100644 --- a/src/Launcher/BatchLight_Job.cxx +++ b/src/Launcher/BatchLight_Job.cxx @@ -32,10 +32,10 @@ using namespace std; using namespace BatchLight; -Job::Job(const char *fileToExecute, - const Engines::FilesList& filesToExport, - const Engines::FilesList& filesToImport, - const Engines::BatchParameters& batch_params) : _fileToExecute(fileToExecute), +Job::Job(const string fileToExecute, + const vector& filesToExport, + const vector& filesToImport, + const batchParams& batch_params) : _fileToExecute(fileToExecute), _filesToExport(filesToExport), _filesToImport(filesToImport), _batch_params(batch_params) @@ -46,15 +46,13 @@ Job::Job(const char *fileToExecute, Job::~Job() { - MESSAGE("Job destructor"); + cerr << "Job destructor" << endl; } void Job::addFileToImportList(std::string file_name) { - CORBA::ULong lgth = _filesToImport.length(); - _filesToImport.length(lgth+1); - _filesToImport[lgth] = CORBA::string_dup(file_name.c_str()); + _filesToImport.push_back(file_name); } const std::string @@ -74,14 +72,14 @@ Job::getMemory() bool Job::check() { bool rtn = true; - INFOS("Warning : batch_directory option is not currently implemented"); - INFOS("Warning : currently these informations are only in the PBS batch manager"); - INFOS("Job parameters are :"); - INFOS("Directory : $HOME/Batch/$date"); + cerr << "Warning : batch_directory option is not currently implemented" << endl; + cerr << "Warning : currently these informations are only in the PBS batch manager" << endl; + cerr << "Job parameters are :" < -#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) +#include +#include "BatchLight_BatchManager.hxx" namespace BatchLight { @@ -39,29 +38,29 @@ namespace BatchLight { { public: // Constructeurs et destructeur - Job(const char *fileToExecute, - const Engines::FilesList& filesToExport, - const Engines::FilesList& filesToImport, - const Engines::BatchParameters& batch_params); + Job(const std::string fileToExecute, + const std::vector& filesToExport, + const std::vector& filesToImport, + const batchParams& batch_params); virtual ~Job(); - const char *getFileToExecute() const { return _fileToExecute; } - const Engines::FilesList getFilesToExportList() const { return _filesToExport; } - const Engines::FilesList getFilesToImportList() const { return _filesToImport; } + const std::string getFileToExecute() const { return _fileToExecute; } + const std::vector getFilesToExportList() const { return _filesToExport; } + const std::vector getFilesToImportList() const { return _filesToImport; } void addFileToImportList(std::string file_name); - const CORBA::Long getNbProc() const { return _batch_params.nb_proc; } + const long getNbProc() const { return _batch_params.nb_proc; } const std::string getExpectedDuringTime(); const std::string getMemory(); const std::string getDirForTmpFiles() const { return _dirForTmpFiles;} - void setDirForTmpFiles(std::string dirForTmpFiles) {_dirForTmpFiles = dirForTmpFiles; - SCRUTE(_dirForTmpFiles);} + void setDirForTmpFiles(std::string dirForTmpFiles) { _dirForTmpFiles = dirForTmpFiles; + std::cerr << _dirForTmpFiles << std::endl;} bool check(); protected: - const char* _fileToExecute; - const Engines::FilesList _filesToExport; - Engines::FilesList _filesToImport; - Engines::BatchParameters _batch_params; + const std::string _fileToExecute; + const std::vector _filesToExport; + std::vector _filesToImport; + batchParams _batch_params; std::string _dirForTmpFiles; // Tmp directory on the server private: diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx new file mode 100644 index 000000000..2385de73d --- /dev/null +++ b/src/Launcher/Launcher.cxx @@ -0,0 +1,246 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +#include "BatchLight_BatchManager_PBS.hxx" +#include "BatchLight_BatchManager_SLURM.hxx" +#include "BatchLight_Job.hxx" +#include "Launcher.hxx" + +using namespace std; + +//============================================================================= +/*! + * Constructor + * \param orb + * Define a CORBA single thread policy for the server, which avoid to deal + * with non thread-safe usage like Change_Directory in SALOME naming service + */ +//============================================================================= + +Launcher_cpp::Launcher_cpp() +{ + cerr << "Launcher_cpp constructor" << endl; +} + +//============================================================================= +/*! + * destructor + */ +//============================================================================= + +Launcher_cpp::~Launcher_cpp() +{ + cerr << "Launcher_cpp destructor" << endl; + std::map < string, BatchLight::BatchManager * >::const_iterator it; + for(it=_batchmap.begin();it!=_batchmap.end();it++) + delete it->second; +} + +//============================================================================= +/*! CORBA Method: + * Submit a batch job on a cluster and returns the JobId + * \param fileToExecute : .py/.exe/.sh/... to execute on the batch cluster + * \param filesToExport : to export on the batch cluster + * \param NumberOfProcessors : Number of processors needed on the batch cluster + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +long Launcher_cpp::submitSalomeJob( const string fileToExecute , + const vector& filesToExport , + const vector& filesToImport , + const BatchLight::batchParams& batch_params, + const machineParams& params) throw(LauncherException) +{ + cerr << "BEGIN OF Launcher_cpp::submitSalomeJob" << endl; + long jobId; + vector aMachineList; + + // find a cluster matching the structure params + vector aCompoList ; + try{ + aMachineList = _ResManager->GetFittingResources(params, aCompoList); + } + catch(const ResourcesException &ex){ + throw LauncherException(ex.msg.c_str()); + } + if (aMachineList.size() == 0) + throw LauncherException("No resources have been found with your parameters"); + + ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); + string clustername(p.Alias); + cerr << "Choose cluster: " << clustername << endl; + + // search batch manager for that cluster in map or instanciate one + map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + if(it == _batchmap.end()) + { + _batchmap[clustername] = FactoryBatchManager(p); + // TODO: Add a test for the cluster ! + } + + try{ + // create and submit job on cluster + BatchLight::Job* job = new BatchLight::Job(fileToExecute, filesToExport, filesToImport, batch_params); + bool res = job->check(); + if (!res) { + delete job; + throw LauncherException("Job parameters are bad (see informations above)"); + } + jobId = _batchmap[clustername]->submitJob(job); + } + catch(const BatchLight::BatchException &ex){ + throw LauncherException(ex.msg.c_str()); + } + + return jobId; +} + +//============================================================================= +/*! CORBA Method: + * Query a batch job on a cluster and returns the status of job + * \param jobId : identification of Salome job + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +string Launcher_cpp::querySalomeJob( long jobId, + const machineParams& params) throw(LauncherException) +{ + // find a cluster matching params structure + vector aCompoList ; + vector aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); + string clustername(p.Alias); + + // search batch manager for that cluster in map + std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + if(it == _batchmap.end()) + throw LauncherException("no batchmanager for that cluster"); + + return _batchmap[clustername]->queryJob(jobId); +} + +//============================================================================= +/*! CORBA Method: + * Delete a batch job on a cluster + * \param jobId : identification of Salome job + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +void Launcher_cpp::deleteSalomeJob( const long jobId, + const machineParams& params) throw(LauncherException) +{ + // find a cluster matching params structure + vector aCompoList ; + vector aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); + string clustername(p.Alias); + + // search batch manager for that cluster in map + map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + if(it == _batchmap.end()) + throw LauncherException("no batchmanager for that cluster"); + + _batchmap[clustername]->deleteJob(jobId); +} + +//============================================================================= +/*! CORBA Method: + * Get result files of job on a cluster + * \param jobId : identification of Salome job + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +void Launcher_cpp::getResultSalomeJob( const string directory, + const long jobId, + const machineParams& params) throw(LauncherException) +{ + vector aCompoList ; + vector aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); + string clustername(p.Alias); + + // search batch manager for that cluster in map + map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + if(it == _batchmap.end()) + throw LauncherException("no batchmanager for that cluster"); + + _batchmap[clustername]->importOutputFiles( directory, jobId ); +} + +//============================================================================= +/*! + * Factory to instanciate the good batch manager for choosen cluster. + */ +//============================================================================= + +BatchLight::BatchManager *Launcher_cpp::FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException) +{ + + cerr << "Begin of Launcher_cpp::FactoryBatchManager" << endl; + // Fill structure for batch manager + BatchLight::clusterParams p; + p.hostname = params.Alias; + switch(params.Protocol){ + case rsh: + p.protocol = "rsh"; + break; + case ssh: + p.protocol = "ssh"; + break; + default: + throw LauncherException("unknown protocol"); + break; + } + p.username = params.UserName; + p.applipath = params.AppliPath; + p.modulesList = params.ModulesList; + p.nbnodes = params.DataForSort._nbOfNodes; + p.nbprocpernode = params.DataForSort._nbOfProcPerNode; + switch(params.mpi){ + case lam: + p.mpiImpl = "lam"; + break; + case mpich1: + p.mpiImpl = "mpich1"; + break; + case mpich2: + p.mpiImpl = "mpich2"; + break; + case openmpi: + p.mpiImpl = "openmpi"; + break; + default: + p.mpiImpl = "indif"; + break; + } + + cerr << "Instanciation of batch manager" << endl; + switch( params.Batch ){ + case pbs: + cerr << "Instantiation of PBS batch manager" << endl; + return new BatchLight::BatchManager_PBS(p); + case slurm: + cerr << "Instantiation of SLURM batch manager" << endl; + return new BatchLight::BatchManager_SLURM(p); + default: + cerr << "BATCH = " << params.Batch << endl; + throw LauncherException("no batchmanager for that cluster"); + } +} + diff --git a/src/Launcher/Launcher.hxx b/src/Launcher/Launcher.hxx new file mode 100644 index 000000000..b46dd0b2f --- /dev/null +++ b/src/Launcher/Launcher.hxx @@ -0,0 +1,62 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +#ifndef __LAUNCHER_HXX__ +#define __LAUNCHER_HXX__ + +#include "BatchLight_BatchManager.hxx" +#include "ResourcesManager.hxx" + +#include + +class LauncherException +{ +public: + const std::string msg; + + LauncherException(const std::string m) : msg(m) {} +}; + +class Launcher_cpp +{ + +public: + Launcher_cpp(); + ~Launcher_cpp(); + + long submitSalomeJob(const std::string fileToExecute , + const std::vector& filesToExport , + const std::vector& filesToImport , + const BatchLight::batchParams& batch_params, + const machineParams& params) throw(LauncherException); + + std::string querySalomeJob( const long jobId, const machineParams& params) throw(LauncherException); + void deleteSalomeJob( const long jobId, const machineParams& params) throw(LauncherException); + void getResultSalomeJob( const std::string directory, const long jobId, const machineParams& params ) throw(LauncherException); + + void SetResourcesManager( ResourcesManager_cpp* rm ) { _ResManager = rm; } + +protected: + BatchLight::BatchManager *FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException); + + std::map _batchmap; + ResourcesManager_cpp *_ResManager; +}; + +#endif diff --git a/src/Launcher/Makefile.am b/src/Launcher/Makefile.am index a78dc3edb..21a52f2ba 100644 --- a/src/Launcher/Makefile.am +++ b/src/Launcher/Makefile.am @@ -95,14 +95,9 @@ COMMON_LIBS =\ # Libraries targets # =============================================================== # -lib_LTLIBRARIES = libSalomeLauncher.la +lib_LTLIBRARIES = libLauncher.la libSalomeLauncher.la libSalomeLauncher_la_SOURCES=\ - SALOME_Launcher.cxx \ - BatchLight_BatchManager.cxx \ - BatchLight_BatchManager_SLURM.cxx \ - BatchLight_BatchManager_PBS.cxx \ - BatchLight_Job.cxx \ - MpiImpl.cxx + SALOME_Launcher.cxx libSalomeLauncher_la_CPPFLAGS =\ $(COMMON_CPPFLAGS) @@ -112,8 +107,29 @@ libSalomeLauncher_la_LDFLAGS =\ @LDEXPDYNFLAGS@ libSalomeLauncher_la_LIBADD =\ - $(COMMON_LIBS) + $(COMMON_LIBS) libLauncher.la + +libLauncher_la_SOURCES=\ + Launcher.cxx \ + BatchLight_BatchManager.cxx \ + BatchLight_BatchManager_SLURM.cxx \ + BatchLight_BatchManager_PBS.cxx \ + BatchLight_Job.cxx \ + MpiImpl.cxx + +libLauncher_la_CPPFLAGS =\ + -I$(srcdir)/../Batch \ + -I$(srcdir)/../ResourcesManager \ + @MPI_INCLUDES@ \ + @LIBXML_INCLUDES@ +libLauncher_la_LDFLAGS =\ + -no-undefined -version-info=0:0:0 \ + @LDEXPDYNFLAGS@ + +libLauncher_la_LIBADD =\ + @MPI_LIBS@ \ + @LIBXML_LIBS@ # # =============================================================== diff --git a/src/Launcher/MpiImpl.cxx b/src/Launcher/MpiImpl.cxx index 036018b1e..1ac4074aa 100644 --- a/src/Launcher/MpiImpl.cxx +++ b/src/Launcher/MpiImpl.cxx @@ -29,7 +29,6 @@ #include #include #include -#include "utilities.h" #include "MpiImpl.hxx" using namespace std; @@ -37,13 +36,13 @@ using namespace std; // Constructor MpiImpl::MpiImpl() { - MESSAGE("MpiImpl constructor"); + cerr << "MpiImpl constructor" << endl; } // Destructor MpiImpl::~MpiImpl() { - MESSAGE("MpiImpl destructor"); + cerr << "MpiImpl destructor" << endl; } // lam implementation @@ -55,7 +54,7 @@ MpiImpl_LAM::MpiImpl_LAM() : MpiImpl() // Destructor MpiImpl_LAM::~MpiImpl_LAM() { - MESSAGE("MpiImpl_LAM destructor"); + cerr << "MpiImpl_LAM destructor" << endl; } string MpiImpl_LAM::size() @@ -98,17 +97,17 @@ MpiImpl_MPICH1::MpiImpl_MPICH1() : MpiImpl() // Destructor MpiImpl_MPICH1::~MpiImpl_MPICH1() { - MESSAGE("MpiImpl_MPICH1 destructor"); + cerr << "MpiImpl_MPICH1 destructor" << endl; } string MpiImpl_MPICH1::size() { - throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); + throw MpiImplException("mpich1 doesn't work with this batch system to submit salome session"); } string MpiImpl_MPICH1::rank() { - throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); + throw MpiImplException("mpich1 doesn't work with this batch system to submit salome session"); } string MpiImpl_MPICH1::boot(const string machinefile, const unsigned int nbnodes) @@ -137,7 +136,7 @@ MpiImpl_MPICH2::MpiImpl_MPICH2() : MpiImpl() // Destructor MpiImpl_MPICH2::~MpiImpl_MPICH2() { - MESSAGE("MpiImpl_MPICH2 destructor"); + cerr << "MpiImpl_MPICH2 destructor" << endl; } string MpiImpl_MPICH2::size() @@ -180,7 +179,7 @@ MpiImpl_OPENMPI::MpiImpl_OPENMPI() : MpiImpl() // Destructor MpiImpl_OPENMPI::~MpiImpl_OPENMPI() { - MESSAGE("MpiImpl_OPENMPI destructor"); + cerr << "MpiImpl_OPENMPI destructor" << endl; } string MpiImpl_OPENMPI::size() diff --git a/src/Launcher/MpiImpl.hxx b/src/Launcher/MpiImpl.hxx index beeac0301..47f1283ef 100644 --- a/src/Launcher/MpiImpl.hxx +++ b/src/Launcher/MpiImpl.hxx @@ -30,8 +30,14 @@ #define _BL_MPIIMPL_H_ #include -#include "Utils_SALOME_Exception.hxx" -#include + +class MpiImplException +{ +public: + const std::string msg; + + MpiImplException(const std::string m) : msg(m) {} +}; class MpiImpl { diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index a507c010d..ed58dbf7f 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -17,9 +17,6 @@ // // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // -#include "BatchLight_BatchManager_PBS.hxx" -#include "BatchLight_BatchManager_SLURM.hxx" -#include "BatchLight_Job.hxx" #include "SALOME_Launcher.hxx" #include "OpUtil.hxx" #include @@ -28,9 +25,6 @@ #endif #include #include "Utils_CorbaException.hxx" -#include "Batch_Date.hxx" - -#define TIME_OUT_TO_LAUNCH_CONT 21 using namespace std; @@ -45,11 +39,12 @@ const char *SALOME_Launcher::_LauncherNameInNS = "/SalomeLauncher"; */ //============================================================================= -SALOME_Launcher::SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa) +SALOME_Launcher::SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa) : _l() { - MESSAGE("constructor"); + MESSAGE("SALOME_Launcher constructor"); _NS = new SALOME_NamingService(orb); _ResManager = new SALOME_ResourcesManager(orb,poa,_NS); + _l.SetResourcesManager(_ResManager->GetImpl()); _ContManager = new SALOME_ContainerManager(orb,poa,_ResManager,_NS); _ResManager->_remove_ref(); _ContManager->_remove_ref(); @@ -61,7 +56,7 @@ SALOME_Launcher::SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa Engines::SalomeLauncher_var refContMan = Engines::SalomeLauncher::_narrow(obj); _NS->Register(refContMan,_LauncherNameInNS); - MESSAGE("constructor end"); + MESSAGE("SALOME_Launcher constructor end"); } //============================================================================= @@ -74,9 +69,6 @@ SALOME_Launcher::~SALOME_Launcher() { MESSAGE("destructor"); delete _NS; - std::map < string, BatchLight::BatchManager * >::const_iterator it; - for(it=_batchmap.begin();it!=_batchmap.end();it++) - delete it->second; } //============================================================================= @@ -125,37 +117,35 @@ CORBA::Long SALOME_Launcher::submitSalomeJob( const char * fileToExecute , { MESSAGE("BEGIN OF SALOME_Launcher::submitSalomeJob"); CORBA::Long jobId; - try{ - // find a cluster matching the structure params - Engines::CompoList aCompoList ; - Engines::MachineList *aMachineList = _ResManager->GetFittingResources(params, aCompoList); - if (aMachineList->length() == 0) - throw SALOME_Exception("No resources have been found with your parameters"); + + machineParams p; + p.hostname = params.hostname; + p.OS = params.OS; + p.nb_node = params.nb_node; + p.nb_proc_per_node = params.nb_proc_per_node; + p.cpu_clock = params.cpu_clock; + p.mem_mb = params.mem_mb; + + BatchLight::batchParams bp; + bp.batch_directory = batch_params.batch_directory; + bp.expected_during_time = batch_params.expected_during_time; + bp.mem = batch_params.mem; + bp.nb_proc = batch_params.nb_proc; + + vector efl; + for(int i=0;i ifl; + for(int i=0;iGetMachineParameters((*aMachineList)[0]); - string clustername(p->alias); - INFOS("Choose cluster" << clustername); - - // search batch manager for that cluster in map or instanciate one - std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - { - _batchmap[clustername] = FactoryBatchManager(p); - // TODO: Add a test for the cluster ! - } - - // create and submit job on cluster - BatchLight::Job* job = new BatchLight::Job(fileToExecute, filesToExport, filesToImport, batch_params); - bool res = job->check(); - if (!res) { - delete job; - throw SALOME_Exception("Job parameters are bad (see informations above)"); - } - jobId = _batchmap[clustername]->submitJob(job); + try{ + jobId = _l.submitSalomeJob(fileToExecute,efl,ifl,bp,p); } - catch(const SALOME_Exception &ex){ - INFOS(ex.what()); - THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::INTERNAL_ERROR); + catch(const LauncherException &ex){ + INFOS(ex.msg.c_str()); + THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::INTERNAL_ERROR); } return jobId; } @@ -171,23 +161,20 @@ char* SALOME_Launcher::querySalomeJob( const CORBA::Long jobId, const Engines::MachineParameters& params) { string status; + machineParams p; + p.hostname = params.hostname; + p.OS = params.OS; + p.nb_node = params.nb_node; + p.nb_proc_per_node = params.nb_proc_per_node; + p.cpu_clock = params.cpu_clock; + p.mem_mb = params.mem_mb; + try{ - // find a cluster matching params structure - Engines::CompoList aCompoList ; - Engines::MachineList * aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; - const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]); - string clustername(p->alias); - - // search batch manager for that cluster in map - std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - throw SALOME_Exception("no batchmanager for that cluster"); - - status = _batchmap[clustername]->queryJob(jobId); + status = _l.querySalomeJob(jobId,p); } - catch(const SALOME_Exception &ex){ + catch(const LauncherException &ex){ INFOS("Caught exception."); - THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM); + THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM); } return CORBA::string_dup(status.c_str()); } @@ -202,23 +189,20 @@ char* SALOME_Launcher::querySalomeJob( const CORBA::Long jobId, void SALOME_Launcher::deleteSalomeJob( const CORBA::Long jobId, const Engines::MachineParameters& params) { + machineParams p; + p.hostname = params.hostname; + p.OS = params.OS; + p.nb_node = params.nb_node; + p.nb_proc_per_node = params.nb_proc_per_node; + p.cpu_clock = params.cpu_clock; + p.mem_mb = params.mem_mb; + try{ - // find a cluster matching params structure - Engines::CompoList aCompoList ; - Engines::MachineList *aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; - const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]); - string clustername(p->alias); - - // search batch manager for that cluster in map - std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - throw SALOME_Exception("no batchmanager for that cluster"); - - _batchmap[clustername]->deleteJob(jobId); + _l.deleteSalomeJob(jobId,p); } - catch(const SALOME_Exception &ex){ + catch(const LauncherException &ex){ INFOS("Caught exception."); - THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM); + THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM); } } @@ -233,54 +217,20 @@ void SALOME_Launcher::getResultSalomeJob( const char *directory, const CORBA::Long jobId, const Engines::MachineParameters& params) { + machineParams p; + p.hostname = params.hostname; + p.OS = params.OS; + p.nb_node = params.nb_node; + p.nb_proc_per_node = params.nb_proc_per_node; + p.cpu_clock = params.cpu_clock; + p.mem_mb = params.mem_mb; + try{ - // find a cluster matching params structure - Engines::CompoList aCompoList ; - Engines::MachineList *aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; - const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]); - string clustername(p->alias); - - // search batch manager for that cluster in map - std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - throw SALOME_Exception("no batchmanager for that cluster"); - - _batchmap[clustername]->importOutputFiles( directory, jobId ); + _l.getResultSalomeJob( directory, jobId, p ); } - catch(const SALOME_Exception &ex){ + catch(const LauncherException &ex){ INFOS("Caught exception."); - THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM); - } -} - -//============================================================================= -/*! - * Factory to instanciate the good batch manager for choosen cluster. - */ -//============================================================================= - -BatchLight::BatchManager *SALOME_Launcher::FactoryBatchManager( const Engines::MachineParameters* params ) throw(SALOME_Exception) -{ - // Fill structure for batch manager - BatchLight::batchParams p; - p.hostname = params->alias; - p.protocol = params->protocol; - p.username = params->username; - p.applipath = params->applipath; - for(int i=0;imodList.length();i++) - p.modulesList.push_back((const char*)params->modList[i]); - p.nbnodes = params->nb_node; - p.nbprocpernode = params->nb_proc_per_node; - p.mpiImpl = params->mpiImpl; - - string sb = (const char*)params->batch; - if(sb == "pbs") - return new BatchLight::BatchManager_PBS(p); - else if(sb == "slurm") - return new BatchLight::BatchManager_SLURM(p); - else{ - MESSAGE("BATCH = " << params->batch); - throw SALOME_Exception("no batchmanager for that cluster"); + THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM); } } diff --git a/src/Launcher/SALOME_Launcher.hxx b/src/Launcher/SALOME_Launcher.hxx index 4bc5d65ce..350fa03d2 100644 --- a/src/Launcher/SALOME_Launcher.hxx +++ b/src/Launcher/SALOME_Launcher.hxx @@ -23,7 +23,7 @@ #include #include CORBA_CLIENT_HEADER(SALOME_ContainerManager) #include "SALOME_ContainerManager.hxx" -#include "BatchLight_BatchManager.hxx" +#include "Launcher.hxx" #include @@ -69,14 +69,13 @@ public: static const char *_LauncherNameInNS; protected: - BatchLight::BatchManager *FactoryBatchManager( const Engines::MachineParameters* params ) throw(SALOME_Exception); - - std::map _batchmap; CORBA::ORB_var _orb; PortableServer::POA_var _poa; SALOME_ContainerManager *_ContManager; SALOME_ResourcesManager *_ResManager; SALOME_NamingService *_NS; + + Launcher_cpp _l; }; #endif -- 2.39.2