From: secher Date: Fri, 25 Apr 2008 13:43:07 +0000 (+0000) Subject: add new classes in batch manager to emulate PBS and LSF clients X-Git-Tag: afterMerge_V4_1_3rc1~9 X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=3b65234ac63c63776c0d464a0d60ecfd32dc4447;p=modules%2Fkernel.git add new classes in batch manager to emulate PBS and LSF clients --- diff --git a/src/Batch/Batch_BatchManager_eClient.cxx b/src/Batch/Batch_BatchManager_eClient.cxx new file mode 100644 index 000000000..474288ad4 --- /dev/null +++ b/src/Batch/Batch_BatchManager_eClient.cxx @@ -0,0 +1,193 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager_eLSF.cxx : emulation of LSF client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#include +#include +#include +#include +#include "Batch_BatchManager_eClient.hxx" + +namespace Batch { + + BatchManager_eClient::BatchManager_eClient(const char* host, const char* protocol, const char* mpiImpl) : _host(host), _protocol(protocol), _username("") + { + // instanciation of mpi implementation needed to launch executable in batch script + _mpiImpl = FactoryMpiImpl(mpiImpl); + } + + // Destructeur + BatchManager_eClient::~BatchManager_eClient() + { + // Nothing to do + delete _mpiImpl; + } + + void BatchManager_eClient::exportInputFiles(const Job& job) throw(EmulationException) + { + int status; + Parametre params = job.getParametre(); + Versatile V = params[INFILE]; + Versatile::iterator Vit; + std::string command; + std::string copy_command; + _username = string(params[USER]); + + // Test protocol + if( _protocol == "rsh" ) + copy_command = "rcp "; + else if( _protocol == "ssh" ) + copy_command = "scp "; + else + throw EmulationException("Unknown protocol : only rsh and ssh are known !"); + + // First step : creating batch tmp files directory + command = _protocol; + command += " "; + if(_username != ""){ + command += _username; + command += "@"; + } + command += _host; + command += " \"mkdir -p "; + command += string(params[TMPDIR]); + command += "\"" ; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw EmulationException(ex_mess.c_str()); + } + + // Second step : copy fileToExecute into + // batch tmp files directory + command = copy_command; + command += string(params[EXECUTABLE]); + command += " "; + if(_username != ""){ + command += _username; + command += "@"; + } + command += _host; + command += ":"; + command += string(params[TMPDIR]); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw EmulationException(ex_mess.c_str()); + } + + // Third step : copy filesToExportList into + // batch tmp files directory + for(Vit=V.begin(); Vit!=V.end(); Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple inputFile = cpt; + command = copy_command; + command += inputFile.getLocal(); + command += " "; + if(_username != ""){ + command += _username; + command += "@"; + } + command += _host; + command += ":"; + command += string(params[TMPDIR]); + command += "/"; + command += inputFile.getRemote(); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw EmulationException(ex_mess.c_str()); + } + } + + } + + MpiImpl *BatchManager_eClient::FactoryMpiImpl(string mpiImpl) throw(EmulationException) + { + if(mpiImpl == "lam") + return new MpiImpl_LAM(); + else if(mpiImpl == "mpich1") + return new MpiImpl_MPICH1(); + else if(mpiImpl == "mpich2") + return new MpiImpl_MPICH2(); + else if(mpiImpl == "openmpi") + return new MpiImpl_OPENMPI(); + else if(mpiImpl == "slurm") + return new MpiImpl_SLURM(); + else{ + ostringstream oss; + oss << mpiImpl << " : not yet implemented"; + throw EmulationException(oss.str().c_str()); + } + } + + string BatchManager_eClient::BuildTemporaryFileName() const + { + //build more complex file name to support multiple salome session + char *temp = new char[19]; + strcpy(temp, "/tmp/command"); + strcat(temp, "XXXXXX"); +#ifndef WNT + mkstemp(temp); +#else + char aPID[80]; + itoa(getpid(), aPID, 10); + strcat(temp, aPID); +#endif + + string command(temp); + delete [] temp; + command += ".sh"; + return command; + } + + void BatchManager_eClient::RmTmpFile(std::string & TemporaryFileName) + { + string command = "rm "; + command += TemporaryFileName; + char *temp = strdup(command.c_str()); + int lgthTemp = strlen(temp); + temp[lgthTemp - 3] = '*'; + temp[lgthTemp - 2] = '\0'; + system(temp); + free(temp); + } + +} diff --git a/src/Batch/Batch_BatchManager_eClient.hxx b/src/Batch/Batch_BatchManager_eClient.hxx new file mode 100644 index 000000000..369155db1 --- /dev/null +++ b/src/Batch/Batch_BatchManager_eClient.hxx @@ -0,0 +1,73 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager_eLSF.hxx : emulation of LSF client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#ifndef _BATCHMANAGER_eClient_H_ +#define _BATCHMANAGER_eClient_H_ + + +#include "MpiImpl.hxx" +#include "Batch_Job.hxx" + +namespace Batch { + + class Job; + + class EmulationException + { + public: + const std::string msg; + + EmulationException(const std::string m) : msg(m) {} + }; + + class BatchManager_eClient + { + public: + // Constructeur et destructeur + BatchManager_eClient(const char* host="localhost", const char* protocol="ssh", const char* mpiImpl="indif"); + virtual ~BatchManager_eClient(); + + protected: + std::string _host; // serveur ou tourne le BatchManager + std::string _protocol; // protocol to access _hostname + std::string _username; // username to access _hostname + MpiImpl *_mpiImpl; // Mpi implementation to launch executable in batch script + + std::string BuildTemporaryFileName() const; + void RmTmpFile(std::string & TemporaryFileName); + MpiImpl* FactoryMpiImpl(string mpiImpl) throw(EmulationException); + void exportInputFiles(const Job & job) throw(EmulationException); + + private: + + }; + +} + +#endif diff --git a/src/Batch/Batch_BatchManager_eLSF.cxx b/src/Batch/Batch_BatchManager_eLSF.cxx new file mode 100644 index 000000000..653fd3874 --- /dev/null +++ b/src/Batch/Batch_BatchManager_eLSF.cxx @@ -0,0 +1,295 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager_eLSF.cxx : emulation of LSF client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#include +#include +#include +#include +#include "Batch_BatchManager_eLSF.hxx" + +namespace Batch { + + BatchManager_eLSF::BatchManager_eLSF(const FactBatchManager * parent, const char * host, const char * protocol, const char * mpiImpl) throw(InvalidArgumentException,ConnexionFailureException) : BatchManager(parent, host), BatchManager_eClient(host,protocol,mpiImpl) + { + // Nothing to do + } + + // Destructeur + BatchManager_eLSF::~BatchManager_eLSF() + { + // Nothing to do + } + + // Methode pour le controle des jobs : soumet un job au gestionnaire + const JobId BatchManager_eLSF::submitJob(const Job & job) + { + int status; + Parametre params = job.getParametre(); + const std::string dirForTmpFiles = params[TMPDIR]; + const string fileToExecute = params[EXECUTABLE]; + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + + // export input files on cluster + exportInputFiles(job); + + // build batch script for job + buildBatchScript(job); + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + + // define command to submit batch + command = _protocol; + command += " "; + + if(_username != ""){ + command += _username; + command += "@"; + } + + command += _hostname; + command += " \"cd " ; + command += dirForTmpFiles ; + command += "; bsub < " ; + command += fileNameToExecute ; + command += "_Batch.sh\" > "; + command += logFile; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + throw EmulationException("Error of connection on remote host"); + + // read id of submitted job in log file + char line[128]; + FILE *fp = fopen(logFile.c_str(),"r"); + fgets( line, 128, fp); + fclose(fp); + + string sline(line); + int p10 = sline.find("<"); + int p20 = sline.find(">"); + string strjob = sline.substr(p10+1,p20-p10-1); + + JobId id(this, strjob); + return id; + } + + // Methode pour le controle des jobs : retire un job du gestionnaire + void BatchManager_eLSF::deleteJob(const JobId & jobid) + { + int status; + int ref; + istringstream iss(jobid.getReference()); + iss >> ref; + + // define command to submit batch + string command; + command = _protocol; + command += " "; + + if (_username != ""){ + command += _username; + command += "@"; + } + + command += _hostname; + command += " \"bkill " ; + command += iss.str(); + command += "\""; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + throw EmulationException("Error of connection on remote host"); + + cerr << "jobId = " << ref << "killed" << endl; + } + + // Methode pour le controle des jobs : suspend un job en file d'attente + void BatchManager_eLSF::holdJob(const JobId & jobid) + { + throw EmulationException("Not yet implemented"); + } + + // Methode pour le controle des jobs : relache un job suspendu + void BatchManager_eLSF::releaseJob(const JobId & jobid) + { + throw EmulationException("Not yet implemented"); + } + + + // Methode pour le controle des jobs : modifie un job en file d'attente + void BatchManager_eLSF::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env) + { + throw EmulationException("Not yet implemented"); + } + + // Methode pour le controle des jobs : modifie un job en file d'attente + void BatchManager_eLSF::alterJob(const JobId & jobid, const Parametre & param) + { + alterJob(jobid, param, Environnement()); + } + + // Methode pour le controle des jobs : modifie un job en file d'attente + void BatchManager_eLSF::alterJob(const JobId & jobid, const Environnement & env) + { + alterJob(jobid, Parametre(), env); + } + + // Methode pour le controle des jobs : renvoie l'etat du job + JobInfo BatchManager_eLSF::queryJob(const JobId & jobid) + { + int id; + istringstream iss(jobid.getReference()); + iss >> id; + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + command = _protocol; + + if (_username != ""){ + command += _username; + command += "@"; + } + + command += _hostname; + command += " \"bjobs " ; + command += iss.str(); + command += "\" > "; + command += logFile; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + throw EmulationException("Error of connection on remote host"); + + // read status of job in log file + char line[128]; + ifstream fp(logFile.c_str(),ios::in); + fp.getline(line,80,'\n'); + + string sjobid, username, jstatus; + fp >> sjobid; + fp >> username; + fp >> jstatus; + + cerr << "jobId = " << id << " " << jstatus << endl; + + JobInfo_eLSF ji = JobInfo_eLSF(id,jstatus); + return ji; + } + + + + // Methode pour le controle des jobs : teste si un job est present en machine + bool BatchManager_eLSF::isRunning(const JobId & jobid) + { + throw EmulationException("Not yet implemented"); + } + + void BatchManager_eLSF::buildBatchScript(const Job & job) throw(EmulationException) + { + int status; + Parametre params = job.getParametre(); + const int nbproc = params[NBPROC]; + const std::string dirForTmpFiles = params[TMPDIR]; + const string fileToExecute = params[EXECUTABLE]; + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + std::string fileNameToExecute = "~/" + dirForTmpFiles + "/" + string(basename(fileToExecute.c_str())); + + std::string TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); + + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "#BSUB -n " << nbproc << endl ; + tempOutputFile << "#BSUB -o " << dirForTmpFiles << "/runSalome.log%J" << endl ; + tempOutputFile << _mpiImpl->boot("",nbproc); + tempOutputFile << _mpiImpl->run("",nbproc,fileNameToExecute); + tempOutputFile << _mpiImpl->halt(); + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(TmpFileName.c_str(), 0x1ED); + cerr << TmpFileName.c_str() << endl; + + string command; + if( _protocol == "rsh" ) + command = "rcp "; + else if( _protocol == "ssh" ) + command = "scp "; + else + throw EmulationException("Unknown protocol"); + command += TmpFileName; + command += " "; + if(_username != ""){ + command += _username; + command += "@"; + } + command += _hostname; + command += ":"; + command += dirForTmpFiles ; + command += "/" ; + command += rootNameToExecute ; + command += "_Batch.sh" ; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + throw EmulationException("Error of connection on remote host"); + + RmTmpFile(TmpFileName); + + } + +} diff --git a/src/Batch/Batch_BatchManager_eLSF.hxx b/src/Batch/Batch_BatchManager_eLSF.hxx new file mode 100644 index 000000000..6713aa70b --- /dev/null +++ b/src/Batch/Batch_BatchManager_eLSF.hxx @@ -0,0 +1,92 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager_eLSF.hxx : emulation of LSF client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#ifndef _BATCHMANAGER_eLSF_H_ +#define _BATCHMANAGER_eLSF_H_ + + +#include "Batch_JobId.hxx" +#include "Batch_JobInfo.hxx" +#include "Batch_JobInfo_eLSF.hxx" +#include "Batch_InvalidArgumentException.hxx" +#include "Batch_ConnexionFailureException.hxx" +#include "Batch_APIInternalFailureException.hxx" +#include "Batch_NotYetImplementedException.hxx" +#include "Batch_BatchManager.hxx" +#include "Batch_BatchManager_eClient.hxx" + +namespace Batch { + + class Job; + class JobId; + class JobInfo; + class FactBatchManager; + + class BatchManager_eLSF : public BatchManager, public BatchManager_eClient + { + public: + // Constructeur et destructeur + BatchManager_eLSF(const FactBatchManager * parent, const char * host="localhost", const char * protocol="ssh", const char * mpiImpl="indif") throw(InvalidArgumentException,ConnexionFailureException); // connexion a la machine host + virtual ~BatchManager_eLSF(); + + // Recupere le nom du serveur par defaut + // static string BatchManager_LSF::getDefaultServer(); + + // Methodes pour le controle des jobs + virtual const JobId submitJob(const Job & job); // soumet un job au gestionnaire + virtual void deleteJob(const JobId & jobid); // retire un job du gestionnaire + virtual void holdJob(const JobId & jobid); // suspend un job en file d'attente + virtual void releaseJob(const JobId & jobid); // relache un job suspendu + virtual void alterJob(const JobId & jobid, const Parametre & param, const Environnement & env); // modifie un job en file d'attente + virtual void alterJob(const JobId & jobid, const Parametre & param); // modifie un job en file d'attente + virtual void alterJob(const JobId & jobid, const Environnement & env); // modifie un job en file d'attente + virtual JobInfo queryJob(const JobId & jobid); // renvoie l'etat du job + virtual bool isRunning(const JobId & jobid); // teste si un job est present en machine + + virtual void setParametre(const JobId & jobid, const Parametre & param) { return alterJob(jobid, param); } // modifie un job en file d'attente + virtual void setEnvironnement(const JobId & jobid, const Environnement & env) { return alterJob(jobid, env); } // modifie un job en file d'attente + + + protected: + void buildBatchScript(const Job & job) throw(EmulationException); + + private: + +#ifdef SWIG + public: + // Recupere le l'identifiant d'un job deja soumis au BatchManager + //virtual const JobId getJobIdByReference(const string & ref) { return BatchManager::getJobIdByReference(ref); } + virtual const JobId getJobIdByReference(const char * ref) { return BatchManager::getJobIdByReference(ref); } +#endif + + }; + +} + +#endif diff --git a/src/Batch/Batch_BatchManager_ePBS.cxx b/src/Batch/Batch_BatchManager_ePBS.cxx new file mode 100644 index 000000000..6073cf9ee --- /dev/null +++ b/src/Batch/Batch_BatchManager_ePBS.cxx @@ -0,0 +1,313 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager_ePBS.cxx : emulation of PBS client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#include +#include +#include +#include +#include "Batch_BatchManager_ePBS.hxx" + +namespace Batch { + + BatchManager_ePBS::BatchManager_ePBS(const FactBatchManager * parent, const char * host, const char * protocol, const char * mpiImpl) throw(InvalidArgumentException,ConnexionFailureException) : BatchManager(parent, host), BatchManager_eClient(host,protocol,mpiImpl) + { + // Nothing to do + } + + // Destructeur + BatchManager_ePBS::~BatchManager_ePBS() + { + // Nothing to do + } + + // Methode pour le controle des jobs : soumet un job au gestionnaire + const JobId BatchManager_ePBS::submitJob(const Job & job) + { + int status; + Parametre params = job.getParametre(); + const std::string dirForTmpFiles = params[TMPDIR]; + const string fileToExecute = params[EXECUTABLE]; + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + + // export input files on cluster + exportInputFiles(job); + + // build batch script for job + buildBatchScript(job); + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + + // define command to submit batch + command = _protocol; + command += " "; + + if(_username != ""){ + command += _username; + command += "@"; + } + + command += _hostname; + command += " \"cd " ; + command += dirForTmpFiles ; + command += "; qsub " ; + command += fileNameToExecute ; + command += "_Batch.sh\" > "; + command += logFile; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + throw EmulationException("Error of connection on remote host"); + + // read id of submitted job in log file + char line[128]; + FILE *fp = fopen(logFile.c_str(),"r"); + fgets( line, 128, fp); + fclose(fp); + + string sline(line); + int pos = sline.find("."); + string strjob; + if(pos == string::npos) + strjob = sline; + else + strjob = sline.substr(0,pos); + + JobId id(this, strjob); + return id; + } + + // Methode pour le controle des jobs : retire un job du gestionnaire + void BatchManager_ePBS::deleteJob(const JobId & jobid) + { + int status; + int ref; + istringstream iss(jobid.getReference()); + iss >> ref; + + // define command to submit batch + string command; + command = _protocol; + command += " "; + + if (_username != ""){ + command += _username; + command += "@"; + } + + command += _hostname; + command += " \"qdel " ; + command += iss.str(); + command += "\""; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + throw EmulationException("Error of connection on remote host"); + + cerr << "jobId = " << ref << "killed" << endl; + } + + // Methode pour le controle des jobs : suspend un job en file d'attente + void BatchManager_ePBS::holdJob(const JobId & jobid) + { + throw EmulationException("Not yet implemented"); + } + + // Methode pour le controle des jobs : relache un job suspendu + void BatchManager_ePBS::releaseJob(const JobId & jobid) + { + throw EmulationException("Not yet implemented"); + } + + + // Methode pour le controle des jobs : modifie un job en file d'attente + void BatchManager_ePBS::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env) + { + throw EmulationException("Not yet implemented"); + } + + // Methode pour le controle des jobs : modifie un job en file d'attente + void BatchManager_ePBS::alterJob(const JobId & jobid, const Parametre & param) + { + alterJob(jobid, param, Environnement()); + } + + // Methode pour le controle des jobs : modifie un job en file d'attente + void BatchManager_ePBS::alterJob(const JobId & jobid, const Environnement & env) + { + alterJob(jobid, Parametre(), env); + } + + // Methode pour le controle des jobs : renvoie l'etat du job + JobInfo BatchManager_ePBS::queryJob(const JobId & jobid) + { + string jstatus; + int id; + istringstream iss(jobid.getReference()); + iss >> id; + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + ostringstream oss; + oss << this << "_" << id; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + command = _protocol; + + if (_username != ""){ + command += _username; + command += "@"; + } + + command += _hostname; + command += " \"qstat -f " ; + command += iss.str(); + command += "\" > "; + command += logFile; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status && status != 153 && status != 256*153) + throw EmulationException("Error of connection on remote host"); + + if(status == 153 || status == 256*153 ) + // If job is finished qstat command return 153 status + jstatus = "D"; + else{ + // read status of job in log file + char line[128]; + ifstream fp(logFile.c_str(),ios::in); + + string sline; + int pos = string::npos; + while( (pos == string::npos) && fp.getline(line,80,'\n') ){ + sline = string(line); + pos = sline.find("job_state"); + }; + + if(pos!=string::npos){ + istringstream iss(sline); + iss >> jstatus; + iss >> jstatus; + iss >> jstatus; + } + else + jstatus = "U"; + } + + cerr << "jobId = " << id << " " << jstatus << endl; + + JobInfo_ePBS ji = JobInfo_ePBS(id,jstatus); + return ji; + } + + // Methode pour le controle des jobs : teste si un job est present en machine + bool BatchManager_ePBS::isRunning(const JobId & jobid) + { + throw EmulationException("Not yet implemented"); + } + + void BatchManager_ePBS::buildBatchScript(const Job & job) throw(EmulationException) + { + int status; + Parametre params = job.getParametre(); + const int nbproc = params[NBPROC]; + const std::string dirForTmpFiles = params[TMPDIR]; + const string fileToExecute = params[EXECUTABLE]; + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + std::string fileNameToExecute = "~/" + dirForTmpFiles + "/" + string(basename(fileToExecute.c_str())); + + int idx = dirForTmpFiles.find("Batch/"); + std::string filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); + + std::string TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); + + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "#PBS -o runSalome.output.log." << filelogtemp << endl ; + tempOutputFile << "#PBS -e runSalome.error.log." << filelogtemp << endl ; + tempOutputFile << _mpiImpl->boot("${PBS_NODEFILE}",nbproc); + tempOutputFile << _mpiImpl->run("${PBS_NODEFILE}",nbproc,fileNameToExecute); + tempOutputFile << _mpiImpl->halt(); + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(TmpFileName.c_str(), 0x1ED); + cerr << TmpFileName.c_str() << endl; + + string command; + if( _protocol == "rsh" ) + command = "rcp "; + else if( _protocol == "ssh" ) + command = "scp "; + else + throw EmulationException("Unknown protocol"); + command += TmpFileName; + command += " "; + if(_username != ""){ + command += _username; + command += "@"; + } + command += _hostname; + command += ":"; + command += dirForTmpFiles ; + command += "/" ; + command += rootNameToExecute ; + command += "_Batch.sh" ; + cerr << command.c_str() << endl; + status = system(command.c_str()); + if(status) + throw EmulationException("Error of connection on remote host"); + + RmTmpFile(TmpFileName); + + } + +} diff --git a/src/Batch/Batch_BatchManager_ePBS.hxx b/src/Batch/Batch_BatchManager_ePBS.hxx new file mode 100644 index 000000000..c704ccda3 --- /dev/null +++ b/src/Batch/Batch_BatchManager_ePBS.hxx @@ -0,0 +1,91 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager_ePBS.hxx : emulation of PBS client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#ifndef _BATCHMANAGER_eLSF_H_ +#define _BATCHMANAGER_eLSF_H_ + +#include "Batch_JobId.hxx" +#include "Batch_JobInfo.hxx" +#include "Batch_JobInfo_ePBS.hxx" +#include "Batch_InvalidArgumentException.hxx" +#include "Batch_ConnexionFailureException.hxx" +#include "Batch_APIInternalFailureException.hxx" +#include "Batch_NotYetImplementedException.hxx" +#include "Batch_BatchManager.hxx" +#include "Batch_BatchManager_eClient.hxx" + +namespace Batch { + + class Job; + class JobId; + class JobInfo; + class FactBatchManager; + + class BatchManager_ePBS : public BatchManager, public BatchManager_eClient + { + public: + // Constructeur et destructeur + BatchManager_ePBS(const FactBatchManager * parent, const char * host="localhost", const char * protocol="ssh", const char * mpiImpl="indif") throw(InvalidArgumentException,ConnexionFailureException); // connexion a la machine host + virtual ~BatchManager_ePBS(); + + // Recupere le nom du serveur par defaut + // static string BatchManager_LSF::getDefaultServer(); + + // Methodes pour le controle des jobs + virtual const JobId submitJob(const Job & job); // soumet un job au gestionnaire + virtual void deleteJob(const JobId & jobid); // retire un job du gestionnaire + virtual void holdJob(const JobId & jobid); // suspend un job en file d'attente + virtual void releaseJob(const JobId & jobid); // relache un job suspendu + virtual void alterJob(const JobId & jobid, const Parametre & param, const Environnement & env); // modifie un job en file d'attente + virtual void alterJob(const JobId & jobid, const Parametre & param); // modifie un job en file d'attente + virtual void alterJob(const JobId & jobid, const Environnement & env); // modifie un job en file d'attente + virtual JobInfo queryJob(const JobId & jobid); // renvoie l'etat du job + virtual bool isRunning(const JobId & jobid); // teste si un job est present en machine + + virtual void setParametre(const JobId & jobid, const Parametre & param) { return alterJob(jobid, param); } // modifie un job en file d'attente + virtual void setEnvironnement(const JobId & jobid, const Environnement & env) { return alterJob(jobid, env); } // modifie un job en file d'attente + + + protected: + void buildBatchScript(const Job & job) throw(EmulationException); + + private: + +#ifdef SWIG + public: + // Recupere le l'identifiant d'un job deja soumis au BatchManager + //virtual const JobId getJobIdByReference(const string & ref) { return BatchManager::getJobIdByReference(ref); } + virtual const JobId getJobIdByReference(const char * ref) { return BatchManager::getJobIdByReference(ref); } +#endif + + }; + +} + +#endif diff --git a/src/Batch/Batch_FactBatchManager_eLSF.cxx b/src/Batch/Batch_FactBatchManager_eLSF.cxx new file mode 100644 index 000000000..5496deb45 --- /dev/null +++ b/src/Batch/Batch_FactBatchManager_eLSF.cxx @@ -0,0 +1,58 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * FactBatchManager_eLSF.cxx : + * + * Auteur : Bernard SECHER - CEA DEN + * Date : Avril 2008 + * Projet : PAL Salome + * + */ + +#include +#include "Batch_BatchManager_eLSF.hxx" +#include "Batch_FactBatchManager_eLSF.hxx" +//#include "utilities.h" + +namespace Batch { + + static FactBatchManager_eLSF sFBM_eLSF; + + // Constructeur + FactBatchManager_eLSF::FactBatchManager_eLSF() : FactBatchManager("eLSF") + { + // Nothing to do + } + + // Destructeur + FactBatchManager_eLSF::~FactBatchManager_eLSF() + { + // Nothing to do + } + + // Functor + BatchManager * FactBatchManager_eLSF::operator() (const char * hostname) const + { + // MESSAGE("Building new BatchManager_LSF on host '" << hostname << "'"); + return new BatchManager_eLSF(this, hostname); + } + + +} diff --git a/src/Batch/Batch_FactBatchManager_eLSF.hxx b/src/Batch/Batch_FactBatchManager_eLSF.hxx new file mode 100644 index 000000000..9b54913d3 --- /dev/null +++ b/src/Batch/Batch_FactBatchManager_eLSF.hxx @@ -0,0 +1,58 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * FactBatchManager_eLSF.hxx : + * + * Auteur : Bernard SECHER : CEA DEN + * Date : Avril 2008 + * Projet : PAL Salome + * + */ + +#ifndef _FACTBATCHMANAGER_eLSF_H_ +#define _FACTBATCHMANAGER_eLSF_H_ + +using namespace std; +#include +#include +#include "Batch_FactBatchManager.hxx" + +namespace Batch { + + class BatchManager_eLSF; + + class FactBatchManager_eLSF : public FactBatchManager + { + public: + // Constructeur et destructeur + FactBatchManager_eLSF(); + virtual ~FactBatchManager_eLSF(); + + virtual BatchManager * operator() (const char * hostname) const; + + protected: + + private: + + }; + +} + +#endif diff --git a/src/Batch/Batch_FactBatchManager_ePBS.cxx b/src/Batch/Batch_FactBatchManager_ePBS.cxx new file mode 100644 index 000000000..62642327b --- /dev/null +++ b/src/Batch/Batch_FactBatchManager_ePBS.cxx @@ -0,0 +1,58 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * FactBatchManager_ePBS.cxx : + * + * Auteur : Bernard SECHER - CEA DEN + * Date : Avril 2008 + * Projet : PAL Salome + * + */ + +#include +#include "Batch_BatchManager_ePBS.hxx" +#include "Batch_FactBatchManager_ePBS.hxx" +//#include "utilities.h" + +namespace Batch { + + static FactBatchManager_ePBS sFBM_ePBS; + + // Constructeur + FactBatchManager_ePBS::FactBatchManager_ePBS() : FactBatchManager("ePBS") + { + // Nothing to do + } + + // Destructeur + FactBatchManager_ePBS::~FactBatchManager_ePBS() + { + // Nothing to do + } + + // Functor + BatchManager * FactBatchManager_ePBS::operator() (const char * hostname) const + { + // MESSAGE("Building new BatchManager_PBS on host '" << hostname << "'"); + return new BatchManager_ePBS(this, hostname); + } + + +} diff --git a/src/Batch/Batch_FactBatchManager_ePBS.hxx b/src/Batch/Batch_FactBatchManager_ePBS.hxx new file mode 100644 index 000000000..1ec238d8a --- /dev/null +++ b/src/Batch/Batch_FactBatchManager_ePBS.hxx @@ -0,0 +1,58 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * FactBatchManager_ePBS.hxx : + * + * Auteur : Bernard SECHER : CEA DEN + * Date : Avril 2008 + * Projet : PAL Salome + * + */ + +#ifndef _FACTBATCHMANAGER_ePBS_H_ +#define _FACTBATCHMANAGER_ePBS_H_ + +using namespace std; +#include +#include +#include "Batch_FactBatchManager.hxx" + +namespace Batch { + + class BatchManager_ePBS; + + class FactBatchManager_ePBS : public FactBatchManager + { + public: + // Constructeur et destructeur + FactBatchManager_ePBS(); + virtual ~FactBatchManager_ePBS(); + + virtual BatchManager * operator() (const char * hostname) const; + + protected: + + private: + + }; + +} + +#endif diff --git a/src/Batch/Batch_JobInfo_eLSF.cxx b/src/Batch/Batch_JobInfo_eLSF.cxx new file mode 100644 index 000000000..3b8383811 --- /dev/null +++ b/src/Batch/Batch_JobInfo_eLSF.cxx @@ -0,0 +1,90 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * JobInfo_eLSF.cxx : emulation of LSF client + * + * Auteur : Bernard SECHER - CEA DEN + * Mail : mailto:bernard.secher@cea.fr + * Date : Thu Apr 24 10:17:22 2008 + * Projet : PAL Salome + * + */ + +#include +#include +#include "Batch_Parametre.hxx" +#include "Batch_Environnement.hxx" +#include "Batch_RunTimeException.hxx" +#include "Batch_APIInternalFailureException.hxx" +#include "Batch_JobInfo_eLSF.hxx" + +namespace Batch { + + + + // Constructeurs + JobInfo_eLSF::JobInfo_eLSF(int id, string status) : JobInfo() + { + // On remplit les membres _param et _env + ostringstream oss; + oss << id; + _param[ID] = oss.str(); + _param[STATE] = status; + + if( status.find("RUN") != string::npos) + _running = true; + + } + + // Teste si un job est present en machine + bool JobInfo_eLSF::isRunning() const + { + return _running; + } + + + // Destructeur + JobInfo_eLSF::~JobInfo_eLSF() + { + // Nothing to do + } + + // Convertit une date HH:MM:SS en secondes + long JobInfo_eLSF::HMStoLong(const string & s) + { + long hour, min, sec; + + sscanf( s.c_str(), "%ld:%ld:%ld", &hour, &min, &sec); + return ( ( ( hour * 60L ) + min ) * 60L ) + sec; + } + + // Methode pour l'interfacage avec Python (SWIG) : affichage en Python + string JobInfo_eLSF::__str__() const + { + ostringstream sst; + sst << " +#include +#include "Batch_Parametre.hxx" +#include "Batch_Environnement.hxx" +#include "Batch_RunTimeException.hxx" +#include "Batch_APIInternalFailureException.hxx" +#include "Batch_JobInfo_ePBS.hxx" + +namespace Batch { + + + + // Constructeurs + JobInfo_ePBS::JobInfo_ePBS(int id, string status) : JobInfo() + { + // On remplit les membres _param et _env + ostringstream oss; + oss << id; + _param[ID] = oss.str(); + _param[STATE] = status; + + if( status.find("R") != string::npos) + _running = true; + + } + + // Teste si un job est present en machine + bool JobInfo_ePBS::isRunning() const + { + return _running; + } + + + // Destructeur + JobInfo_ePBS::~JobInfo_ePBS() + { + // Nothing to do + } + + // Convertit une date HH:MM:SS en secondes + long JobInfo_ePBS::HMStoLong(const string & s) + { + long hour, min, sec; + + sscanf( s.c_str(), "%ld:%ld:%ld", &hour, &min, &sec); + return ( ( ( hour * 60L ) + min ) * 60L ) + sec; + } + + // Methode pour l'interfacage avec Python (SWIG) : affichage en Python + string JobInfo_ePBS::__str__() const + { + ostringstream sst; + sst << " +#include +#include +#include "MpiImpl.hxx" + +using namespace std; + +// Constructor +MpiImpl::MpiImpl() +{ + cerr << "MpiImpl constructor" << endl; +} + +// Destructor +MpiImpl::~MpiImpl() +{ + cerr << "MpiImpl destructor" << endl; +} + +// lam implementation +// Constructor +MpiImpl_LAM::MpiImpl_LAM() : MpiImpl() +{ +} + +// Destructor +MpiImpl_LAM::~MpiImpl_LAM() +{ + cerr << "MpiImpl_LAM destructor" << endl; +} + +string MpiImpl_LAM::size() +{ + return "${LAMWORLD}"; +} + +string MpiImpl_LAM::rank() +{ + return "${LAMRANK}"; +} + +string MpiImpl_LAM::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "lamboot " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_LAM::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_LAM::halt() +{ + ostringstream oss; + oss << "lamhalt" << endl; + return oss.str(); +} + +// mpich1 implementation +// Constructor +MpiImpl_MPICH1::MpiImpl_MPICH1() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH1::~MpiImpl_MPICH1() +{ + cerr << "MpiImpl_MPICH1 destructor" << endl; +} + +string MpiImpl_MPICH1::size() +{ + throw MpiImplException("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::rank() +{ + throw MpiImplException("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_MPICH1::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -machinefile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH1::halt() +{ + return ""; +} + +// mpich2 implementation +// Constructor +MpiImpl_MPICH2::MpiImpl_MPICH2() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH2::~MpiImpl_MPICH2() +{ + cerr << "MpiImpl_MPICH2 destructor" << endl; +} + +string MpiImpl_MPICH2::size() +{ + return "${PMI_SIZE}"; +} + +string MpiImpl_MPICH2::rank() +{ + return "${PMI_RANK}"; +} + +string MpiImpl_MPICH2::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "mpdboot -n " << nbnodes << " -f " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::halt() +{ + ostringstream oss; + oss << "mpdallexit" << endl; + return oss.str(); +} + +// openmpi implementation +// Constructor +MpiImpl_OPENMPI::MpiImpl_OPENMPI() : MpiImpl() +{ +} + +// Destructor +MpiImpl_OPENMPI::~MpiImpl_OPENMPI() +{ + cerr << "MpiImpl_OPENMPI destructor" << endl; +} + +string MpiImpl_OPENMPI::size() +{ + return "${OMPI_MCA_ns_nds_num_procs}"; +} + +string MpiImpl_OPENMPI::rank() +{ + return "${OMPI_MCA_ns_nds_vpid}"; +} + +string MpiImpl_OPENMPI::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_OPENMPI::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -hostfile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_OPENMPI::halt() +{ + return ""; +} + +// slurm implementation +// Constructor +MpiImpl_SLURM::MpiImpl_SLURM() : MpiImpl() +{ +} + +// Destructor +MpiImpl_SLURM::~MpiImpl_SLURM() +{ + cerr << "MpiImpl_SLURM destructor" << endl; +} + +string MpiImpl_SLURM::size() +{ + return "${SLURM_NPROCS}"; +} + +string MpiImpl_SLURM::rank() +{ + return "${SLURM_PROCID}"; +} + +string MpiImpl_SLURM::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_SLURM::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "srun " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_SLURM::halt() +{ + return ""; +} + diff --git a/src/Batch/MpiImpl.hxx b/src/Batch/MpiImpl.hxx new file mode 100644 index 000000000..07f306bdf --- /dev/null +++ b/src/Batch/MpiImpl.hxx @@ -0,0 +1,156 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_MPIIMPL_H_ +#define _BL_MPIIMPL_H_ + +#include + +class MpiImplException +{ +public: + const std::string msg; + + MpiImplException(const std::string m) : msg(m) {} +}; + +class MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl(); // constrcuctor + virtual ~MpiImpl(); //Destructor + + virtual std::string size() = 0; // get number of process of current job + virtual std::string rank() = 0; // get process number of current job + virtual std::string boot(const std::string machinefile, const unsigned int nbnodes) = 0; // get boot command + virtual std::string run(const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute) = 0; // get run command + virtual std::string halt() = 0; // get stop command + +protected: + +private: + +}; + +class MpiImpl_LAM : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_LAM(); // constructor + virtual ~MpiImpl_LAM(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_MPICH1 : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_MPICH1(); // constructor + virtual ~MpiImpl_MPICH1(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_MPICH2 : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_MPICH2(); // constructor + virtual ~MpiImpl_MPICH2(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_OPENMPI : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_OPENMPI(); // constructor + virtual ~MpiImpl_OPENMPI(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_SLURM : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_SLURM(); // constructor + virtual ~MpiImpl_SLURM(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +#endif diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index 0730228bd..527c43282 100644 --- a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -332,6 +332,7 @@ void Launcher_cpp::buildSalomeCouplingScript(BatchLight::Job* job, const ParserR // waiting standard killing improvement by P. Rascle tempOutputFile << " killall notifd" << endl; tempOutputFile << " killall omniNames" << endl; + tempOutputFile << " rm core.*" << endl; // ------------------------------------- // Other nodes launch a container diff --git a/src/Launcher/Makefile.am b/src/Launcher/Makefile.am index a54ef4490..7b6986fe1 100644 --- a/src/Launcher/Makefile.am +++ b/src/Launcher/Makefile.am @@ -40,7 +40,6 @@ salomeinclude_HEADERS = \ BatchLight_BatchManager_PBS.hxx \ BatchLight_BatchManager_SLURM.hxx \ BatchLight_Job.hxx \ - MpiImpl.hxx \ SALOME_Launcher.hxx \ Launcher.hxx @@ -115,8 +114,7 @@ libLauncher_la_SOURCES=\ BatchLight_BatchManager.cxx \ BatchLight_BatchManager_SLURM.cxx \ BatchLight_BatchManager_PBS.cxx \ - BatchLight_Job.cxx \ - MpiImpl.cxx + BatchLight_Job.cxx libLauncher_la_CPPFLAGS =\ -I$(srcdir)/../Batch \ diff --git a/src/Launcher/MpiImpl.cxx b/src/Launcher/MpiImpl.cxx deleted file mode 100644 index 012eeee73..000000000 --- a/src/Launcher/MpiImpl.cxx +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include -#include -#include -#include "MpiImpl.hxx" - -using namespace std; - -// Constructor -MpiImpl::MpiImpl() -{ - cerr << "MpiImpl constructor" << endl; -} - -// Destructor -MpiImpl::~MpiImpl() -{ - cerr << "MpiImpl destructor" << endl; -} - -// lam implementation -// Constructor -MpiImpl_LAM::MpiImpl_LAM() : MpiImpl() -{ -} - -// Destructor -MpiImpl_LAM::~MpiImpl_LAM() -{ - cerr << "MpiImpl_LAM destructor" << endl; -} - -string MpiImpl_LAM::size() -{ - return "${LAMWORLD}"; -} - -string MpiImpl_LAM::rank() -{ - return "${LAMRANK}"; -} - -string MpiImpl_LAM::boot(const string machinefile, const unsigned int nbnodes) -{ - ostringstream oss; - oss << "lamboot " << machinefile << endl; - return oss.str(); -} - -string MpiImpl_LAM::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_LAM::halt() -{ - ostringstream oss; - oss << "lamhalt" << endl; - return oss.str(); -} - -// mpich1 implementation -// Constructor -MpiImpl_MPICH1::MpiImpl_MPICH1() : MpiImpl() -{ -} - -// Destructor -MpiImpl_MPICH1::~MpiImpl_MPICH1() -{ - cerr << "MpiImpl_MPICH1 destructor" << endl; -} - -string MpiImpl_MPICH1::size() -{ - throw MpiImplException("mpich1 doesn't work with this batch system to submit salome session"); -} - -string MpiImpl_MPICH1::rank() -{ - throw MpiImplException("mpich1 doesn't work with this batch system to submit salome session"); -} - -string MpiImpl_MPICH1::boot(const string machinefile, const unsigned int nbnodes) -{ - return ""; -} - -string MpiImpl_MPICH1::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -machinefile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_MPICH1::halt() -{ - return ""; -} - -// mpich2 implementation -// Constructor -MpiImpl_MPICH2::MpiImpl_MPICH2() : MpiImpl() -{ -} - -// Destructor -MpiImpl_MPICH2::~MpiImpl_MPICH2() -{ - cerr << "MpiImpl_MPICH2 destructor" << endl; -} - -string MpiImpl_MPICH2::size() -{ - return "${PMI_SIZE}"; -} - -string MpiImpl_MPICH2::rank() -{ - return "${PMI_RANK}"; -} - -string MpiImpl_MPICH2::boot(const string machinefile, const unsigned int nbnodes) -{ - ostringstream oss; - oss << "mpdboot -n " << nbnodes << " -f " << machinefile << endl; - return oss.str(); -} - -string MpiImpl_MPICH2::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_MPICH2::halt() -{ - ostringstream oss; - oss << "mpdallexit" << endl; - return oss.str(); -} - -// openmpi implementation -// Constructor -MpiImpl_OPENMPI::MpiImpl_OPENMPI() : MpiImpl() -{ -} - -// Destructor -MpiImpl_OPENMPI::~MpiImpl_OPENMPI() -{ - cerr << "MpiImpl_OPENMPI destructor" << endl; -} - -string MpiImpl_OPENMPI::size() -{ - return "${OMPI_MCA_ns_nds_num_procs}"; -} - -string MpiImpl_OPENMPI::rank() -{ - return "${OMPI_MCA_ns_nds_vpid}"; -} - -string MpiImpl_OPENMPI::boot(const string machinefile, const unsigned int nbnodes) -{ - return ""; -} - -string MpiImpl_OPENMPI::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -hostfile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_OPENMPI::halt() -{ - return ""; -} - -// slurm implementation -// Constructor -MpiImpl_SLURM::MpiImpl_SLURM() : MpiImpl() -{ -} - -// Destructor -MpiImpl_SLURM::~MpiImpl_SLURM() -{ - cerr << "MpiImpl_SLURM destructor" << endl; -} - -string MpiImpl_SLURM::size() -{ - return "${SLURM_NPROCS}"; -} - -string MpiImpl_SLURM::rank() -{ - return "${SLURM_PROCID}"; -} - -string MpiImpl_SLURM::boot(const string machinefile, const unsigned int nbnodes) -{ - return ""; -} - -string MpiImpl_SLURM::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "srun " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_SLURM::halt() -{ - return ""; -} - diff --git a/src/Launcher/MpiImpl.hxx b/src/Launcher/MpiImpl.hxx deleted file mode 100644 index 07f306bdf..000000000 --- a/src/Launcher/MpiImpl.hxx +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_MPIIMPL_H_ -#define _BL_MPIIMPL_H_ - -#include - -class MpiImplException -{ -public: - const std::string msg; - - MpiImplException(const std::string m) : msg(m) {} -}; - -class MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl(); // constrcuctor - virtual ~MpiImpl(); //Destructor - - virtual std::string size() = 0; // get number of process of current job - virtual std::string rank() = 0; // get process number of current job - virtual std::string boot(const std::string machinefile, const unsigned int nbnodes) = 0; // get boot command - virtual std::string run(const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute) = 0; // get run command - virtual std::string halt() = 0; // get stop command - -protected: - -private: - -}; - -class MpiImpl_LAM : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_LAM(); // constructor - virtual ~MpiImpl_LAM(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -class MpiImpl_MPICH1 : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_MPICH1(); // constructor - virtual ~MpiImpl_MPICH1(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -class MpiImpl_MPICH2 : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_MPICH2(); // constructor - virtual ~MpiImpl_MPICH2(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -class MpiImpl_OPENMPI : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_OPENMPI(); // constructor - virtual ~MpiImpl_OPENMPI(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -class MpiImpl_SLURM : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_SLURM(); // constructor - virtual ~MpiImpl_SLURM(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -#endif