From 3ac70c37920ff7b12f9458bb0980a1d45ce63d7c Mon Sep 17 00:00:00 2001 From: secher Date: Fri, 27 Jun 2008 14:03:26 +0000 Subject: [PATCH] add Sun Grid Engine batch manager --- src/Batch/Batch_BatchManager_eClient.cxx | 2 + src/Batch/Batch_BatchManager_eClient.hxx | 2 +- src/Batch/Batch_BatchManager_eLSF.hxx | 2 +- src/Batch/Batch_BatchManager_ePBS.hxx | 2 +- src/Batch/Batch_BatchManager_eSGE.cxx | 329 ++++++++++++++++++ src/Batch/Batch_BatchManager_eSGE.hxx | 92 +++++ src/Batch/Batch_FactBatchManager_eSGE.cxx | 64 ++++ src/Batch/Batch_FactBatchManager_eSGE.hxx | 60 ++++ src/Batch/Batch_JobInfo_eSGE.cxx | 109 ++++++ src/Batch/Batch_JobInfo_eSGE.hxx | 69 ++++ src/Batch/Makefile.am | 6 + src/Batch/MpiImpl.cxx | 4 +- src/Launcher/BatchTest.cxx | 15 +- src/Launcher/Launcher.cxx | 32 +- src/Launcher/SALOME_Launcher.cxx | 2 +- .../SALOME_ResourcesCatalog_Handler.cxx | 10 +- .../SALOME_ResourcesCatalog_Parser.cxx | 2 +- .../SALOME_ResourcesCatalog_Parser.hxx | 4 +- .../SALOME_ResourcesManager.cxx | 6 +- 19 files changed, 790 insertions(+), 22 deletions(-) create mode 100644 src/Batch/Batch_BatchManager_eSGE.cxx create mode 100644 src/Batch/Batch_BatchManager_eSGE.hxx create mode 100644 src/Batch/Batch_FactBatchManager_eSGE.cxx create mode 100644 src/Batch/Batch_FactBatchManager_eSGE.hxx create mode 100644 src/Batch/Batch_JobInfo_eSGE.cxx create mode 100644 src/Batch/Batch_JobInfo_eSGE.hxx diff --git a/src/Batch/Batch_BatchManager_eClient.cxx b/src/Batch/Batch_BatchManager_eClient.cxx index d0b76fe9b..140d08de8 100644 --- a/src/Batch/Batch_BatchManager_eClient.cxx +++ b/src/Batch/Batch_BatchManager_eClient.cxx @@ -196,6 +196,8 @@ namespace Batch { return new MpiImpl_OPENMPI(); else if(mpiImpl == "slurm") return new MpiImpl_SLURM(); + else if(mpiImpl == "nompi") + throw EmulationException("you must specified an mpi implementation for batch manager"); else{ ostringstream oss; oss << mpiImpl << " : not yet implemented"; diff --git a/src/Batch/Batch_BatchManager_eClient.hxx 
b/src/Batch/Batch_BatchManager_eClient.hxx index 717eae6a3..5ae9cfcb0 100644 --- a/src/Batch/Batch_BatchManager_eClient.hxx +++ b/src/Batch/Batch_BatchManager_eClient.hxx @@ -50,7 +50,7 @@ namespace Batch { { public: // Constructeur et destructeur - BatchManager_eClient(const Batch::FactBatchManager * parent, const char* host="localhost", const char* protocol="ssh", const char* mpiImpl="indif"); + BatchManager_eClient(const Batch::FactBatchManager * parent, const char* host="localhost", const char* protocol="ssh", const char* mpiImpl="mpich1"); virtual ~BatchManager_eClient(); void importOutputFiles( const Job & job, const std::string directory ) throw(EmulationException); diff --git a/src/Batch/Batch_BatchManager_eLSF.hxx b/src/Batch/Batch_BatchManager_eLSF.hxx index 00d79a4fa..0b37d9072 100644 --- a/src/Batch/Batch_BatchManager_eLSF.hxx +++ b/src/Batch/Batch_BatchManager_eLSF.hxx @@ -52,7 +52,7 @@ namespace Batch { { public: // Constructeur et destructeur - BatchManager_eLSF(const FactBatchManager * parent, const char * host="localhost", const char * protocol="ssh", const char * mpiImpl="indif") throw(InvalidArgumentException,ConnexionFailureException); // connexion a la machine host + BatchManager_eLSF(const FactBatchManager * parent, const char * host="localhost", const char * protocol="ssh", const char * mpiImpl="nompi") throw(InvalidArgumentException,ConnexionFailureException); // connexion a la machine host virtual ~BatchManager_eLSF(); // Recupere le nom du serveur par defaut diff --git a/src/Batch/Batch_BatchManager_ePBS.hxx b/src/Batch/Batch_BatchManager_ePBS.hxx index 42f4b1b69..6f9783dfd 100644 --- a/src/Batch/Batch_BatchManager_ePBS.hxx +++ b/src/Batch/Batch_BatchManager_ePBS.hxx @@ -51,7 +51,7 @@ namespace Batch { { public: // Constructeur et destructeur - BatchManager_ePBS(const FactBatchManager * parent, const char * host="localhost", const char * protocol="ssh", const char * mpiImpl="indif") 
throw(InvalidArgumentException,ConnexionFailureException); // connexion a la machine host + BatchManager_ePBS(const FactBatchManager * parent, const char * host="localhost", const char * protocol="ssh", const char * mpiImpl="nompi") throw(InvalidArgumentException,ConnexionFailureException); // connexion a la machine host virtual ~BatchManager_ePBS(); // Recupere le nom du serveur par defaut diff --git a/src/Batch/Batch_BatchManager_eSGE.cxx b/src/Batch/Batch_BatchManager_eSGE.cxx new file mode 100644 index 000000000..e6fac7f57 --- /dev/null +++ b/src/Batch/Batch_BatchManager_eSGE.cxx @@ -0,0 +1,329 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * BatchManager_eSGE.cxx : emulation of SGE client
+ *
+ * Author  : Bernard SECHER - CEA DEN
+ * Mail    : mailto:bernard.secher@cea.fr
+ * Date    : Thu Apr 24 10:17:22 2008
+ * Project : PAL Salome
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "Batch_BatchManager_eSGE.hxx"
+
+namespace Batch {
+
+  BatchManager_eSGE::BatchManager_eSGE(const FactBatchManager * parent, const char * host, const char * protocol, const char * mpiImpl) throw(InvalidArgumentException,ConnexionFailureException) : BatchManager_eClient(parent,host,protocol,mpiImpl)
+  {
+    // Nothing to do: every argument is forwarded to BatchManager_eClient
+  }
+
+  // Destructor
+  BatchManager_eSGE::~BatchManager_eSGE()
+  {
+    // Nothing to do
+  }
+
+  // Job control: submits a job (exports inputs, builds the script, runs qsub remotely) and returns its id
+  const JobId BatchManager_eSGE::submitJob(const Job & job)
+  {
+    int status;
+    Parametre params = job.getParametre();
+    const std::string dirForTmpFiles = params[TMPDIR];
+    const string fileToExecute = params[EXECUTABLE];
+    string::size_type p1 = fileToExecute.find_last_of("/");
+    string::size_type p2 = fileToExecute.find_last_of(".");
+    std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // executable name without directory and extension
+    cerr << "Entree BatchManager_eSGE::submitJob" << endl;
+
+    // export input files on cluster
+    exportInputFiles(job);
+
+    // build batch script for job
+    buildBatchScript(job);
+
+    // define name of log file: /tmp/logs/$USER/batchSalome_<random>.log
+    string logFile="/tmp/logs/";
+    logFile += getenv("USER"); // NOTE(review): getenv() result is not checked for NULL
+    logFile += "/batchSalome_";
+    srand ( time(NULL) );
+    int ir = rand();
+    ostringstream oss;
+    oss << ir;
+    logFile += oss.str();
+    logFile += ".log";
+
+    string command;
+
+    // define command to submit batch: <protocol> [user@]host "cd <tmpdir>; qsub <name>_Batch.sh" > <logFile>
+    command = _protocol;
+    command += " ";
+
+    if(_username != ""){
+      command += _username;
+      command += "@";
+    }
+
+    command += _hostname;
+    command += " \"cd " ;
+    command += dirForTmpFiles ;
+    command += "; qsub " ;
+    command += fileNameToExecute ;
+    command += "_Batch.sh\" > ";
+    command += logFile;
+    cerr << command.c_str() << endl;
+    status = system(command.c_str());
+    if(status)
+      throw EmulationException("Error of connection on remote host");
+
+    // read id of submitted job in log file; a missing or empty file is reported, not dereferenced
+    char line[128] = "";
+    FILE *fp = fopen(logFile.c_str(),"r");
+    const bool gotLine = ( fp != NULL && fgets( line, 128, fp) != NULL );
+    if( fp != NULL ) fclose(fp);
+    if( !gotLine ) throw EmulationException("Unable to read job id from log file");
+    string strjob;
+    istringstream iss(line);
+    iss >> strjob >> strjob >> strjob; // keep the third whitespace-separated word of the qsub answer
+
+    JobId id(this, strjob);
+    return id;
+  }
+
+  // Job control: removes a job from the batch manager (remote qdel)
+  void BatchManager_eSGE::deleteJob(const JobId & jobid)
+  {
+    int status;
+    int ref;
+    istringstream iss(jobid.getReference());
+    iss >> ref;
+
+    // define command to delete the job: <protocol> [user@]host "qdel <reference>"
+    string command;
+    command = _protocol;
+    command += " ";
+
+    if (_username != ""){
+      command += _username;
+      command += "@";
+    }
+
+    command += _hostname;
+    command += " \"qdel " ;
+    command += iss.str(); // str() returns the whole reference text, regardless of the extraction above
+    command += "\"";
+    cerr << command.c_str() << endl;
+    status = system(command.c_str());
+    if(status)
+      throw EmulationException("Error of connection on remote host");
+
+    cerr << "jobId = " << ref << " killed" << endl;
+  }
+
+  // Job control: holds a queued job (not implemented, always throws)
+  void BatchManager_eSGE::holdJob(const JobId & jobid)
+  {
+    throw EmulationException("Not yet implemented");
+  }
+
+  // Job control: releases a held job (not implemented, always throws)
+  void BatchManager_eSGE::releaseJob(const JobId & jobid)
+  {
+    throw EmulationException("Not yet implemented");
+  }
+
+
+  // Job control: alters a queued job (not implemented, always throws)
+  void BatchManager_eSGE::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env)
+  {
+    throw EmulationException("Not yet implemented");
+  }
+
+  // Methode
pour le controle des jobs : modifie un job en file d'attente
+  void BatchManager_eSGE::alterJob(const JobId & jobid, const Parametre & param)
+  {
+    alterJob(jobid, param, Environnement()); // delegates to the three-argument overload
+  }
+
+  // Job control: alters a queued job (environment only)
+  void BatchManager_eSGE::alterJob(const JobId & jobid, const Environnement & env)
+  {
+    alterJob(jobid, Parametre(), env); // delegates to the three-argument overload
+  }
+
+  // Job control: returns the state of a job (remote "qstat | grep <reference>" parsed by JobInfo_eSGE)
+  JobInfo BatchManager_eSGE::queryJob(const JobId & jobid)
+  {
+    int id;
+    istringstream iss(jobid.getReference());
+    iss >> id;
+
+    // define name of log file: /tmp/logs/$USER/batchSalome_<this>_<id>.log
+    string logFile="/tmp/logs/";
+    logFile += getenv("USER"); // NOTE(review): getenv() result is not checked for NULL
+    logFile += "/batchSalome_";
+
+    ostringstream oss;
+    oss << this << "_" << id;
+    logFile += oss.str();
+    logFile += ".log";
+
+    string command;
+    int status;
+
+    // define command to query the job
+    command = _protocol;
+    command += " ";
+
+    if (_username != ""){
+      command += _username;
+      command += "@";
+    }
+
+    command += _hostname;
+    command += " \"qstat | grep " ;
+    command += iss.str();
+    command += "\" > ";
+    command += logFile;
+    cerr << command.c_str() << endl;
+    status = system(command.c_str());
+    if(status && status != 256) // presumably 256 is exit status 1 (grep found no line) as returned by system() - TODO confirm
+      throw EmulationException("Error of connection on remote host");
+
+    JobInfo_eSGE ji = JobInfo_eSGE(id,logFile);
+    return ji;
+  }
+
+  // Job control: tests whether a job is present on the machine (not implemented, always throws)
+  bool BatchManager_eSGE::isRunning(const JobId & jobid)
+  {
+    throw EmulationException("Not yet implemented");
+  }
+
+  void BatchManager_eSGE::buildBatchScript(const Job & job) throw(EmulationException)
+  {
+    int status;
+    Parametre params = job.getParametre();
+    Environnement env = job.getEnvironnement();
+    const long nbproc = params[NBPROC];
+    const long edt = params[MAXWALLTIME];
+    const long mem = params[MAXRAMSIZE];
+    const string workDir = params[WORKDIR];
+    const std::string dirForTmpFiles = params[TMPDIR];
+    const string fileToExecute = params[EXECUTABLE];
+    const string home = params[HOMEDIR];
+    std::string rootNameToExecute;
+    std::string fileNameToExecute;
+    std::string filelogtemp;
+    if( fileToExecute.size() > 0 ){
+      string::size_type p1 = fileToExecute.find_last_of("/");
+      string::size_type p2 = fileToExecute.find_last_of(".");
+      rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1);
+      fileNameToExecute = "~/" + dirForTmpFiles + "/" + string(basename(fileToExecute.c_str()));
+
+      int idx = dirForTmpFiles.find("Batch/"); // NOTE(review): a failed find() is not checked here
+      filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); // text following "Batch/"
+    }
+    else{
+      rootNameToExecute = "command";
+    }
+
+    std::string TmpFileName = BuildTemporaryFileName();
+    ofstream tempOutputFile;
+    tempOutputFile.open(TmpFileName.c_str(), ofstream::out );
+
+    tempOutputFile << "#! /bin/sh -f" << endl;
+    tempOutputFile << "#$ -pe mpich " << nbproc << endl;
+    if( edt > 0 )
+      tempOutputFile << "#$ -l h_rt=" << getWallTime(edt) << endl ;
+    if( mem > 0 )
+      tempOutputFile << "#$ -l h_vmem=" << mem << "M" << endl ;
+    if( fileToExecute.size() > 0 ){
+      tempOutputFile << "#$ -o " << home << "/" << dirForTmpFiles << "/output.log." << filelogtemp << endl ;
+      tempOutputFile << "#$ -e " << home << "/" << dirForTmpFiles << "/error.log." << filelogtemp << endl ;
+    }
+    else{
+      tempOutputFile << "#$ -o " << dirForTmpFiles << "/" << env["LOGFILE"] << ".output.log" << endl ;
+      tempOutputFile << "#$ -e " << dirForTmpFiles << "/" << env["LOGFILE"] << ".error.log" << endl ;
+    }
+    if( workDir.size() > 0 )
+      tempOutputFile << "cd " << workDir << endl ;
+    if( fileToExecute.size() > 0 ){
+      tempOutputFile << _mpiImpl->boot("",nbproc);
+      tempOutputFile << _mpiImpl->run("${TMPDIR}/machines",nbproc,fileNameToExecute);
+      tempOutputFile << _mpiImpl->halt();
+    }
+    else{
+      tempOutputFile << "source " << env["SOURCEFILE"] << endl ;
+      tempOutputFile << env["COMMAND"];
+    }
+
+    tempOutputFile.flush();
+    tempOutputFile.close();
+    chmod(TmpFileName.c_str(), 0x1ED); // 0x1ED == 0755 (rwxr-xr-x)
+    cerr << TmpFileName.c_str() << endl;
+
+    string command; // copy command: rcp/scp <tmpfile> [user@]host:<tmpdir>/<root>_Batch.sh
+    if( _protocol == "rsh" )
+      command = "rcp ";
+    else if( _protocol == "ssh" )
+      command = "scp ";
+    else
+      throw EmulationException("Unknown protocol");
+    command += TmpFileName;
+    command += " ";
+    if(_username != ""){
+      command += _username;
+      command += "@";
+    }
+    command += _hostname;
+    command += ":";
+    command += dirForTmpFiles ;
+    command += "/" ;
+    command += rootNameToExecute ;
+    command += "_Batch.sh" ;
+    cerr << command.c_str() << endl;
+    status = system(command.c_str());
+    if(status)
+      throw EmulationException("Error of connection on remote host");
+
+    RmTmpFile(TmpFileName);
+
+  }
+
+  std::string BatchManager_eSGE::getWallTime(const long edt)
+  {
+    // edt is split as minutes into h:mm; the seconds field is appended because h_rt is written as h:m:s
+    const long h = edt / 60;
+    const long m = edt - h*60;
+    ostringstream oss;
+    if( m >= 10 )
+      oss << h << ":" << m << ":00";
+    else
+      oss << h << ":0" << m << ":00";
+    return oss.str();
+  }
+
+}
diff --git a/src/Batch/Batch_BatchManager_eSGE.hxx b/src/Batch/Batch_BatchManager_eSGE.hxx
new file mode 100644
index 000000000..253ab5d42
--- /dev/null
+++ b/src/Batch/Batch_BatchManager_eSGE.hxx
@@ -0,0 +1,92 @@
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can
redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * BatchManager_eSGE.hxx : emulation of SGE client
+ *
+ * Author  : Bernard SECHER - CEA DEN
+ * Mail    : mailto:bernard.secher@cea.fr
+ * Date    : Thu Apr 24 10:17:22 2008
+ * Project : PAL Salome
+ *
+ */
+
+#ifndef _BATCHMANAGER_eSGE_H_
+#define _BATCHMANAGER_eSGE_H_
+
+#include "Batch_JobId.hxx"
+#include "Batch_JobInfo.hxx"
+#include "Batch_JobInfo_eSGE.hxx"
+#include "Batch_InvalidArgumentException.hxx"
+#include "Batch_ConnexionFailureException.hxx"
+#include "Batch_APIInternalFailureException.hxx"
+#include "Batch_NotYetImplementedException.hxx"
+#include "Batch_BatchManager.hxx"
+#include "Batch_BatchManager_eClient.hxx"
+
+namespace Batch {
+
+  class Job;
+  class JobId;
+  class JobInfo;
+  class FactBatchManager;
+
+  class BatchManager_eSGE : public BatchManager_eClient
+  {
+  public:
+    // Constructor and destructor
+    BatchManager_eSGE(const FactBatchManager * parent, const char * host="localhost", const char * protocol="ssh", const char * mpiImpl="nompi") throw(InvalidArgumentException,ConnexionFailureException); // connection to the machine "host"
+    virtual ~BatchManager_eSGE();
+
+    // Returns the name of the default server
+    // static string BatchManager_LSF::getDefaultServer();
+
+    // Job control methods
+    virtual const JobId submitJob(const Job & job); // submits a job to the batch manager
+    virtual void deleteJob(const JobId & jobid); // removes a job from the batch manager
+    virtual void holdJob(const JobId & jobid); // holds a queued job
+    virtual void releaseJob(const JobId & jobid); // releases a held job
+    virtual void alterJob(const JobId & jobid, const Parametre & param, const Environnement & env); // alters a queued job
+    virtual void alterJob(const JobId & jobid, const Parametre & param); // alters a queued job
+    virtual void alterJob(const JobId & jobid, const Environnement & env); // alters a queued job
+    virtual JobInfo queryJob(const JobId & jobid); // returns the state of the job
+    virtual bool isRunning(const JobId & jobid); // tests whether a job is present on the machine
+
+    virtual void setParametre(const JobId & jobid, const Parametre & param) { return alterJob(jobid, param); } // alters a queued job
+    virtual void setEnvironnement(const JobId & jobid, const Environnement & env) { return alterJob(jobid, env); } // alters a queued job
+
+
+  protected:
+    void buildBatchScript(const Job & job) throw(EmulationException);
+    std::string getWallTime(const long edt);
+
+  private:
+
+#ifdef SWIG
+  public:
+    // Returns the identifier of a job already submitted to the BatchManager
+    //virtual const JobId getJobIdByReference(const string & ref) { return BatchManager::getJobIdByReference(ref); }
+    virtual const JobId getJobIdByReference(const char * ref) { return BatchManager::getJobIdByReference(ref); }
+#endif
+
+  };
+
+}
+
+#endif // _BATCHMANAGER_eSGE_H_ (guard named after this header, not after the LSF one)
diff --git a/src/Batch/Batch_FactBatchManager_eSGE.cxx b/src/Batch/Batch_FactBatchManager_eSGE.cxx
new file mode 100644
index 000000000..9007e1270
--- /dev/null
+++ b/src/Batch/Batch_FactBatchManager_eSGE.cxx
@@ -0,0 +1,64 @@
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free
software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * FactBatchManager_eSGE.cxx : factory building BatchManager_eSGE objects
+ *
+ * Author  : Bernard SECHER - CEA DEN
+ * Date    : April 2008
+ * Project : PAL Salome
+ *
+ */
+
+#include
+#include "Batch_BatchManager_eSGE.hxx"
+#include "Batch_FactBatchManager_eSGE.hxx"
+//#include "utilities.h"
+
+namespace Batch {
+
+  static FactBatchManager_eSGE sFBM_eSGE; // file-local instance, constructed during static initialisation
+
+  // Constructor: passes the type name "eSGE" to the FactBatchManager_eClient base
+  FactBatchManager_eSGE::FactBatchManager_eSGE() : FactBatchManager_eClient("eSGE")
+  {
+    // Nothing to do
+  }
+
+  // Destructor
+  FactBatchManager_eSGE::~FactBatchManager_eSGE()
+  {
+    // Nothing to do
+  }
+
+  // Functor: builds a BatchManager_eSGE for hostname, using the constructor's default protocol and mpiImpl
+  BatchManager * FactBatchManager_eSGE::operator() (const char * hostname) const
+  {
+    // MESSAGE("Building new BatchManager_SGE on host '" << hostname << "'");
+    return new BatchManager_eSGE(this, hostname); // caller receives a raw new'ed pointer
+  }
+
+  BatchManager_eClient * FactBatchManager_eSGE::operator() (const char * hostname, const char * protocol, const char * mpiImpl) const
+  {
+    // MESSAGE("Building new BatchManager_SGE on host '" << hostname << "'");
+    return new BatchManager_eSGE(this, hostname, protocol, mpiImpl); // caller receives a raw new'ed pointer
+  }
+
+
+}
diff --git a/src/Batch/Batch_FactBatchManager_eSGE.hxx b/src/Batch/Batch_FactBatchManager_eSGE.hxx
new file mode 100644
index 000000000..212511752
---
/dev/null
+++ b/src/Batch/Batch_FactBatchManager_eSGE.hxx
@@ -0,0 +1,60 @@
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * FactBatchManager_eSGE.hxx : factory building BatchManager_eSGE objects
+ *
+ * Author  : Bernard SECHER : CEA DEN
+ * Date    : April 2008
+ * Project : PAL Salome
+ *
+ */
+
+#ifndef _FACTBATCHMANAGER_eSGE_H_
+#define _FACTBATCHMANAGER_eSGE_H_
+
+using namespace std;
+#include
+#include
+#include "Batch_BatchManager_eClient.hxx"
+#include "Batch_FactBatchManager_eClient.hxx"
+
+namespace Batch {
+
+  class BatchManager_eSGE;
+
+  class FactBatchManager_eSGE : public FactBatchManager_eClient
+  {
+  public:
+    // Constructor and destructor
+    FactBatchManager_eSGE();
+    virtual ~FactBatchManager_eSGE();
+
+    virtual BatchManager * operator() (const char * hostname) const; // builds a manager for hostname
+    virtual BatchManager_eClient * operator() (const char * hostname, const char * protocol, const char * mpiImpl) const; // same, with explicit protocol and MPI implementation
+
+  protected:
+
+  private:
+
+  };
+
+}
+
+#endif
diff --git a/src/Batch/Batch_JobInfo_eSGE.cxx b/src/Batch/Batch_JobInfo_eSGE.cxx
new file mode 100644
index 000000000..ee8210edb
--- /dev/null
+++ b/src/Batch/Batch_JobInfo_eSGE.cxx
@@ -0,0 +1,109 @@
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * JobInfo_eSGE.cxx : emulation of SGE client
+ *
+ * Author  : Bernard SECHER - CEA DEN
+ * Mail    : mailto:bernard.secher@cea.fr
+ * Date    : Thu Apr 24 10:17:22 2008
+ * Project : PAL Salome
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "Batch_Parametre.hxx"
+#include "Batch_Environnement.hxx"
+#include "Batch_RunTimeException.hxx"
+#include "Batch_APIInternalFailureException.hxx"
+#include "Batch_JobInfo_eSGE.hxx"
+
+namespace Batch {
+
+
+
+  // Constructor: fills _param[ID] and _param[STATE] from the first line of logFile
+  JobInfo_eSGE::JobInfo_eSGE(int id, string logFile) : JobInfo()
+  {
+    // Fill the _param member (the job id is stored as text)
+    ostringstream oss;
+    oss << id;
+    _param[ID] = oss.str();
+
+    // read the first line of the log file; the buffer starts empty so an unreadable file yields ""
+    char line[128] = "";
+    ifstream fp(logFile.c_str(),ios::in);
+
+    string status;
+    string sline;
+    fp.getline(line,80,'\n');
+    sline = string(line);
+
+    if( sline.length() > 0 ){
+      istringstream iss(sline);
+      iss >> status >> status >> status >> status >> status; // keep the fifth whitespace-separated field
+    }
+    else
+      status = "e"; // empty line: state is reported as "e"
+
+    _param[STATE] = status;
+
+    // always assign _running so it never keeps an unspecified value (no other initialisation is visible here)
+    _running = ( status.find("r") != string::npos );
+
+  }
+
+  // Teste si un
job est present en machine + bool JobInfo_eSGE::isRunning() const + { + return _running; + } + + + // Destructeur + JobInfo_eSGE::~JobInfo_eSGE() + { + // Nothing to do + } + + // Convertit une date HH:MM:SS en secondes + long JobInfo_eSGE::HMStoLong(const string & s) + { + long hour, min, sec; + + sscanf( s.c_str(), "%ld:%ld:%ld", &hour, &min, &sec); + return ( ( ( hour * 60L ) + min ) * 60L ) + sec; + } + + // Methode pour l'interfacage avec Python (SWIG) : affichage en Python + string JobInfo_eSGE::__str__() const + { + ostringstream sst; + sst << " @@ -288,12 +289,19 @@ string Launcher_cpp::queryJob( long id, if(it == _batchmap.end()) throw LauncherException("no batchmanager for that cluster"); - ostringstream oss; - oss << id; - Batch::JobId jobId( _batchmap[clustername], oss.str() ); + Batch::Parametre par; + try{ + ostringstream oss; + oss << id; + Batch::JobId jobId( _batchmap[clustername], oss.str() ); + + Batch::JobInfo jinfo = jobId.queryJob(); + par = jinfo.getParametre(); + } + catch(const Batch::EmulationException &ex){ + throw LauncherException(ex.msg.c_str()); + } - Batch::JobInfo jinfo = jobId.queryJob(); - Batch::Parametre par = jinfo.getParametre(); return par[STATE]; } @@ -422,8 +430,11 @@ Batch::BatchManager_eClient *Launcher_cpp::FactoryBatchManager( const ParserReso case slurm: mpi = "slurm"; break; + case nompi: + throw LauncherException("you must specified an mpi implementation for batch manager"); + break; default: - mpi = "indif"; + throw LauncherException("unknown mpi implementation"); break; } cerr << "Instanciation of batch manager" << endl; @@ -436,6 +447,10 @@ Batch::BatchManager_eClient *Launcher_cpp::FactoryBatchManager( const ParserReso cerr << "Instantiation of LSF batch manager" << endl; fact = new Batch::FactBatchManager_eLSF; break; + case sge: + cerr << "Instantiation of SGE batch manager" << endl; + fact = new Batch::FactBatchManager_eSGE; + break; default: cerr << "BATCH = " << params.Batch << endl; throw 
LauncherException("no batchmanager for that cluster"); @@ -568,8 +583,9 @@ MpiImpl *Launcher_cpp::FactoryMpiImpl(MpiImplType mpi) throw(LauncherException) return new MpiImpl_OPENMPI(); case slurm: return new MpiImpl_SLURM(); - case indif: - throw LauncherException("you must specify a mpi implementation in CatalogResources.xml file"); + case nompi: + throw LauncherException("you must specified an mpi implementation for batch manager"); + break; default: ostringstream oss; oss << mpi << " : not yet implemented"; diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index 530a13cf8..44b453d50 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -230,7 +230,7 @@ char* SALOME_Launcher::queryJob( const CORBA::Long jobId, status = _l.queryJob(jobId,p); } catch(const LauncherException &ex){ - INFOS("Caught exception."); + INFOS(ex.msg.c_str()); THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM); } return CORBA::string_dup(status.c_str()); diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx index 6fad5fa92..a94d3ff34 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx @@ -189,6 +189,8 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) _resource.Batch = pbs; else if (aBatch == "lsf") _resource.Batch = lsf; + else if (aBatch == "sge") + _resource.Batch = sge; else _resource.Batch = none; } @@ -209,7 +211,7 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) else if (anMpi == "slurm") _resource.mpi = slurm; else - _resource.mpi = indif; + _resource.mpi = nompi; } if (xmlHasProp(aCurNode, (const xmlChar*)test_user_name)) @@ -384,6 +386,9 @@ void SALOME_ResourcesCatalog_Handler::PrepareDocToXmlFile(xmlDocPtr theDoc) case lsf: xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); break; + case sge: 
+ xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); + break; default: xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); } @@ -467,6 +472,9 @@ void SALOME_ResourcesCatalog_Handler::PrepareDocToXmlFile(xmlDocPtr theDoc) case lsf: xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); break; + case sge: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); + break; default: xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); } diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx index 1ebc6cb41..33b363ca1 100644 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx @@ -150,7 +150,7 @@ void ParserResourcesType::Clear() Protocol = rsh; Mode = interactive; Batch = none; - mpi = indif; + mpi = nompi; UserName = ""; AppliPath = ""; ModulesList.clear(); diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx index 901def13c..1d38cb854 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx @@ -38,9 +38,9 @@ enum AccessProtocolType {rsh, ssh}; enum AccessModeType {interactive, batch}; -enum BatchType {none, pbs, lsf}; +enum BatchType {none, pbs, lsf, sge}; -enum MpiImplType {indif, lam, mpich1, mpich2, openmpi, slurm}; +enum MpiImplType {nompi, lam, mpich1, mpich2, openmpi, slurm}; class ResourceDataToSort { diff --git a/src/ResourcesManager/SALOME_ResourcesManager.cxx b/src/ResourcesManager/SALOME_ResourcesManager.cxx index 90f273d69..554d2e65b 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.cxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.cxx @@ -208,9 +208,7 @@ Engines::MachineParameters* SALOME_ResourcesManager::GetMachineParameters(const p_ptr->cpu_clock = resource.DataForSort._CPUFreqMHz; p_ptr->nb_proc_per_node = resource.DataForSort._nbOfProcPerNode; 
p_ptr->nb_node = resource.DataForSort._nbOfNodes; - if( resource.mpi == indif ) - p_ptr->mpiImpl = "indif"; - else if( resource.mpi == lam ) + if( resource.mpi == lam ) p_ptr->mpiImpl = "lam"; else if( resource.mpi == mpich1 ) p_ptr->mpiImpl = "mpich1"; @@ -224,6 +222,8 @@ Engines::MachineParameters* SALOME_ResourcesManager::GetMachineParameters(const p_ptr->batch = "pbs"; else if( resource.Batch == lsf ) p_ptr->batch = "lsf"; + else if( resource.Batch == sge ) + p_ptr->batch = "sge"; return p_ptr; } -- 2.39.2