From f3403f9d8213e58eff7a81d581e0de1e5a6c77d0 Mon Sep 17 00:00:00 2001 From: barate Date: Tue, 2 Apr 2013 16:20:21 +0000 Subject: [PATCH] Integrate developments from N. Toukourou at INRIA (OAR and CooRM support) --- src/CMakeLists.txt | 2 + src/COORM/BatchManager_COORM.cxx | 276 +++++++++++++++++++++++++++ src/COORM/BatchManager_COORM.hxx | 70 +++++++ src/COORM/CMakeLists.txt | 25 +++ src/COORM/FactBatchManager_COORM.cxx | 48 +++++ src/COORM/FactBatchManager_COORM.hxx | 56 ++++++ src/COORM/JobInfo_COORM.cxx | 129 +++++++++++++ src/COORM/JobInfo_COORM.hxx | 55 ++++++ src/Core/Constants.cxx | 5 + src/Core/Constants.hxx | 5 + src/Core/JobInfo.cxx | 18 ++ src/Core/JobInfo.hxx | 4 + src/Core/ParameterTypeMap.cxx | 5 + src/OAR/BatchManager_OAR.cxx | 251 ++++++++++++++++++++++++ src/OAR/BatchManager_OAR.hxx | 69 +++++++ src/OAR/CMakeLists.txt | 25 +++ src/OAR/FactBatchManager_OAR.cxx | 47 +++++ src/OAR/FactBatchManager_OAR.hxx | 56 ++++++ src/OAR/JobInfo_OAR.cxx | 129 +++++++++++++ src/OAR/JobInfo_OAR.hxx | 55 ++++++ 20 files changed, 1330 insertions(+) create mode 100644 src/COORM/BatchManager_COORM.cxx create mode 100644 src/COORM/BatchManager_COORM.hxx create mode 100644 src/COORM/CMakeLists.txt create mode 100644 src/COORM/FactBatchManager_COORM.cxx create mode 100644 src/COORM/FactBatchManager_COORM.hxx create mode 100644 src/COORM/JobInfo_COORM.cxx create mode 100644 src/COORM/JobInfo_COORM.hxx create mode 100644 src/OAR/BatchManager_OAR.cxx create mode 100644 src/OAR/BatchManager_OAR.hxx create mode 100644 src/OAR/CMakeLists.txt create mode 100644 src/OAR/FactBatchManager_OAR.cxx create mode 100644 src/OAR/FactBatchManager_OAR.hxx create mode 100644 src/OAR/JobInfo_OAR.cxx create mode 100644 src/OAR/JobInfo_OAR.hxx diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9e17986..fc4bfd7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,6 +49,8 @@ ENDIF (BUILD_LOCAL_SUBMISSION) add_subdirectory (CCC) add_subdirectory (LSF) add_subdirectory (PBS) +add_subdirectory (OAR) +add_subdirectory (COORM) add_subdirectory (SGE) add_subdirectory (LoadLeveler) add_subdirectory (Slurm) diff --git a/src/COORM/BatchManager_COORM.cxx b/src/COORM/BatchManager_COORM.cxx new file mode 100644 index 0000000..9e50e5b --- /dev/null +++ b/src/COORM/BatchManager_COORM.cxx @@ -0,0 +1,276 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include +#include + +#include +#include +#include + +#include "BatchManager_COORM.hxx" +#include "JobInfo_COORM.hxx" + +using namespace std; + +namespace Batch +{ + BatchManager_COORM::BatchManager_COORM(const FactBatchManager * parent, const char * host, + const char * username, + CommunicationProtocolType protocolType, const char * mpiImpl) + : BatchManager(parent, host, username, protocolType, mpiImpl) + { + } + + BatchManager_COORM::~BatchManager_COORM() + { + } + + // Soumet un job au gestionnaire + const JobId BatchManager_COORM::submitJob(const Job & job) + { + Parametre params = job.getParametre(); + const string workDir = params[WORKDIR]; + const string fileToExecute = params[EXECUTABLE]; + + // For CooRM + const string launcherArgs = params[LAUNCHER_ARGS]; + const string launcherFile = params[LAUNCHER_FILE]; + + const string jobName = params[NAME]; + + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + + // For CooRM + p1 = launcherFile.find_last_of("/"); + std::string fileNameToLaunch = launcherFile.substr(p1+1); + + + // export input files on cluster + exportInputFiles(job); + + // build batch script for job + string scriptFile = buildBatchScript(job); + + // Get REMOTE_COORM_PATH environment variable + const char * remote_coorm_path = getenv("REMOTE_COORM_PATH"); + if (remote_coorm_path == NULL) + { + throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable"); + } + + // We need omniORB to execute launcher.py + const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;"; + + + // define command to submit batch + string subCommand = set_env_cmd + "python " + workDir + "/" + fileNameToLaunch + " --name="+ jobName + + " --workdir=" + workDir + " --outputs=" + workDir + "/logs/outputs.log" + + " --errors=" + workDir + "/logs/errors.log" + + " --executable=" + scriptFile + " " + launcherArgs; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + command += " 2>&1"; + LOG(command); + + // submit job + string output; + int status = Utils::getCommandOutput(command, output); + LOG(output); + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); + + // read id of submitted job in output + istringstream logfile(output); + string sline, idline, id; + + if (logfile) + { + while (getline(logfile, sline) && sline != "") + { + idline = sline; + } + + vector tokens; + JobInfo::Tokenize(idline, tokens, "="); + id = tokens[1] ; + } + else + { + throw RunTimeException("Error in the submission of the job on the remote host"); + } + + JobId jobid(this, (string) id); + return jobid; + } + + // retire un job du gestionnaire + void BatchManager_COORM::deleteJob(const JobId & jobid) + { + // Get REMOTE_COORM_PATH environment variable + const char * remote_coorm_path = getenv("REMOTE_COORM_PATH"); + if (remote_coorm_path == NULL) + { + throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable"); + } + + // We need omniORB to execute launcher.py + const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;"; + + // define command to delete job + string subCommand = set_env_cmd + "python " + string(remote_coorm_path) + "/coormdel.py --jobID=" + jobid.getReference(); + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + + int status = system(command.c_str()); + if (status) + throw RunTimeException("Can't delete job " + jobid.getReference()); + + LOG("job " << jobid.getReference() << " killed"); + } + + // Renvoie l'etat du job + JobInfo BatchManager_COORM::queryJob(const JobId & jobid) + { + // Get REMOTE_COORM_PATH environment variable + const char * remote_coorm_path = getenv("REMOTE_COORM_PATH"); + if (remote_coorm_path == NULL) + { + throw RunTimeException("Unable to get REMOTE_COORM_PATH environment variable"); + } + + // We need omniORB to execute launcher.py + const string set_env_cmd = "source " + string(remote_coorm_path) + "/coorm_prerequis.sh;"; + + // define command to query batch + string subCommand = set_env_cmd + "python " + string(remote_coorm_path) + "/coormstat.py --jobID=" + jobid.getReference(); + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + string output; + int status = Utils::getCommandOutput(command, output); + if(status && status != 153 && status != 256*153) + throw RunTimeException("Error of connection on remote host"); + + JobInfo_COORM jobinfo = JobInfo_COORM(jobid.getReference(), output); + return jobinfo; + } + + string BatchManager_COORM::buildBatchScript(const Job & job) + { + Parametre params = job.getParametre(); + + // Job Parameters + string workDir = ""; + string fileToExecute = ""; + string tmpDir = ""; + int nbproc = 0; + int edt = 0; + int mem = 0; + string queue = ""; + + // Mandatory parameters + if (params.find(WORKDIR) != params.end()) + workDir = params[WORKDIR].str(); + else + throw RunTimeException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); + if (params.find(EXECUTABLE) != params.end()) + fileToExecute = params[EXECUTABLE].str(); + else + throw RunTimeException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); + + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + string fileNameToExecute = fileToExecute.substr(p1+1); + + // Create batch submit file + ofstream tempOutputFile; + string tmpFileName = Utils::createAndOpenTemporaryFile("COORM-script", tempOutputFile); + + tempOutputFile << "#!/bin/sh -f" << endl; + tempOutputFile << "export LIBBATCH_NODEFILE=$COORM_NODEFILE" << endl; + // Launch the executable + tempOutputFile << "cd " << tmpDir << endl; + tempOutputFile << "./" + fileNameToExecute << endl; + tempOutputFile.flush(); + tempOutputFile.close(); + + Utils::chmod(tmpFileName.c_str(), 0x1ED); + LOG("Batch script file generated is: " << tmpFileName); + + string remoteFileName = rootNameToExecute + "_Batch.sh"; + int status = _protocol.copyFile(tmpFileName, "", "", + workDir + "/" + remoteFileName, + _hostname, _username); + if (status) + throw RunTimeException("Cannot copy batch submission file on host " + _hostname); + + return remoteFileName; + } + + const string BatchManager_COORM::convertSecTo_H_M_S(const long seconds) + { + int h(seconds / 3600); + int m((seconds % 3600) / 60); + int s((seconds % 3600) % 60); + + stringstream ss; + ss << h << ":" << m << ":" << s; + + return ss.str(); + } + + void BatchManager_COORM::exportInputFiles(const Job & job) + { + BatchManager::exportInputFiles(job); + + int status; + Parametre params = job.getParametre(); + + string launcherFile = params[LAUNCHER_FILE]; + + if (launcherFile.size() != 0) + { + // Copy launcherFile into batch working directory + status = _protocol.copyFile(launcherFile, "", "", + params[WORKDIR], _hostname, _username); + if (status) { + std::ostringstream oss; + oss << "Cannot copy file " << launcherFile << " on host " << _hostname; + oss << ". Return status is " << status; + throw RunTimeException(oss.str()); + } + + string remoteLauncher = launcherFile; + remoteLauncher = remoteLauncher.substr(remoteLauncher.rfind("/") + 1, remoteLauncher.length()); + remoteLauncher = string(params[WORKDIR]) + "/" + remoteLauncher; + + string subCommand = string("chmod u+x ") + remoteLauncher; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + status = system(command.c_str()); + if (status) { + std::ostringstream oss; + oss << "Cannot change permissions of file " << remoteLauncher << " on host " << _hostname; + oss << ". Return status is " << status; + throw RunTimeException(oss.str()); + } + } + } +} diff --git a/src/COORM/BatchManager_COORM.hxx b/src/COORM/BatchManager_COORM.hxx new file mode 100644 index 0000000..33790b2 --- /dev/null +++ b/src/COORM/BatchManager_COORM.hxx @@ -0,0 +1,70 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#ifndef _BATCHMANAGER_COORM_H_ +#define _BATCHMANAGER_COORM_H_ + +#include "Defines.hxx" +#include "JobId.hxx" +#include "JobInfo.hxx" +#include "FactBatchManager.hxx" +#include "BatchManager.hxx" + +namespace Batch +{ + class BATCH_EXPORT BatchManager_COORM : public BatchManager + { + public: + // Constructeur + BatchManager_COORM(const FactBatchManager * parent, const char * host="localhost", + const char * username="", + CommunicationProtocolType protocolType = SSH, const char * mpiImpl="nompi"); + + // Destructeur + virtual ~BatchManager_COORM(); + + // Soumet un job + virtual const JobId submitJob(const Job & job); + + // Supprime un job + virtual void deleteJob(const JobId & jobid); + + // Donne l'etat du job + virtual JobInfo queryJob(const JobId & jobid); + + // Modifie un job en file d'attente + virtual void setParametre(const JobId & jobid, const Parametre & param) { return alterJob(jobid, param); } + + // Modifie un job en file d'attente + virtual void setEnvironnement(const JobId & jobid, const Environnement & env) { return alterJob(jobid, env); } + + protected: + std::string buildBatchScript(const Job & job); + const std::string convertSecTo_H_M_S(const long seconds); + void exportInputFiles(const Job & job); + +#ifdef SWIG + public: + // Recupere le l'identifiant d'un job deja soumis au BatchManager + virtual const JobId getJobIdByReference(const char * ref) { return BatchManager::getJobIdByReference(ref); } +#endif + }; +} + +#endif diff --git a/src/COORM/CMakeLists.txt b/src/COORM/CMakeLists.txt new file mode 100644 index 0000000..66695ba --- /dev/null +++ b/src/COORM/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (C) 2012-2013 INRIA +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# + +SET(CLASS_LIST COORM/FactBatchManager_COORM + COORM/BatchManager_COORM + COORM/JobInfo_COORM + ) + +APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST}) diff --git a/src/COORM/FactBatchManager_COORM.cxx b/src/COORM/FactBatchManager_COORM.cxx new file mode 100644 index 0000000..8b71995 --- /dev/null +++ b/src/COORM/FactBatchManager_COORM.cxx @@ -0,0 +1,48 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include +#include "BatchManager_COORM.hxx" +#include "FactBatchManager_COORM.hxx" + +namespace Batch +{ + static FactBatchManager_COORM sFBM_eCOORM; + + // Constructeur + FactBatchManager_COORM::FactBatchManager_COORM() : FactBatchManager("COORM") + { + // Nothing to do + } + + // Destructeur + FactBatchManager_COORM::~FactBatchManager_COORM() + { + // Nothing to do + } + + // Functor + BatchManager * FactBatchManager_COORM::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl) const + { + return new BatchManager_COORM(this, hostname, username, protocolType, mpiImpl); + } +} diff --git a/src/COORM/FactBatchManager_COORM.hxx b/src/COORM/FactBatchManager_COORM.hxx new file mode 100644 index 0000000..b00a8e2 --- /dev/null +++ b/src/COORM/FactBatchManager_COORM.hxx @@ -0,0 +1,56 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#ifndef _FACTBATCHMANAGER_COORM_H_ +#define _FACTBATCHMANAGER_COORM_H_ + +#include +#include + +#include "Defines.hxx" +#include "BatchManager.hxx" +#include "FactBatchManager.hxx" + +namespace Batch +{ + class BatchManager_COORM; + + class BATCH_EXPORT FactBatchManager_COORM : public FactBatchManager + { + public: + // Constructeur + FactBatchManager_COORM(); + + // Destructeur + virtual ~FactBatchManager_COORM(); + + // Functor + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl) const; + + protected: + + private: + + }; +} + +#endif diff --git a/src/COORM/JobInfo_COORM.cxx b/src/COORM/JobInfo_COORM.cxx new file mode 100644 index 0000000..3022a3a --- /dev/null +++ b/src/COORM/JobInfo_COORM.cxx @@ -0,0 +1,129 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include +#include +#include +#include +#include + +#include "Constants.hxx" +#include "Parametre.hxx" +#include "Environnement.hxx" +#include "RunTimeException.hxx" +#include "APIInternalFailureException.hxx" +#include "JobInfo_COORM.hxx" + +using namespace std; + +namespace Batch { + + // Constructeurs + JobInfo_COORM::JobInfo_COORM(const std::string & id, const std::string & queryOutput) : JobInfo() + { + _param[ID] = id; + + // read query output + istringstream logfile(queryOutput); + + string sline, state, assigned_hostnames; + + if (logfile) + { + while (getline(logfile, sline) && sline != "") + { + vector tokens; + + JobInfo::Tokenize(sline, tokens, "= "); + + if (tokens[0] == "state") + { + state = tokens[1]; + } + + if (tokens[0] == "assigned_hostnames") + { + assigned_hostnames = tokens[1]; + } + } + + _param[ASSIGNEDHOSTNAMES] = assigned_hostnames; + + if (state == "FINISHED") + { + // Completed + _param[STATE] = FINISHED; + } + else if (state == "STARTED") + { + // Started + _param[STATE] = RUNNING; + } + else if (state == "WAITING") + { + // Waiting + _param[STATE] = QUEUED; + } + else if (state == "SUBMITTED") + { + // Submitted + _param[STATE] = CREATED; + } + else if (state == "KILLED") + { + // Killed + _param[STATE] = FAILED; + } + else + { + cerr << "Unknown job state code: " << state << endl; + } + } + else + { + throw RunTimeException("Error of connection on remote host"); + } + } + + // Destructeur + JobInfo_COORM::~JobInfo_COORM() + { + // Nothing to do + } + + // Convertit une date HH:MM:SS en secondes + long JobInfo_COORM::HMStoLong(const string & s) + { + long hour, min, sec; + + sscanf( s.c_str(), "%ld:%ld:%ld", &hour, &min, &sec); + return ( ( ( hour * 60L ) + min ) * 60L ) + sec; + } + + // Methode pour l'interfacage avec Python (SWIG) : affichage en Python + string JobInfo_COORM::__str__() const + { + ostringstream sst; + sst << "& tokens, const std::string& delimiters) + { + // Skip delimiters at beginning. + string::size_type lastPos = str.find_first_not_of(delimiters, 0); + // Find first "non-delimiter". + string::size_type pos = str.find_first_of(delimiters, lastPos); + + while (string::npos != pos || string::npos != lastPos) + { + // Found a token, add it to the vector. + tokens.push_back(str.substr(lastPos, pos - lastPos)); + // Skip delimiters. Note the "not_of" + lastPos = str.find_first_not_of(delimiters, pos); + // Find next "non-delimiter" + pos = str.find_first_of(delimiters, lastPos); + } + } } diff --git a/src/Core/JobInfo.hxx b/src/Core/JobInfo.hxx index 6b0c7c2..11b17aa 100644 --- a/src/Core/JobInfo.hxx +++ b/src/Core/JobInfo.hxx @@ -36,6 +36,7 @@ #include #include +#include #include "Parametre.hxx" #include "Environnement.hxx" @@ -60,6 +61,9 @@ namespace Batch { virtual Batch::Parametre getParametre() const; virtual Batch::Environnement getEnvironnement() const; + // To tokenize a string + static void Tokenize(const std::string& str, std::vector& tokens, const std::string& delimiters = " "); + // Methodes pour l'interfacage avec Python (SWIG) // TODO : supprimer ces methodes et transferer leur definitions dans SWIG std::string __str__() const; // SWIG : affichage en Python diff --git a/src/Core/ParameterTypeMap.cxx b/src/Core/ParameterTypeMap.cxx index b4c89e4..1375ada 100644 --- a/src/Core/ParameterTypeMap.cxx +++ b/src/Core/ParameterTypeMap.cxx @@ -46,6 +46,7 @@ namespace Batch { { // Don't use the string constants in this constructor because they might be uninitialized addParameter("ARGUMENTS", STRING, 0); + addParameter("ASSIGNEDHOSTNAMES", STRING, 1); addParameter("EXECUTABLE", STRING, 1); addParameter("ID", STRING, 1); addParameter("INFILE", COUPLE, 0); @@ -61,6 +62,10 @@ namespace Batch { addParameter("STATE", STRING, 1); addParameter("WORKDIR", STRING, 1); addParameter("EXCLUSIVE", BOOL, 1); + + // Parameters for COORM + addParameter("LAUNCHER_FILE", STRING, 1); + addParameter("LAUNCHER_ARGS", STRING, 1); } ParameterTypeMap::~ParameterTypeMap() diff --git a/src/OAR/BatchManager_OAR.cxx b/src/OAR/BatchManager_OAR.cxx new file mode 100644 index 0000000..5c050a9 --- /dev/null +++ b/src/OAR/BatchManager_OAR.cxx @@ -0,0 +1,251 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include +#include + +#include +#include +#include + +#include "BatchManager_OAR.hxx" +#include "JobInfo_OAR.hxx" + +using namespace std; + +namespace Batch +{ + BatchManager_OAR::BatchManager_OAR(const FactBatchManager * parent, const char * host, + const char * username, + CommunicationProtocolType protocolType, const char * mpiImpl) + : BatchManager(parent, host, username, protocolType, mpiImpl) + { + } + + BatchManager_OAR::~BatchManager_OAR() + { + } + + // Soumet un job au gestionnaire + const JobId BatchManager_OAR::submitJob(const Job & job) + { + Parametre params = job.getParametre(); + const string workDir = params[WORKDIR]; + const string fileToExecute = params[EXECUTABLE]; + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + + // export input files on cluster + exportInputFiles(job); + + // build batch script for job + string scriptFile = buildBatchScript(job); + + // define command to submit batch + string subCommand = string("oarsub -t allow_classic_ssh -d ") + workDir + " -S " + workDir + "/" + scriptFile; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + command += " 2>&1"; + LOG(command); + + // submit job + string output; + int status = Utils::getCommandOutput(command, output); + LOG(output); + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); + + // read id of submitted job in output + istringstream logfile(output); + string sline, idline, id; + + if (logfile) + { + while (getline(logfile, sline) && sline != "") + { + idline = sline; + } + + vector tokens; + JobInfo::Tokenize(idline, tokens, "="); + id = tokens[1]; + } + else + { + throw RunTimeException("Error in the submission of the job on the remote host"); + } + + JobId jobid(this, id); + return jobid; + } + + // retire un job du gestionnaire + void BatchManager_OAR::deleteJob(const JobId & jobid) + { + // define command to delete job + string subCommand = "oardel " + jobid.getReference(); + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + + int status = system(command.c_str()); + if (status) + throw RunTimeException("Can't delete job " + jobid.getReference()); + + LOG("job " << jobid.getReference() << " killed"); + } + + // Renvoie l'etat du job + JobInfo BatchManager_OAR::queryJob(const JobId & jobid) + { + // define command to query batch + string subCommand = "oarstat -fj " + jobid.getReference(); + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + string output; + int status = Utils::getCommandOutput(command, output); + if(status && status != 153 && status != 256*153) + throw RunTimeException("Error of connection on remote host"); + + JobInfo_OAR jobinfo = JobInfo_OAR(jobid.getReference(), output); + return jobinfo; + } + + string BatchManager_OAR::buildBatchScript(const Job & job) + { + Parametre params = job.getParametre(); + + // Job Parameters + string workDir = ""; + string fileToExecute = ""; + string tmpDir = ""; + int nbproc = 0; + int edt = 0; + int mem = 0; + string queue = ""; + + // Mandatory parameters + if (params.find(WORKDIR) != params.end()) + workDir = params[WORKDIR].str(); + else + throw RunTimeException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); + if (params.find(EXECUTABLE) != params.end()) + fileToExecute = params[EXECUTABLE].str(); + else + throw RunTimeException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); + + // Optional parameters + if (params.find(NBPROC) != params.end()) + nbproc = params[NBPROC]; + int nbprocpernode = 1; + if (params.find(NBPROCPERNODE) != params.end()) + nbprocpernode = params[NBPROCPERNODE]; + if (params.find(MAXWALLTIME) != params.end()) + edt = params[MAXWALLTIME]; + if (params.find(MAXRAMSIZE) != params.end()) + mem = params[MAXRAMSIZE]; + if (params.find(QUEUE) != params.end()) + queue = params[QUEUE].str(); + + string::size_type p1 = fileToExecute.find_last_of("/"); + string::size_type p2 = fileToExecute.find_last_of("."); + string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); + string fileNameToExecute = fileToExecute.substr(p1+1); + + // Create batch submit file + ofstream tempOutputFile; + string tmpFileName = Utils::createAndOpenTemporaryFile("OAR-script", tempOutputFile); + + tempOutputFile << "#!/bin/sh -f" << endl; + + int nb_full_nodes(0); + int nb_proc_on_last_node(0); + + if (nbproc > 0) + { + nb_full_nodes = nbproc / nbprocpernode; + nb_proc_on_last_node = nbproc % nbprocpernode; + + // In exclusive mode, we reserve all procs on the nodes + if (params.find(EXCLUSIVE) != params.end() && params[EXCLUSIVE] && nb_proc_on_last_node > 0) + { + nb_full_nodes += 1; + nb_proc_on_last_node = 0; + } + } + + if (nb_full_nodes > 0) + { + tempOutputFile << "#OAR -l nodes=" << nb_full_nodes; + if (edt > 0) + { + tempOutputFile << ",walltime=" << convertSecTo_H_M_S(edt) << endl; + } + else + { + tempOutputFile << endl; + } + } + else + { + if (edt > 0) + { + tempOutputFile << "#OAR -l walltime=" << convertSecTo_H_M_S(edt) << endl; + } + } + + if (queue != "") + { + tempOutputFile << "#OAR -q " << queue << endl; + } + + tempOutputFile << "#OAR -O " << tmpDir << "/logs/output.log." << rootNameToExecute << endl; + tempOutputFile << "#OAR -E " << tmpDir << "/logs/error.log." << rootNameToExecute << endl; + + tempOutputFile << "export LIBBATCH_NODEFILE=$OAR_NODEFILE" << endl; + + // Launch the executable + tempOutputFile << "cd " << tmpDir << endl; + tempOutputFile << "./" + fileNameToExecute << endl; + tempOutputFile.flush(); + tempOutputFile.close(); + + Utils::chmod(tmpFileName.c_str(), 0x1ED); + LOG("Batch script file generated is: " << tmpFileName); + + string remoteFileName = rootNameToExecute + "_Batch.sh"; + int status = _protocol.copyFile(tmpFileName, "", "", + workDir + "/" + remoteFileName, + _hostname, _username); + if (status) + throw RunTimeException("Cannot copy batch submission file on host " + _hostname); + + return remoteFileName; + } + + const string BatchManager_OAR::convertSecTo_H_M_S(const long seconds) + { + int h(seconds / 3600); + int m((seconds % 3600) / 60); + int s((seconds % 3600) % 60); + + stringstream ss; + ss << h << ":" << m << ":" << s; + + return ss.str(); + } +} diff --git a/src/OAR/BatchManager_OAR.hxx b/src/OAR/BatchManager_OAR.hxx new file mode 100644 index 0000000..eef325b --- /dev/null +++ b/src/OAR/BatchManager_OAR.hxx @@ -0,0 +1,69 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#ifndef _BATCHMANAGER_OAR_H_ +#define _BATCHMANAGER_OAR_H_ + +#include "Defines.hxx" +#include "JobId.hxx" +#include "JobInfo.hxx" +#include "FactBatchManager.hxx" +#include "BatchManager.hxx" + +namespace Batch +{ + class BATCH_EXPORT BatchManager_OAR : public BatchManager + { + public: + // Constructeur + BatchManager_OAR(const FactBatchManager * parent, const char * host="localhost", + const char * username="", + CommunicationProtocolType protocolType = SSH, const char * mpiImpl="nompi"); + + // Destructeur + virtual ~BatchManager_OAR(); + + // Soumet un job + virtual const JobId submitJob(const Job & job); + + // Supprime un job + virtual void deleteJob(const JobId & jobid); + + // Donne l'etat du job + virtual JobInfo queryJob(const JobId & jobid); + + // Modifie un job en file d'attente + virtual void setParametre(const JobId & jobid, const Parametre & param) { return alterJob(jobid, param); } + + // Modifie un job en file d'attente + virtual void setEnvironnement(const JobId & jobid, const Environnement & env) { return alterJob(jobid, env); } + + protected: + std::string buildBatchScript(const Job & job); + const std::string convertSecTo_H_M_S(const long seconds); + +#ifdef SWIG + public: + // Recupere le l'identifiant d'un job deja soumis au BatchManager + virtual const JobId getJobIdByReference(const char * ref) { return BatchManager::getJobIdByReference(ref); } +#endif + }; +} + +#endif diff --git a/src/OAR/CMakeLists.txt b/src/OAR/CMakeLists.txt new file mode 100644 index 0000000..ecdfec0 --- /dev/null +++ b/src/OAR/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (C) 2012-2013 INRIA +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# + +SET(CLASS_LIST OAR/FactBatchManager_OAR + OAR/BatchManager_OAR + OAR/JobInfo_OAR + ) + +APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST}) diff --git a/src/OAR/FactBatchManager_OAR.cxx b/src/OAR/FactBatchManager_OAR.cxx new file mode 100644 index 0000000..5f7a355 --- /dev/null +++ b/src/OAR/FactBatchManager_OAR.cxx @@ -0,0 +1,47 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include +#include "BatchManager_OAR.hxx" +#include "FactBatchManager_OAR.hxx" + +namespace Batch +{ + static FactBatchManager_OAR sFBM_eOAR; + + // Constructeur + FactBatchManager_OAR::FactBatchManager_OAR() : FactBatchManager("OAR") + { + // Nothing to do + } + + // Destructeur + FactBatchManager_OAR::~FactBatchManager_OAR() + { + // Nothing to do + } + + BatchManager * FactBatchManager_OAR::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl) const + { + return new BatchManager_OAR(this, hostname, username, protocolType, mpiImpl); + } +} diff --git a/src/OAR/FactBatchManager_OAR.hxx b/src/OAR/FactBatchManager_OAR.hxx new file mode 100644 index 0000000..0cefdcd --- /dev/null +++ b/src/OAR/FactBatchManager_OAR.hxx @@ -0,0 +1,56 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#ifndef _FACTBATCHMANAGER_OAR_H_ +#define _FACTBATCHMANAGER_OAR_H_ + +#include +#include + +#include "Defines.hxx" +#include "BatchManager.hxx" +#include "FactBatchManager.hxx" + +namespace Batch +{ + class BatchManager_OAR; + + class BATCH_EXPORT FactBatchManager_OAR : public FactBatchManager + { + public: + // Constructeur + FactBatchManager_OAR(); + + // Destructeur + virtual ~FactBatchManager_OAR(); + + // Functor + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl) const; + + protected: + + private: + + }; +} + +#endif diff --git a/src/OAR/JobInfo_OAR.cxx b/src/OAR/JobInfo_OAR.cxx new file mode 100644 index 0000000..2bebc4d --- /dev/null +++ b/src/OAR/JobInfo_OAR.cxx @@ -0,0 +1,129 @@ +// Copyright (C) 2012-2013 INRIA +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// + +#include +#include +#include +#include +#include + +#include "Constants.hxx" +#include "Parametre.hxx" +#include "Environnement.hxx" +#include "RunTimeException.hxx" +#include "APIInternalFailureException.hxx" +#include "JobInfo_OAR.hxx" + +using namespace std; + +namespace Batch { + + // Constructeurs + JobInfo_OAR::JobInfo_OAR(const std::string & id, const std::string & queryOutput) : JobInfo() + { + _param[ID] = id; + + // read query output + istringstream logfile(queryOutput); + + string sline, state, assigned_hostnames; + + if (logfile) + { + while (getline(logfile, sline) && sline != "") + { + vector tokens; + + JobInfo::Tokenize(sline, tokens, "= "); + + if (tokens[0] == "state") + { + state = tokens[1]; + } + + if (tokens[0] == "assigned_hostnames") + { + assigned_hostnames = tokens[1]; + } + } + + _param[ASSIGNEDHOSTNAMES] = assigned_hostnames; + + if (state == "Terminated") + { + // Completed + _param[STATE] = FINISHED; + } + else if (state == "Running") + { + // Running + _param[STATE] = RUNNING; + } + else if (state == "Waiting") + { + // Waiting + _param[STATE] = QUEUED; + } + else if (state == "Error" || state == "Finishing") + { + // Error + _param[STATE] = FAILED; + } + else if (state == "Launching" || state == "toLaunch") + { + // Launching + _param[STATE] = IN_PROCESS; + } + else + { + cerr << "Unknown job state code: " << state << endl; + } + } + else + { + throw RunTimeException("Error of connection on remote host"); + } + } + + // Destructeur + JobInfo_OAR::~JobInfo_OAR() + { + // Nothing to do + } + + // Convertit une date HH:MM:SS en secondes + long JobInfo_OAR::HMStoLong(const string & s) + { + long hour, min, sec; + + sscanf( s.c_str(), "%ld:%ld:%ld", &hour, &min, &sec); + return ( ( ( hour * 60L ) + min ) * 60L ) + sec; + } + + // Methode pour l'interfacage avec Python (SWIG) : affichage en Python + string JobInfo_OAR::__str__() const + { + ostringstream sst; + sst << "