From bdf7c563864fb20862f726602c447a4bcd2676a9 Mon Sep 17 00:00:00 2001 From: barate Date: Thu, 12 May 2011 14:36:54 +0000 Subject: [PATCH] Added Slurm batch manager --- src/CMakeLists.txt | 1 + src/Core/Test/batchtest.conf | 5 + src/Slurm/Batch_BatchManager_eSlurm.cxx | 258 ++++++++++++++++++++ src/Slurm/Batch_BatchManager_eSlurm.hxx | 75 ++++++ src/Slurm/Batch_FactBatchManager_eSlurm.cxx | 62 +++++ src/Slurm/Batch_FactBatchManager_eSlurm.hxx | 57 +++++ src/Slurm/Batch_JobInfo_eSlurm.cxx | 93 +++++++ src/Slurm/Batch_JobInfo_eSlurm.hxx | 49 ++++ src/Slurm/CMakeLists.txt | 32 +++ src/Slurm/Test/CMakeLists.txt | 44 ++++ src/Slurm/Test/Test_eSlurm.cxx | 162 ++++++++++++ src/Slurm/Test/seta.sh | 3 + src/Slurm/Test/setb.sh | 3 + src/Slurm/Test/test-script.sh | 9 + 14 files changed, 853 insertions(+) create mode 100644 src/Slurm/Batch_BatchManager_eSlurm.cxx create mode 100644 src/Slurm/Batch_BatchManager_eSlurm.hxx create mode 100644 src/Slurm/Batch_FactBatchManager_eSlurm.cxx create mode 100644 src/Slurm/Batch_FactBatchManager_eSlurm.hxx create mode 100644 src/Slurm/Batch_JobInfo_eSlurm.cxx create mode 100644 src/Slurm/Batch_JobInfo_eSlurm.hxx create mode 100644 src/Slurm/CMakeLists.txt create mode 100644 src/Slurm/Test/CMakeLists.txt create mode 100644 src/Slurm/Test/Test_eSlurm.cxx create mode 100644 src/Slurm/Test/seta.sh create mode 100644 src/Slurm/Test/setb.sh create mode 100755 src/Slurm/Test/test-script.sh diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 065ab33..7ebd6f5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,6 +56,7 @@ add_subdirectory (PBS) add_subdirectory (SGE) add_subdirectory (SSH) add_subdirectory (LoadLeveler) +add_subdirectory (Slurm) # Make a copy of the built value and clear the built value for the next run of cmake SET(SRC_FILES ${SRC_FILES_BUILD} CACHE INTERNAL "") diff --git a/src/Core/Test/batchtest.conf b/src/Core/Test/batchtest.conf index 2e1b8f3..570ec21 100644 --- a/src/Core/Test/batchtest.conf +++ 
b/src/Core/Test/batchtest.conf @@ -38,3 +38,8 @@ TEST_ELL_HOMEDIR = "/home/username" # Home directory on LoadLeveler se TEST_ELL_QUEUE = "classname" # Class for the test job test job on LoadLeveler server TEST_ELL_JOBTYPE = "serial" # Job type for LoadLeveler TEST_ELL_TIMEOUT = 120 # Execution timeout (in seconds) for LoadLeveler Batch test + +TEST_ESLURM_HOST = "localhost" # Slurm server host +TEST_ESLURM_USER = "username" # Login for the Slurm server +TEST_ESLURM_HOMEDIR = "/home/username" # Home directory on Slurm server +TEST_ESLURM_TIMEOUT = 120 # Execution timeout (in seconds) for Slurm Batch test diff --git a/src/Slurm/Batch_BatchManager_eSlurm.cxx b/src/Slurm/Batch_BatchManager_eSlurm.cxx new file mode 100644 index 0000000..6a4fda7 --- /dev/null +++ b/src/Slurm/Batch_BatchManager_eSlurm.cxx @@ -0,0 +1,258 @@ +// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE +// +// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Batch_BatchManager_eSlurm.cxx : Slurm batch manager driven by remote sbatch / scancel / squeue commands
+ *
+ * Created on: 12 may 2011
+ * Author : Renaud BARATE - EDF R&D
+ */
+
+#include // NOTE(review): header name missing in this copy of the patch (all <...> includes were stripped) - restore from the upstream commit
+#include
+#include
+
+#include
+#include
+
+#include "Batch_FactBatchManager_eSlurm.hxx"
+#include "Batch_BatchManager_eSlurm.hxx"
+#include "Batch_JobInfo_eSlurm.hxx"
+
+using namespace std;
+
+namespace Batch {
+
+  BatchManager_eSlurm::BatchManager_eSlurm(const FactBatchManager * parent,
+                                           const char * host,
+                                           const char * username,
+                                           CommunicationProtocolType protocolType,
+                                           const char * mpiImpl,
+                                           int nb_proc_per_node)
+    : BatchManager(parent, host),
+      BatchManager_eClient(parent, host, username, protocolType, mpiImpl),
+      _nb_proc_per_node(nb_proc_per_node) // used by buildCommandFile() to size the node request
+  {
+  }
+
+  BatchManager_eSlurm::~BatchManager_eSlurm()
+  {
+  }
+
+  // Submit a job: export its input files, upload the generated Slurm script, run "sbatch" remotely and parse the job id. Throws EmulationException on failure.
+  const JobId BatchManager_eSlurm::submitJob(const Job & job)
+  {
+    int status;
+    Parametre params = job.getParametre();
+    const string workDir = params[WORKDIR]; // read without a presence check; buildCommandFile() below throws if WORKDIR is missing
+
+    // export input files on cluster
+    exportInputFiles(job);
+
+    // build command file to submit the job and copy it on the server
+    string cmdFile = buildCommandFile(job);
+
+    // define name of log file (local); NOTE(review): this file is not removed anywhere in this function
+    string logFile = generateTemporaryFileName("slurm-submitlog");
+
+    // define command to submit batch: sbatch is run from the remote work directory, its standard output is captured locally
+    string subCommand = string("cd ") + workDir + "; sbatch " + cmdFile;
+    string command = _protocol.getExecCommand(subCommand, _hostname, _username);
+    command += " > ";
+    command += logFile;
+    cerr << command.c_str() << endl;
+    status = system(command.c_str());
+    if (status) // non-zero status: report whatever the command wrote to the log file
+    {
+      ifstream error_message(logFile.c_str());
+      string mess;
+      string temp;
+      while(getline(error_message, temp))
+        mess += temp;
+      error_message.close();
+      throw EmulationException("Error of connection on remote host, error was: " + mess);
+    }
+
+    // read id of submitted job in log file: first line starting with "Submitted batch job "
+    string jobref;
+    ifstream idfile(logFile.c_str());
+    string line;
+    while (idfile && line.compare(0, 20, "Submitted batch job ") != 0)
+      getline(idfile, line);
+    idfile.close();
+    if (line.compare(0, 20, "Submitted batch job ") == 0)
+      jobref = line.substr(20); // 20 == length of the "Submitted batch job " prefix; the rest of the line is the reference
+    if (jobref.size() == 0)
+      throw EmulationException("Error in the submission of the job on the remote host");
+
+    JobId id(this, jobref);
+    return id;
+  }
+
+  /**
+   * Write the Slurm batch script for the job (#SBATCH directives, exported environment, node file, launch line) and copy it into WORKDIR on the server.
+   * Return the name of the remote file, relative to WORKDIR. Throws EmulationException if WORKDIR or EXECUTABLE is undefined or if the copy fails.
+   */
+  string BatchManager_eSlurm::buildCommandFile(const Job & job)
+  {
+    Parametre params = job.getParametre();
+
+    // Job Parameters
+    string workDir = "";
+    string fileToExecute = "";
+    string queue = ""; // NOTE(review): never used below; QUEUE is read directly from params
+
+    // Mandatory parameters
+    if (params.find(WORKDIR) != params.end())
+      workDir = params[WORKDIR].str();
+    else
+      throw EmulationException("params[WORKDIR] is not defined. Please define it, cannot submit this job.");
+    if (params.find(EXECUTABLE) != params.end())
+      fileToExecute = params[EXECUTABLE].str();
+    else
+      throw EmulationException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job.");
+
+    string::size_type p1 = fileToExecute.find_last_of("/");
+    string::size_type p2 = fileToExecute.find_last_of(".");
+    string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // text between the last '/' and the last '.', used to name the logs and the script
+    string fileNameToExecute = fileToExecute.substr(p1+1); // file name without its directory part
+
+    // Create batch submit file (local temporary file, copied to the server at the end)
+    ofstream tempOutputFile;
+    string tmpFileName = createAndOpenTemporaryFile("slurm-script", tempOutputFile);
+
+    tempOutputFile << "#!/bin/bash" << endl;
+    tempOutputFile << "#SBATCH --output=" << workDir << "/logs/output.log." << rootNameToExecute << endl;
+    tempOutputFile << "#SBATCH --error=" << workDir << "/logs/error.log." << rootNameToExecute << endl;
+
+    if (params.find(NAME) != params.end())
+      tempOutputFile << "#SBATCH --job-name=" << params[NAME] << endl;
+
+    // Optional parameters
+    int nbproc = 1;
+    if (params.find(NBPROC) != params.end())
+      nbproc = params[NBPROC];
+
+    int nodes_requested = (nbproc + _nb_proc_per_node -1) / _nb_proc_per_node; // nbproc / _nb_proc_per_node rounded up; assumes _nb_proc_per_node > 0 - TODO confirm
+    tempOutputFile << "#SBATCH --nodes=" << nodes_requested << endl;
+    tempOutputFile << "#SBATCH --ntasks-per-node=" << _nb_proc_per_node << endl;
+
+    if (params.find(MAXWALLTIME) != params.end())
+      tempOutputFile << "#SBATCH --time=" << params[MAXWALLTIME] << endl;
+    if (params.find(MAXRAMSIZE) != params.end())
+      tempOutputFile << "#SBATCH --mem=" << params[MAXRAMSIZE] << endl;
+    if (params.find(QUEUE) != params.end())
+      tempOutputFile << "#SBATCH --partition=" << params[QUEUE] << endl;
+
+    // Define environment for the job (values are written unquoted)
+    Environnement env = job.getEnvironnement();
+    for (Environnement::const_iterator iter = env.begin() ; iter != env.end() ; ++iter) {
+      tempOutputFile << "export " << iter->first << "=" << iter->second << endl;
+    }
+
+    // generate nodes file from the output of "srun hostname" and export its path as LIBBATCH_NODEFILE
+    tempOutputFile << "LIBBATCH_NODEFILE=`mktemp nodefile-XXXXXXXXXX`" << endl;
+    tempOutputFile << "srun hostname > $LIBBATCH_NODEFILE" << endl;
+    tempOutputFile << "export LIBBATCH_NODEFILE" << endl;
+
+    // Launch the executable from the work directory
+    tempOutputFile << "cd " << workDir << endl;
+    tempOutputFile << "./" + fileNameToExecute << endl;
+
+    tempOutputFile.flush();
+    tempOutputFile.close();
+
+    cerr << "Batch script file generated is: " << tmpFileName << endl;
+
+    string remoteFileName = rootNameToExecute + "_slurm.cmd";
+    int status = _protocol.copyFile(tmpFileName, "", "",
+                                    workDir + "/" + remoteFileName,
+                                    _hostname, _username);
+    if (status)
+      throw EmulationException("Cannot copy command file on host " + _hostname);
+
+    return remoteFileName;
+  }
+
+  void BatchManager_eSlurm::deleteJob(const JobId & jobid)
+  {
+    // define command to delete job: "scancel <reference>" executed on the server
+    string subCommand = "scancel " + jobid.getReference();
+    string command = _protocol.getExecCommand(subCommand, _hostname, _username);
+    cerr << command.c_str() << endl;
+
+    int status = system(command.c_str());
+    if (status) // any non-zero status is reported as EmulationException
+      throw EmulationException("Can't delete job " + jobid.getReference());
+
+    cerr << "job " << jobid.getReference() << " killed" << endl;
+  }
+
+  void BatchManager_eSlurm::holdJob(const JobId & jobid)
+  {
+    throw NotYetImplementedException("BatchManager_eSlurm::holdJob");
+  }
+
+  void BatchManager_eSlurm::releaseJob(const JobId & jobid)
+  {
+    throw NotYetImplementedException("BatchManager_eSlurm::releaseJob");
+  }
+
+  void BatchManager_eSlurm::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env)
+  {
+    throw NotYetImplementedException("BatchManager_eSlurm::alterJob");
+  }
+
+  void BatchManager_eSlurm::alterJob(const JobId & jobid, const Parametre & param)
+  {
+    throw NotYetImplementedException("BatchManager_eSlurm::alterJob");
+  }
+
+  void BatchManager_eSlurm::alterJob(const JobId & jobid, const Environnement & env)
+  {
+    throw NotYetImplementedException("BatchManager_eSlurm::alterJob");
+  }
+
+  JobInfo BatchManager_eSlurm::queryJob(const JobId & jobid)
+  {
+    // define name of log file (local); NOTE(review): one file per call, not removed in this function
+    string logFile = generateTemporaryFileName("slurm-querylog-" + jobid.getReference());
+
+    // define command to query batch; its captured output is parsed by JobInfo_eSlurm
+    string subCommand = "squeue -o %t -j " + jobid.getReference();
+    string command = _protocol.getExecCommand(subCommand, _hostname, _username);
+    command += " > ";
+    command += logFile;
+    cerr << command.c_str() << endl;
+    int status = system(command.c_str());
+    if (status != 0) // NOTE(review): throws before JobInfo_eSlurm can apply its "empty status => FINISHED" fallback
+      throw EmulationException("Can't query job " + jobid.getReference());
+
+    JobInfo_eSlurm jobinfo = JobInfo_eSlurm(jobid.getReference(), logFile);
+    return jobinfo;
+  }
+
+  const JobId BatchManager_eSlurm::addJob(const Job & job, const string reference)
+  {
+    return JobId(this, reference); // only wraps the given reference; job is not used and nothing is submitted
+  }
+
+}
diff --git a/src/Slurm/Batch_BatchManager_eSlurm.hxx b/src/Slurm/Batch_BatchManager_eSlurm.hxx
new file mode 100644
index 0000000..58e4ff0
--- /dev/null
+++ b/src/Slurm/Batch_BatchManager_eSlurm.hxx
@@ -0,0 +1,75 @@
+// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
+//
+// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Batch_BatchManager_eSlurm.hxx : declaration of the Slurm batch manager
+ *
+ * Created on: 12 may 2011
+ * Author : Renaud BARATE - EDF R&D
+ */
+
+#ifndef _BATCHMANAGER_ESLURM_H_
+#define _BATCHMANAGER_ESLURM_H_
+
+#include // NOTE(review): header name missing in this copy of the patch (all <...> includes were stripped) - restore from the upstream commit
+
+#include
+#include
+#include
+#include
+#include
+
+namespace Batch {
+  // Batch manager that drives Slurm by running sbatch / scancel / squeue on the server through the communication protocol.
+  class BATCH_EXPORT BatchManager_eSlurm : public BatchManager_eClient
+  {
+  public:
+
+    BatchManager_eSlurm(const FactBatchManager * parent,
+                        const char * host = "localhost",
+                        const char * username = "",
+                        CommunicationProtocolType protocolType = SSH,
+                        const char * mpiImpl = "nompi",
+                        int nb_proc_per_node = 1);
+    virtual ~BatchManager_eSlurm();
+
+    // Methods to control jobs (holdJob, releaseJob and alterJob throw NotYetImplementedException)
+    virtual const JobId submitJob(const Job & job);
+    virtual void deleteJob(const JobId & jobid);
+    virtual void holdJob(const JobId & jobid);
+    virtual void releaseJob(const JobId & jobid);
+    virtual void alterJob(const JobId & jobid, const Parametre & param, const Environnement & env);
+    virtual void alterJob(const JobId & jobid, const Parametre & param);
+    virtual void alterJob(const JobId & jobid, const Environnement & env);
+    virtual JobInfo queryJob(const JobId & jobid);
+    virtual const JobId addJob(const Job & job, const std::string reference);
+
+  protected:
+    // Writes the Slurm script for the job, copies it to the server and returns its remote name.
+    std::string buildCommandFile(const Job & job);
+    // Processes per node: written as --ntasks-per-node and used to compute --nodes.
+    int _nb_proc_per_node;
+
+  };
+
+}
+
+#endif
diff --git a/src/Slurm/Batch_FactBatchManager_eSlurm.cxx b/src/Slurm/Batch_FactBatchManager_eSlurm.cxx
new file mode 100644
index 0000000..2d804b4
--- /dev/null
+++ b/src/Slurm/Batch_FactBatchManager_eSlurm.cxx
@@ -0,0 +1,62 @@
+// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
+//
+// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Batch_FactBatchManager_eSlurm.cxx : factory creating BatchManager_eSlurm instances
+ *
+ * Created on: 12 may 2011
+ * Author : Renaud BARATE - EDF R&D
+ */
+
+#include // NOTE(review): header name missing in this copy of the patch (all <...> includes were stripped) - restore from the upstream commit
+
+#include "Batch_BatchManager_eSlurm.hxx"
+#include "Batch_FactBatchManager_eSlurm.hxx"
+
+namespace Batch {
+  // File-local factory instance; presumably its construction registers the "eSlurm" type in the catalog (registration code not visible here - TODO confirm).
+  static FactBatchManager_eSlurm sFBM_eSlurm;
+
+  FactBatchManager_eSlurm::FactBatchManager_eSlurm() : FactBatchManager_eClient("eSlurm")
+  {
+  }
+
+  FactBatchManager_eSlurm::~FactBatchManager_eSlurm()
+  {
+  }
+
+  BatchManager * FactBatchManager_eSlurm::operator() (const char * hostname) const
+  {
+    // Heap-allocates a manager with the constructor defaults for every other argument; nothing here deletes it.
+    return new BatchManager_eSlurm(this, hostname);
+  }
+
+  BatchManager_eClient * FactBatchManager_eSlurm::operator() (const char * hostname,
+                                                              const char * username,
+                                                              CommunicationProtocolType protocolType,
+                                                              const char * mpiImpl,
+                                                              int nb_proc_per_node) const
+  {
+    // Heap-allocates a fully parameterised manager; nothing here deletes it.
+    return new BatchManager_eSlurm(this, hostname, username, protocolType, mpiImpl, nb_proc_per_node);
+  }
+
+}
diff --git a/src/Slurm/Batch_FactBatchManager_eSlurm.hxx b/src/Slurm/Batch_FactBatchManager_eSlurm.hxx
new file mode 100644
index 0000000..58c4559
--- /dev/null
+++ b/src/Slurm/Batch_FactBatchManager_eSlurm.hxx
@@ -0,0 +1,57 @@
+// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
+//
+// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Batch_FactBatchManager_eSlurm.hxx : declaration of the BatchManager_eSlurm factory
+ *
+ * Created on: 12 may 2011
+ * Author : Renaud BARATE - EDF R&D
+ */
+
+#ifndef _FACTBATCHMANAGER_ESLURM_H_
+#define _FACTBATCHMANAGER_ESLURM_H_
+
+#include // NOTE(review): header name missing in this copy of the patch (all <...> includes were stripped) - restore from the upstream commit
+#include
+#include
+#include
+
+namespace Batch {
+  // Factory for BatchManager_eSlurm; both call operators return a newly allocated manager.
+  class BATCH_EXPORT FactBatchManager_eSlurm : public FactBatchManager_eClient
+  {
+  public:
+
+    FactBatchManager_eSlurm();
+    virtual ~FactBatchManager_eSlurm();
+
+    virtual BatchManager * operator() (const char * hostname) const;
+    virtual BatchManager_eClient * operator() (const char * hostname,
+                                               const char * username,
+                                               CommunicationProtocolType protocolType,
+                                               const char * mpiImpl,
+                                               int nb_proc_per_node = 1) const;
+
+  };
+
+}
+
+#endif
diff --git a/src/Slurm/Batch_JobInfo_eSlurm.cxx b/src/Slurm/Batch_JobInfo_eSlurm.cxx
new file mode 100644
index 0000000..738d1f7
--- /dev/null
+++ b/src/Slurm/Batch_JobInfo_eSlurm.cxx
@@ -0,0 +1,93 @@
+// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
+//
+// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Batch_JobInfo_eSlurm.cxx : job state extracted from the captured output of squeue
+ *
+ * Created on: 12 may 2011
+ * Author : Renaud BARATE - EDF R&D
+ */
+
+#include // NOTE(review): header name missing in this copy of the patch (all <...> includes were stripped) - restore from the upstream commit
+#include
+#include
+
+#include
+#include
+
+#include "Batch_JobInfo_eSlurm.hxx"
+
+using namespace std;
+
+namespace Batch {
+  // Store id in _param[ID] and map the state code on line 2 of logFile to _param[STATE]; throws RunTimeException on an unknown code.
+  JobInfo_eSlurm::JobInfo_eSlurm(const std::string & id, const std::string & logFile)
+    : JobInfo()
+  {
+    _param[ID] = id;
+
+    // read log file
+    ifstream log(logFile.c_str());
+    string line;
+
+    // status should be on the second line; if either read fails, do not parse a stale first line
+    if (!getline(log, line) || !getline(log, line))
+      line.clear();
+    log.close();
+    string status;
+    istringstream iss(line); // first whitespace-delimited token of the line is the state code
+    iss >> status;
+
+    if (status.size() == 0) {
+      // On some batch managers, the job is deleted as soon as it is finished,
+      // so we have to consider that an unknown job is a finished one, even if
+      // it is not always true.
+      _param[STATE] = FINISHED;
+    } else if (status == "CA") { // Canceled
+      _param[STATE] = FAILED;
+    } else if (status == "CD") { // Completed
+      _param[STATE] = FINISHED;
+    } else if (status == "CF") { // Configuring
+      _param[STATE] = QUEUED;
+    } else if (status == "CG") { // Completing
+      _param[STATE] = RUNNING;
+    } else if (status == "F") { // Failed
+      _param[STATE] = FAILED;
+    } else if (status == "NF") { // Node Fail
+      _param[STATE] = FAILED;
+    } else if (status == "PD") { // Pending
+      _param[STATE] = QUEUED;
+    } else if (status == "R") { // Running
+      _param[STATE] = RUNNING;
+    } else if (status == "S") { // Suspended
+      _param[STATE] = PAUSED;
+    } else if (status == "TO") { // Timeout
+      _param[STATE] = FAILED;
+    } else {
+      throw RunTimeException("Unknown job state code: \"" + status + "\"");
+    }
+  }
+
+  JobInfo_eSlurm::~JobInfo_eSlurm()
+  {
+  }
+
+}
diff --git a/src/Slurm/Batch_JobInfo_eSlurm.hxx b/src/Slurm/Batch_JobInfo_eSlurm.hxx
new file mode 100644
index 0000000..3678c82
--- /dev/null
+++ b/src/Slurm/Batch_JobInfo_eSlurm.hxx
@@ -0,0 +1,49 @@
+// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
+//
+// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Batch_JobInfo_eSlurm.hxx : declaration of the job information class for Slurm
+ *
+ * Created on: 12 may 2011
+ * Author : Renaud BARATE - EDF R&D
+ */
+
+#ifndef _JOBINFO_ESLURM_H_
+#define _JOBINFO_ESLURM_H_
+
+#include // NOTE(review): header name missing in this copy of the patch (all <...> includes were stripped) - restore from the upstream commit
+
+#include
+
+namespace Batch {
+  // NOTE(review): declared without BATCH_EXPORT, unlike the manager and factory classes of this patch - confirm this is intended.
+  class JobInfo_eSlurm : public JobInfo
+  {
+  public:
+    // id: job reference; logFile: path of the file whose second line is read for the state code.
+    JobInfo_eSlurm(const std::string & id, const std::string & logFile);
+    virtual ~JobInfo_eSlurm();
+
+  };
+
+}
+
+#endif
diff --git a/src/Slurm/CMakeLists.txt b/src/Slurm/CMakeLists.txt
new file mode 100644
index 0000000..ea9ee7c
--- /dev/null
+++ b/src/Slurm/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
+#
+# Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+# CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# + +SET(CLASS_LIST Slurm/Batch_BatchManager_eSlurm + Slurm/Batch_FactBatchManager_eSlurm + Slurm/Batch_JobInfo_eSlurm + ) + +APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST}) + +IF (TEST_ENABLED) + add_subdirectory(Test) +ENDIF (TEST_ENABLED) diff --git a/src/Slurm/Test/CMakeLists.txt b/src/Slurm/Test/CMakeLists.txt new file mode 100644 index 0000000..b918bd0 --- /dev/null +++ b/src/Slurm/Test/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE +# +# Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +# CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# + +# Just copy the test scripts to the binary dir +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/seta.sh ${CMAKE_CURRENT_BINARY_DIR}/seta.sh COPYONLY) +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/setb.sh ${CMAKE_CURRENT_BINARY_DIR}/setb.sh COPYONLY) +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/test-script.sh ${CMAKE_CURRENT_BINARY_DIR}/test-script.sh COPYONLY) + +# set the include directories +include_directories(${CMAKE_SOURCE_DIR}/src/Core) +include_directories(${CMAKE_SOURCE_DIR}/src/Core/Test) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Build the test programs and add the tests +add_executable(Test_eSlurm Test_eSlurm.cxx) +target_link_libraries(Test_eSlurm Batch SimpleParser) + +IF (HAS_SSH) + ADD_TEST(eSlurm_SSH Test_eSlurm SSH) +ENDIF (HAS_SSH) + +#IF (HAS_RSH) +# ADD_TEST(eSlurm_RSH Test_eSlurm RSH) +#ENDIF (HAS_RSH) diff --git a/src/Slurm/Test/Test_eSlurm.cxx b/src/Slurm/Test/Test_eSlurm.cxx new file mode 100644 index 0000000..16142e2 --- /dev/null +++ b/src/Slurm/Test/Test_eSlurm.cxx @@ -0,0 +1,162 @@ +// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE +// +// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. 
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Test_eSlurm.cxx : end-to-end test submitting a small job through the eSlurm batch manager
+ *
+ * Author : Renaud BARATE - EDF R&D
+ * Date : May 2011
+ *
+ */
+
+#include // NOTE(review): header name missing in this copy of the patch (all <...> includes were stripped) - restore from the upstream commit
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+using namespace std;
+using namespace Batch;
+// Print the command-line synopsis on standard output.
+void print_usage()
+{
+  cout << "usage: Test_eSlurm PROTOCOL" << endl;
+  cout << " PROTOCOL \"SSH\" or \"RSH\"" << endl;
+}
+// Submit test-script.sh through an eSlurm batch manager, wait for it and check result.txt; returns 0 on success, 1 on any failure.
+int main(int argc, char** argv)
+{
+  // Parse argument: exactly one, the protocol name
+  if (argc != 2) {
+    print_usage();
+    return 1;
+  }
+  CommunicationProtocolType protocol;
+  if (strcmp(argv[1], "SSH") == 0)
+    protocol = SSH;
+  else if (strcmp(argv[1], "RSH") == 0)
+    protocol = RSH;
+  else {
+    print_usage();
+    return 1;
+  }
+
+  cout << "*******************************************************************************************" << endl;
+  cout << "This program tests the batch submission based on Slurm emulation. Passwordless" << endl;
+  cout << "authentication must be used for this test to pass. For SSH, this can be configured with" << endl;
+  cout << "ssh-agent for instance. For RSH, this can be configured with the .rhosts file." << endl;
+  cout << "*******************************************************************************************" << endl;
+
+  // remove any previous result file, if present
+  remove("result.txt");
+
+  try {
+    // Parse the test configuration file
+    SimpleParser parser;
+    parser.parseTestConfigFile();
+    const string & homedir = parser.getValue("TEST_ESLURM_HOMEDIR");
+    const string & host = parser.getValue("TEST_ESLURM_HOST");
+    const string & user = parser.getValue("TEST_ESLURM_USER");
+    int timeout = parser.getValueAsInt("TEST_ESLURM_TIMEOUT");
+
+    // Define the job...
+    Job job;
+    // ... and its parameters ...
+    Parametre p;
+    p[EXECUTABLE] = "./test-script.sh";
+    p[NAME] = string("Test_eSlurm_") + argv[1];
+    p[WORKDIR] = homedir + "/tmp/Batch";
+    p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh");
+    p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh");
+    p[OUTFILE] = Couple("result.txt", "tmp/Batch/result.txt");
+    p[TMPDIR] = "tmp/Batch/";
+    p[NBPROC] = 1;
+    p[MAXWALLTIME] = 1;
+    p[MAXRAMSIZE] = 50;
+    p[HOMEDIR] = homedir;
+    job.setParametre(p);
+    // ... and its environment
+    Environnement e;
+    e["MYENVVAR"] = "MYVALUE";
+    job.setEnvironnement(e);
+    cout << job << endl;
+
+    // Get the catalog
+    BatchManagerCatalog& c = BatchManagerCatalog::getInstance();
+
+    // Create a BatchManager of type eSlurm on the configured host
+    FactBatchManager_eClient * fbm = (FactBatchManager_eClient *)(c("eSlurm"));
+    BatchManager_eClient * bm = (*fbm)(host.c_str(), user.c_str(), protocol);
+
+    // Submit the job to the BatchManager
+    JobId jobid = bm->submitJob(job);
+    cout << jobid.__repr__() << endl;
+
+    // Wait for the end of the job
+    string state = bm->waitForJobEnd(jobid, timeout);
+
+    if (state == FINISHED) {
+      cout << "Job " << jobid.__repr__() << " is done" << endl;
+      bm->importOutputFiles(job, "resultdir/seconddirname");
+    } else if (state == FAILED) {
+      cerr << "Job " << jobid.__repr__() << " finished in error" << endl;
+      bm->importOutputFiles(job, "resultdir/seconddirname");
+      return 1;
+    } else {
+      cerr << "Timeout while executing job" << endl;
+      return 1;
+    }
+
+  } catch (const GenericException & e) { // caught by reference: no copy, no slicing of derived exceptions
+    cerr << "Error: " << e << endl;
+    return 1;
+  } catch (const ParserException & e) {
+    cerr << "Parser error: " << e.what() << endl;
+    return 1;
+  }
+
+  // test the result file: test-script.sh writes MYENVVAR and c = a * b (4 * 3) into it
+  try {
+    SimpleParser resultParser;
+    resultParser.parse("resultdir/seconddirname/result.txt");
+    cout << "Result:" << endl << resultParser;
+    const string & envvar = resultParser.getValue("MYENVVAR");
+    int result = resultParser.getValueAsInt("c");
+    if (envvar == "MYVALUE" && result == 12) {
+      cout << "OK, Expected result found." << endl;
+      return 0;
+    } else {
+      cerr << "Error, result is not the expected one (MYENVVAR = MYVALUE, c = 12)." << endl;
+      return 1;
+    }
+  } catch (const ParserException & e) {
+    cerr << "Parser error on result file: " << e.what() << endl;
+    return 1;
+  }
+}
diff --git a/src/Slurm/Test/seta.sh b/src/Slurm/Test/seta.sh
new file mode 100644
index 0000000..42d1e38
--- /dev/null
+++ b/src/Slurm/Test/seta.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+a=4
diff --git a/src/Slurm/Test/setb.sh b/src/Slurm/Test/setb.sh
new file mode 100644
index 0000000..8969060
--- /dev/null
+++ b/src/Slurm/Test/setb.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+b=3
diff --git a/src/Slurm/Test/test-script.sh b/src/Slurm/Test/test-script.sh
new file mode 100755
index 0000000..1d56247
--- /dev/null
+++ b/src/Slurm/Test/test-script.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+. ./seta.sh
+. ./setb.sh
+
+c=`expr $a "*" $b`
+
+echo "MYENVVAR = $MYENVVAR" > result.txt
+echo "c = $c" >> result.txt
-- 
2.30.2