From f6cf6adcb091e6d881ae2d564433d147daba36cf Mon Sep 17 00:00:00 2001 From: barate Date: Mon, 6 Feb 2012 10:07:29 +0000 Subject: [PATCH] Merge from trunk --- src/Core/Batch_Utils.cxx | 64 ++++++++++++++++++ src/Core/Batch_Utils.hxx | 54 +++++++++++++++ src/Core/CMakeLists.txt | 1 + src/LSF/Batch_BatchManager_eLSF.cxx | 88 +++++++------------------ src/LSF/Batch_BatchManager_eLSF.hxx | 2 +- src/LSF/Batch_JobInfo_eLSF.cxx | 18 ++--- src/LSF/Batch_JobInfo_eLSF.hxx | 3 +- src/LSF/Test/Test_eLSF.cxx | 4 +- src/PBS/Batch_BatchManager_ePBS.cxx | 87 ++++++++---------------- src/PBS/Batch_BatchManager_ePBS.hxx | 2 +- src/PBS/Batch_JobInfo_ePBS.cxx | 28 +++----- src/PBS/Batch_JobInfo_ePBS.hxx | 2 +- src/PBS/Test/Test_ePBS.cxx | 2 +- src/Slurm/Batch_BatchManager_eSlurm.cxx | 59 ++++++----------- src/Slurm/Batch_JobInfo_eSlurm.cxx | 18 ++--- src/Slurm/Batch_JobInfo_eSlurm.hxx | 2 +- 16 files changed, 220 insertions(+), 214 deletions(-) create mode 100644 src/Core/Batch_Utils.cxx create mode 100644 src/Core/Batch_Utils.hxx diff --git a/src/Core/Batch_Utils.cxx b/src/Core/Batch_Utils.cxx new file mode 100644 index 0000000..3b706d2 --- /dev/null +++ b/src/Core/Batch_Utils.cxx @@ -0,0 +1,64 @@ +// Copyright (C) 2007-2011 CEA/DEN, EDF R&D, OPEN CASCADE +// +// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * Batch_Utils.cxx + * + * Created on: 30 jan. 2012 + * Author : Renaud BARATE - EDF R&D + */ + +#include + +#include +#include "Batch_Utils.hxx" + +#ifdef MSVC +#define popen _popen +#define pclose _pclose +#endif + +using namespace std; +namespace Batch { + +int Utils::getCommandOutput(const string & command, string & output) +{ + // Reinitialize output + output = ""; + + // Call command + FILE * fp = popen(command.c_str(), "r"); + if (fp == NULL) { + return -1; + } + + // Read the output and store it + char buf[1024]; + while (fgets(buf, sizeof(buf), fp) != NULL) { + output += buf; + } + + // close and get status + int status = pclose(fp); + return status; +} + +} diff --git a/src/Core/Batch_Utils.hxx b/src/Core/Batch_Utils.hxx new file mode 100644 index 0000000..24f17d1 --- /dev/null +++ b/src/Core/Batch_Utils.hxx @@ -0,0 +1,54 @@ +// Copyright (C) 2007-2011 CEA/DEN, EDF R&D, OPEN CASCADE +// +// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * Batch_Utils.hxx + * + * Created on: 30 jan. 2012 + * Author : Renaud BARATE - EDF R&D + */ + +#ifndef BATCH_UTILS_HXX_ +#define BATCH_UTILS_HXX_ + +#include + +namespace Batch { + +class Utils { +public: + + /** + * Call a command with the system shell and stores its output in parameter "output". + * Returns the return code of the command. + */ + static int getCommandOutput(const std::string & command, std::string & output); + +private: + + // No instanciation possible as this class provides only static methods + Utils() { } + +}; + +} + +#endif /* BATCH_UTILS_HXX_ */ diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt index 1e4f5c2..7d13826 100644 --- a/src/Core/CMakeLists.txt +++ b/src/Core/CMakeLists.txt @@ -54,6 +54,7 @@ SET(CLASS_LIST Core/Batch_APIInternalFailureException Core/Batch_StringType Core/Batch_TypeMismatchException Core/Batch_Versatile + Core/Batch_Utils ) APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST}) diff --git a/src/LSF/Batch_BatchManager_eLSF.cxx b/src/LSF/Batch_BatchManager_eLSF.cxx index 1da7a92..af1603a 100644 --- a/src/LSF/Batch_BatchManager_eLSF.cxx +++ b/src/LSF/Batch_BatchManager_eLSF.cxx @@ -29,25 +29,12 @@ * */ -#include -#include - -#include +#include #include #include -#include -#include - -#include -#include -#ifdef WIN32 -#include -#else -#include -#endif - -#include "Batch_Constants.hxx" +#include +#include #include "Batch_BatchManager_eLSF.hxx" #include "Batch_JobInfo_eLSF.hxx" @@ -74,13 +61,8 @@ namespace Batch { // Methode pour le controle des jobs : soumet un job au gestionnaire const JobId BatchManager_eLSF::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const std::string workDir = params[WORKDIR]; - const string fileToExecute = params[EXECUTABLE]; - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // export input files on cluster cerr << "Export des fichiers en entree" << endl; @@ -88,41 +70,24 @@ namespace Batch { // build batch script for job cerr << "Construction du script de batch" << endl; - buildBatchScript(job); + string scriptFile = buildSubmissionScript(job); cerr << "Script envoye" << endl; - // define name of log file (local) - string logFile = generateTemporaryFileName("LSF-submitlog"); - // define command to submit batch - string subCommand = string("cd ") + workDir + "; bsub < " + fileNameToExecute + "_Batch.sh"; + string subCommand = string("cd ") + workDir + "; bsub < " + scriptFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; command += " 2>&1"; cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - { - ifstream error_message(logFile.c_str()); - std::string mess; - std::string temp; - while(std::getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw EmulationException("Can't submit job, error was: " + output); - string sline(line); - int p10 = sline.find("<"); - int p20 = sline.find(">"); - string strjob = sline.substr(p10+1,p20-p10-1); + // read id of submitted job in output + int p10 = output.find("<"); + int p20 = output.find(">"); + string strjob = output.substr(p10+1,p20-p10-1); JobId id(this, strjob); return id; @@ -192,20 +157,16 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - // define name of log file (local) - string logFile = generateTemporaryFileName(string("LSF-querylog-id") + jobid.getReference()); - // define command to query batch string subCommand = string("bjobs ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - int status = system(command.c_str()); - if (status) - throw EmulationException("Error of connection on remote host"); - JobInfo_eLSF ji = JobInfo_eLSF(id,logFile); + string output; + int status = Utils::getCommandOutput(command, output); + if (status) throw EmulationException("Error of connection on remote host"); + + JobInfo_eLSF ji = JobInfo_eLSF(id, output); return ji; } @@ -217,9 +178,8 @@ namespace Batch { throw EmulationException("Not yet implemented"); } - void BatchManager_eLSF::buildBatchScript(const Job & job) + std::string BatchManager_eLSF::buildSubmissionScript(const Job & job) { -#ifndef WIN32 //TODO: need for porting on Windows Parametre params = job.getParametre(); // Job Parameters @@ -260,6 +220,8 @@ namespace Batch { std::string TmpFileName = createAndOpenTemporaryFile("LSF-script", tempOutputFile); tempOutputFile << "#! /bin/sh -f" << endl ; + if (params.find(NAME) != params.end()) + tempOutputFile << "#BSUB -J " << params[NAME] << endl; if (queue != "") tempOutputFile << "#BSUB -q " << queue << endl; if( edt > 0 ) @@ -319,17 +281,15 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - BATCH_CHMOD(TmpFileName.c_str(), 0x1ED); cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + string remoteFileName = rootNameToExecute + "_Batch.sh"; int status = _protocol.copyFile(TmpFileName, "", "", - workDir + "/" + rootNameToExecute + "_Batch.sh", + workDir + "/" + remoteFileName, _hostname, _username); if (status) throw EmulationException("Error of connection on remote host"); - -#endif - + return remoteFileName; } std::string BatchManager_eLSF::getWallTime(const long edt) diff --git a/src/LSF/Batch_BatchManager_eLSF.hxx b/src/LSF/Batch_BatchManager_eLSF.hxx index 9b14762..b9e041a 100644 --- a/src/LSF/Batch_BatchManager_eLSF.hxx +++ b/src/LSF/Batch_BatchManager_eLSF.hxx @@ -69,7 +69,7 @@ namespace Batch { virtual const Batch::JobId addJob(const Batch::Job & job, const std::string reference); // ajoute un nouveau job sans le soumettre protected: - void buildBatchScript(const Job & job); + std::string buildSubmissionScript(const Job & job); std::string getWallTime(const long edt); private: diff --git a/src/LSF/Batch_JobInfo_eLSF.cxx b/src/LSF/Batch_JobInfo_eLSF.cxx index 349c575..4040837 100644 --- a/src/LSF/Batch_JobInfo_eLSF.cxx +++ b/src/LSF/Batch_JobInfo_eLSF.cxx @@ -30,34 +30,26 @@ */ #include -#include -#include #include -#include "Batch_Constants.hxx" -#include "Batch_Parametre.hxx" -#include "Batch_Environnement.hxx" -#include "Batch_RunTimeException.hxx" -#include "Batch_APIInternalFailureException.hxx" +#include #include "Batch_JobInfo_eLSF.hxx" using namespace std; namespace Batch { - - // Constructeurs - JobInfo_eLSF::JobInfo_eLSF(int id, string logFile) : JobInfo() + JobInfo_eLSF::JobInfo_eLSF(int id, const std::string & queryOutput) : JobInfo() { - // On remplit les membres _param et _env + // Fill ID parameter ostringstream oss; oss << id; _param[ID] = oss.str(); - // read status of job in log file + // read query output string line; - ifstream fp(logFile.c_str()); + istringstream fp(queryOutput); getline(fp, line); // On some batch managers, the job is deleted soon after it is finished, diff --git a/src/LSF/Batch_JobInfo_eLSF.hxx b/src/LSF/Batch_JobInfo_eLSF.hxx index 1e83631..75a59b2 100644 --- a/src/LSF/Batch_JobInfo_eLSF.hxx +++ b/src/LSF/Batch_JobInfo_eLSF.hxx @@ -32,7 +32,6 @@ #ifndef _JOBINFO_LSF_H_ #define _JOBINFO_LSF_H_ -#include "Batch_RunTimeException.hxx" #include "Batch_JobInfo.hxx" #include @@ -44,7 +43,7 @@ namespace Batch { public: // Constructeurs et destructeur JobInfo_eLSF() : _running(false) {}; - JobInfo_eLSF(int id,std::string logFile); + JobInfo_eLSF(int id, const std::string & queryOutput); virtual ~JobInfo_eLSF(); // Constructeur par recopie diff --git a/src/LSF/Test/Test_eLSF.cxx b/src/LSF/Test/Test_eLSF.cxx index b3962a9..67d1a2f 100644 --- a/src/LSF/Test/Test_eLSF.cxx +++ b/src/LSF/Test/Test_eLSF.cxx @@ -90,7 +90,7 @@ int main(int argc, char** argv) // ... and its parameters ... Parametre p; p[EXECUTABLE] = "./test-script.sh"; - p[NAME] = string("Test eLSF ") + argv[1]; + p[NAME] = string("Test_eLSF_") + argv[1]; p[WORKDIR] = homedir + "/tmp/Batch"; p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh"); p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh"); @@ -98,7 +98,7 @@ int main(int argc, char** argv) p[TMPDIR] = "tmp/Batch/"; p[NBPROC] = 1; p[MAXWALLTIME] = 1; - p[MAXRAMSIZE] = 50; + p[MAXRAMSIZE] = 128; p[HOMEDIR] = homedir; p[EXCLUSIVE] = true; job.setParametre(p); diff --git a/src/PBS/Batch_BatchManager_ePBS.cxx b/src/PBS/Batch_BatchManager_ePBS.cxx index c92a121..aee211b 100644 --- a/src/PBS/Batch_BatchManager_ePBS.cxx +++ b/src/PBS/Batch_BatchManager_ePBS.cxx @@ -29,25 +29,13 @@ * */ -#include -#include - -#include +#include #include #include -#include - -#include -#include -#include -#ifdef MSVC -#include -#else -#include -#endif +#include +#include -#include "Batch_Constants.hxx" #include "Batch_BatchManager_ePBS.hxx" #include "Batch_JobInfo_ePBS.hxx" @@ -60,10 +48,10 @@ namespace Batch { CommunicationProtocolType protocolType, const char * mpiImpl, int nb_proc_per_node) : BatchManager(parent, host), - BatchManager_eClient(parent, host, username, protocolType, mpiImpl) + BatchManager_eClient(parent, host, username, protocolType, mpiImpl), + _nb_proc_per_node(nb_proc_per_node) { // Nothing to do - _nb_proc_per_node = nb_proc_per_node; } // Destructeur @@ -75,51 +63,31 @@ namespace Batch { // Methode pour le controle des jobs : soumet un job au gestionnaire const JobId BatchManager_ePBS::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const std::string workDir = params[WORKDIR]; - const string fileToExecute = params[EXECUTABLE]; - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // export input files on cluster exportInputFiles(job); // build batch script for job - buildBatchScript(job); - - // define name of log file (local) - string logFile = generateTemporaryFileName("PBS-submitlog"); + string scriptFile = buildSubmissionScript(job); // define command to submit batch - string subCommand = string("cd ") + workDir + "; qsub " + fileNameToExecute + "_Batch.sh"; + string subCommand = string("cd ") + workDir + "; qsub " + scriptFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; command += " 2>&1"; cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - { - ifstream error_message(logFile.c_str()); - std::string mess; - std::string temp; - while(std::getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - // read id of submitted job in log file - ifstream idfile(logFile.c_str()); - string sline; - idfile >> sline; - idfile.close(); - if (sline.size() == 0) - throw EmulationException("Error in the submission of the job on the remote host"); + // submit job + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw EmulationException("Can't submit job, error was: " + output); + + // normally output contains only id of submitted job, we just need to remove the final \n + string jobref = output.substr(0, output.size() - 1); + JobId id(this, jobref); - JobId id(this, sline); return id; } @@ -187,20 +155,17 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - // define name of log file (local) - string logFile = generateTemporaryFileName(string("PBS-querylog-id") + jobid.getReference()); - // define command to query batch string subCommand = string("qstat -f ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - int status = system(command.c_str()); + + string output; + int status = Utils::getCommandOutput(command, output); if(status && status != 153 && status != 256*153) throw EmulationException("Error of connection on remote host"); - JobInfo_ePBS ji = JobInfo_ePBS(id,logFile); + JobInfo_ePBS ji = JobInfo_ePBS(id, output); return ji; } @@ -210,9 +175,8 @@ namespace Batch { throw EmulationException("Not yet implemented"); } - void BatchManager_ePBS::buildBatchScript(const Job & job) + std::string BatchManager_ePBS::buildSubmissionScript(const Job & job) { - std::cerr << "BuildBatchScript" << std::endl; Parametre params = job.getParametre(); Environnement env = job.getEnvironnement(); @@ -254,6 +218,10 @@ namespace Batch { std::string TmpFileName = createAndOpenTemporaryFile("PBS-script", tempOutputFile); tempOutputFile << "#! /bin/sh -f" << endl; + if (params.find(NAME) != params.end()) { + tempOutputFile << "#PBS -N " << params[NAME] << endl; + } + if (nbproc > 0) { int nb_full_nodes = nbproc / _nb_proc_per_node; @@ -310,13 +278,14 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - BATCH_CHMOD(TmpFileName.c_str(), 0x1ED); cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + string remoteFileName = rootNameToExecute + "_Batch.sh"; int status = _protocol.copyFile(TmpFileName, "", "", - workDir + "/" + rootNameToExecute + "_Batch.sh", + workDir + "/" + remoteFileName, _hostname, _username); if (status) throw EmulationException("Error of connection on remote host, cannot copy batch submission file"); + return remoteFileName; } } diff --git a/src/PBS/Batch_BatchManager_ePBS.hxx b/src/PBS/Batch_BatchManager_ePBS.hxx index e525d19..7511ae1 100644 --- a/src/PBS/Batch_BatchManager_ePBS.hxx +++ b/src/PBS/Batch_BatchManager_ePBS.hxx @@ -70,7 +70,7 @@ namespace Batch { virtual const Batch::JobId addJob(const Batch::Job & job, const std::string reference); // ajoute un nouveau job sans le soumettre protected: - void buildBatchScript(const Job & job); + std::string buildSubmissionScript(const Job & job); private: int _nb_proc_per_node; diff --git a/src/PBS/Batch_JobInfo_ePBS.cxx b/src/PBS/Batch_JobInfo_ePBS.cxx index 55fd6fc..c69c0a4 100644 --- a/src/PBS/Batch_JobInfo_ePBS.cxx +++ b/src/PBS/Batch_JobInfo_ePBS.cxx @@ -31,14 +31,9 @@ #include #include -#include #include -#include "Batch_Constants.hxx" -#include "Batch_Parametre.hxx" -#include "Batch_Environnement.hxx" -#include "Batch_RunTimeException.hxx" -#include "Batch_APIInternalFailureException.hxx" +#include #include "Batch_JobInfo_ePBS.hxx" using namespace std; @@ -46,27 +41,24 @@ using namespace std; namespace Batch { // Constructeurs - JobInfo_ePBS::JobInfo_ePBS(int id, string logFile) : JobInfo() + JobInfo_ePBS::JobInfo_ePBS(int id, string queryOutput) : JobInfo() { - // On remplit les membres _param et _env + // Fill ID parameter ostringstream oss; oss << id; _param[ID] = oss.str(); - // read of log file - char line[128]; - ifstream fp(logFile.c_str(),ios::in); - - string sline; + // read query output + istringstream queryIss(queryOutput); + string line; size_t pos = string::npos; - while( (pos == string::npos) && fp.getline(line,80,'\n') ){ - sline = string(line); - pos = sline.find("job_state"); - }; + while( (pos == string::npos) && getline(queryIss, line) ) { + pos = line.find("job_state"); + } if(pos!=string::npos){ string status; - istringstream iss(sline); + istringstream iss(line); iss >> status; iss >> status; iss >> status; diff --git a/src/PBS/Batch_JobInfo_ePBS.hxx b/src/PBS/Batch_JobInfo_ePBS.hxx index 390e6fa..3082150 100644 --- a/src/PBS/Batch_JobInfo_ePBS.hxx +++ b/src/PBS/Batch_JobInfo_ePBS.hxx @@ -43,7 +43,7 @@ namespace Batch { public: // Constructeurs et destructeur JobInfo_ePBS() {}; - JobInfo_ePBS(int id,std::string logFile); + JobInfo_ePBS(int id, std::string queryOutput); virtual ~JobInfo_ePBS(); // Constructeur par recopie diff --git a/src/PBS/Test/Test_ePBS.cxx b/src/PBS/Test/Test_ePBS.cxx index 1040faa..3e6bdc0 100644 --- a/src/PBS/Test/Test_ePBS.cxx +++ b/src/PBS/Test/Test_ePBS.cxx @@ -99,7 +99,7 @@ int main(int argc, char** argv) p[TMPDIR] = "tmp/Batch/"; p[NBPROC] = 1; p[MAXWALLTIME] = 1; - p[MAXRAMSIZE] = 1; + p[MAXRAMSIZE] = 128; p[HOMEDIR] = homedir; p[QUEUE] = queue; job.setParametre(p); diff --git a/src/Slurm/Batch_BatchManager_eSlurm.cxx b/src/Slurm/Batch_BatchManager_eSlurm.cxx index e677509..4cbe8c9 100644 --- a/src/Slurm/Batch_BatchManager_eSlurm.cxx +++ b/src/Slurm/Batch_BatchManager_eSlurm.cxx @@ -27,13 +27,12 @@ */ #include -#include #include #include #include +#include -#include "Batch_FactBatchManager_eSlurm.hxx" #include "Batch_BatchManager_eSlurm.hxx" #include "Batch_JobInfo_eSlurm.hxx" @@ -60,7 +59,6 @@ namespace Batch { // Method to submit a job to the batch manager const JobId BatchManager_eSlurm::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const string workDir = params[WORKDIR]; @@ -70,38 +68,27 @@ namespace Batch { // build command file to submit the job and copy it on the server string cmdFile = buildCommandFile(job); - // define name of log file (local) - string logFile = generateTemporaryFileName("slurm-submitlog"); - // define command to submit batch string subCommand = string("cd ") + workDir + "; sbatch " + cmdFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; - cerr << command.c_str() << endl; - status = system(command.c_str()); - if (status) - { - ifstream error_message(logFile.c_str()); - string mess; - string temp; - while(getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - - // read id of submitted job in log file - string jobref; - ifstream idfile(logFile.c_str()); - string line; - while (idfile && line.compare(0, 20, "Submitted batch job ") != 0) - getline(idfile, line); - idfile.close(); - if (line.compare(0, 20, "Submitted batch job ") == 0) - jobref = line.substr(20); - if (jobref.size() == 0) + command += " 2>&1"; + cout << command.c_str() << endl; + + // submit job + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw EmulationException("Can't submit job, error was: " + output); + + // find id of submitted job in output + string search = "Submitted batch job "; + string::size_type pos = output.find(search); + if (pos == string::npos) throw EmulationException("Error in the submission of the job on the remote host"); + pos += search.size(); + string::size_type endl_pos = output.find('\n', pos); + string::size_type count = (endl_pos == string::npos)? string::npos : endl_pos - pos; + string jobref = output.substr(pos, count); JobId id(this, jobref); return id; @@ -240,21 +227,17 @@ namespace Batch { JobInfo BatchManager_eSlurm::queryJob(const JobId & jobid) { - // define name of log file (local) - string logFile = generateTemporaryFileName("slurm-querylog-" + jobid.getReference()); - // define command to query batch string subCommand = "squeue -o %t -j " + jobid.getReference(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - system(command.c_str()); + string output; + Utils::getCommandOutput(command, output); // We don't test the return code here because with jobs finished since a long time Slurm // returns an error and a message like "slurm_load_jobs error: Invalid job id specified". // So we consider that the job is finished when we get an error. - JobInfo_eSlurm jobinfo = JobInfo_eSlurm(jobid.getReference(), logFile); + JobInfo_eSlurm jobinfo = JobInfo_eSlurm(jobid.getReference(), output); return jobinfo; } diff --git a/src/Slurm/Batch_JobInfo_eSlurm.cxx b/src/Slurm/Batch_JobInfo_eSlurm.cxx index 815279e..ac53a4d 100644 --- a/src/Slurm/Batch_JobInfo_eSlurm.cxx +++ b/src/Slurm/Batch_JobInfo_eSlurm.cxx @@ -26,8 +26,6 @@ * Author : Renaud BARATE - EDF R&D */ -#include -#include #include #include @@ -39,22 +37,16 @@ using namespace std; namespace Batch { - JobInfo_eSlurm::JobInfo_eSlurm(const std::string & id, const std::string & logFile) + JobInfo_eSlurm::JobInfo_eSlurm(const std::string & id, const std::string & queryOutput) : JobInfo() { _param[ID] = id; - // read log file - ifstream log(logFile.c_str()); - string line; - - // status should be on the second line - for (int i=0 ; i<2 ; i++) - getline(log, line); - log.close(); + // read query output, status should be on the second line + istringstream iss(queryOutput); string status; - istringstream iss(line); - iss >> status; + for (int i=0 ; i<2 ; i++) + getline(iss, status); if (status.size() == 0) { // On some batch managers, the job is deleted as soon as it is finished, diff --git a/src/Slurm/Batch_JobInfo_eSlurm.hxx b/src/Slurm/Batch_JobInfo_eSlurm.hxx index c97ea97..6c1a732 100644 --- a/src/Slurm/Batch_JobInfo_eSlurm.hxx +++ b/src/Slurm/Batch_JobInfo_eSlurm.hxx @@ -39,7 +39,7 @@ namespace Batch { { public: - JobInfo_eSlurm(const std::string & id, const std::string & logFile); + JobInfo_eSlurm(const std::string & id, const std::string & queryOutput); virtual ~JobInfo_eSlurm(); }; -- 2.39.2