From: barate Date: Wed, 1 Feb 2012 12:32:51 +0000 (+0000) Subject: Some refactoring in batch manager eLSF to use method getCommandOutput instead of... X-Git-Tag: V1_5_0b1~13 X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=426178a089387f35e0be99984e2408f3f22ee572;p=tools%2Flibbatch.git Some refactoring in batch manager eLSF to use method getCommandOutput instead of temporary files --- diff --git a/src/LSF/Batch_BatchManager_eLSF.cxx b/src/LSF/Batch_BatchManager_eLSF.cxx index 1da7a92..af1603a 100644 --- a/src/LSF/Batch_BatchManager_eLSF.cxx +++ b/src/LSF/Batch_BatchManager_eLSF.cxx @@ -29,25 +29,12 @@ * */ -#include -#include - -#include +#include #include #include -#include -#include - -#include -#include -#ifdef WIN32 -#include -#else -#include -#endif - -#include "Batch_Constants.hxx" +#include +#include #include "Batch_BatchManager_eLSF.hxx" #include "Batch_JobInfo_eLSF.hxx" @@ -74,13 +61,8 @@ namespace Batch { // Methode pour le controle des jobs : soumet un job au gestionnaire const JobId BatchManager_eLSF::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const std::string workDir = params[WORKDIR]; - const string fileToExecute = params[EXECUTABLE]; - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // export input files on cluster cerr << "Export des fichiers en entree" << endl; @@ -88,41 +70,24 @@ namespace Batch { // build batch script for job cerr << "Construction du script de batch" << endl; - buildBatchScript(job); + string scriptFile = buildSubmissionScript(job); cerr << "Script envoye" << endl; - // define name of log file (local) - string logFile = generateTemporaryFileName("LSF-submitlog"); - // define command to submit batch - string subCommand = string("cd ") + workDir + "; bsub < " + fileNameToExecute + "_Batch.sh"; + string subCommand = string("cd ") + workDir + "; bsub < " + scriptFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; command += " 2>&1"; cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - { - ifstream error_message(logFile.c_str()); - std::string mess; - std::string temp; - while(std::getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw EmulationException("Can't submit job, error was: " + output); - string sline(line); - int p10 = sline.find("<"); - int p20 = sline.find(">"); - string strjob = sline.substr(p10+1,p20-p10-1); + // read id of submitted job in output + int p10 = output.find("<"); + int p20 = output.find(">"); + string strjob = output.substr(p10+1,p20-p10-1); JobId id(this, strjob); return id; @@ -192,20 +157,16 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - // define name of log file (local) - string logFile = generateTemporaryFileName(string("LSF-querylog-id") + jobid.getReference()); - // define command to query batch string subCommand = string("bjobs ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - int status = system(command.c_str()); - if (status) - throw EmulationException("Error of connection on remote host"); - JobInfo_eLSF ji = JobInfo_eLSF(id,logFile); + string output; + int status = Utils::getCommandOutput(command, output); + if (status) throw EmulationException("Error of connection on remote host"); + + JobInfo_eLSF ji = JobInfo_eLSF(id, output); return ji; } @@ -217,9 +178,8 @@ namespace Batch { throw EmulationException("Not yet implemented"); } - void BatchManager_eLSF::buildBatchScript(const Job & job) + std::string BatchManager_eLSF::buildSubmissionScript(const Job & job) { -#ifndef WIN32 //TODO: need for porting on Windows Parametre params = job.getParametre(); // Job Parameters @@ -260,6 +220,8 @@ namespace Batch { std::string TmpFileName = createAndOpenTemporaryFile("LSF-script", tempOutputFile); tempOutputFile << "#! /bin/sh -f" << endl ; + if (params.find(NAME) != params.end()) + tempOutputFile << "#BSUB -J " << params[NAME] << endl; if (queue != "") tempOutputFile << "#BSUB -q " << queue << endl; if( edt > 0 ) @@ -319,17 +281,15 @@ namespace Batch { tempOutputFile.flush(); tempOutputFile.close(); - BATCH_CHMOD(TmpFileName.c_str(), 0x1ED); cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + string remoteFileName = rootNameToExecute + "_Batch.sh"; int status = _protocol.copyFile(TmpFileName, "", "", - workDir + "/" + rootNameToExecute + "_Batch.sh", + workDir + "/" + remoteFileName, _hostname, _username); if (status) throw EmulationException("Error of connection on remote host"); - -#endif - + return remoteFileName; } std::string BatchManager_eLSF::getWallTime(const long edt) diff --git a/src/LSF/Batch_BatchManager_eLSF.hxx b/src/LSF/Batch_BatchManager_eLSF.hxx index 9b14762..b9e041a 100644 --- a/src/LSF/Batch_BatchManager_eLSF.hxx +++ b/src/LSF/Batch_BatchManager_eLSF.hxx @@ -69,7 +69,7 @@ namespace Batch { virtual const Batch::JobId addJob(const Batch::Job & job, const std::string reference); // ajoute un nouveau job sans le soumettre protected: - void buildBatchScript(const Job & job); + std::string buildSubmissionScript(const Job & job); std::string getWallTime(const long edt); private: diff --git a/src/LSF/Batch_JobInfo_eLSF.cxx b/src/LSF/Batch_JobInfo_eLSF.cxx index 349c575..4040837 100644 --- a/src/LSF/Batch_JobInfo_eLSF.cxx +++ b/src/LSF/Batch_JobInfo_eLSF.cxx @@ -30,34 +30,26 @@ */ #include -#include -#include #include -#include "Batch_Constants.hxx" -#include "Batch_Parametre.hxx" -#include "Batch_Environnement.hxx" -#include "Batch_RunTimeException.hxx" -#include "Batch_APIInternalFailureException.hxx" +#include #include "Batch_JobInfo_eLSF.hxx" using namespace std; namespace Batch { - - // Constructeurs - JobInfo_eLSF::JobInfo_eLSF(int id, string logFile) : JobInfo() + JobInfo_eLSF::JobInfo_eLSF(int id, const std::string & queryOutput) : JobInfo() { - // On remplit les membres _param et _env + // Fill ID parameter ostringstream oss; oss << id; _param[ID] = oss.str(); - // read status of job in log file + // read query output string line; - ifstream fp(logFile.c_str()); + istringstream fp(queryOutput); getline(fp, line); // On some batch managers, the job is deleted soon after it is finished, diff --git a/src/LSF/Batch_JobInfo_eLSF.hxx b/src/LSF/Batch_JobInfo_eLSF.hxx index 1e83631..75a59b2 100644 --- a/src/LSF/Batch_JobInfo_eLSF.hxx +++ b/src/LSF/Batch_JobInfo_eLSF.hxx @@ -32,7 +32,6 @@ #ifndef _JOBINFO_LSF_H_ #define _JOBINFO_LSF_H_ -#include "Batch_RunTimeException.hxx" #include "Batch_JobInfo.hxx" #include @@ -44,7 +43,7 @@ namespace Batch { public: // Constructeurs et destructeur JobInfo_eLSF() : _running(false) {}; - JobInfo_eLSF(int id,std::string logFile); + JobInfo_eLSF(int id, const std::string & queryOutput); virtual ~JobInfo_eLSF(); // Constructeur par recopie diff --git a/src/LSF/Test/Test_eLSF.cxx b/src/LSF/Test/Test_eLSF.cxx index b3962a9..67d1a2f 100644 --- a/src/LSF/Test/Test_eLSF.cxx +++ b/src/LSF/Test/Test_eLSF.cxx @@ -90,7 +90,7 @@ int main(int argc, char** argv) // ... and its parameters ... Parametre p; p[EXECUTABLE] = "./test-script.sh"; - p[NAME] = string("Test eLSF ") + argv[1]; + p[NAME] = string("Test_eLSF_") + argv[1]; p[WORKDIR] = homedir + "/tmp/Batch"; p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh"); p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh"); @@ -98,7 +98,7 @@ int main(int argc, char** argv) p[TMPDIR] = "tmp/Batch/"; p[NBPROC] = 1; p[MAXWALLTIME] = 1; - p[MAXRAMSIZE] = 50; + p[MAXRAMSIZE] = 128; p[HOMEDIR] = homedir; p[EXCLUSIVE] = true; job.setParametre(p);