-// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
+// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE
//
// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
*
*/
-#include <stdlib.h>
-#include <string.h>
-
-#include <iostream>
+#include <cstdlib>
#include <fstream>
#include <sstream>
-#include <string>
-#include <sys/stat.h>
-
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef WIN32
-#include <io.h>
-#else
-#include <libgen.h>
-#endif
-#include "Batch_Constants.hxx"
+#include <Batch_Constants.hxx>
+#include <Batch_Utils.hxx>
+#include <Batch_NotYetImplementedException.hxx>
#include "Batch_BatchManager_eLSF.hxx"
#include "Batch_JobInfo_eLSF.hxx"
// Constructor. Performs no work of its own: every argument (parent factory,
// host, username, protocol type, MPI implementation) is forwarded through the
// member-initializer list.
// This change replaces the former pair of initializers
// (BatchManager(parent, host) + BatchManager_eClient(...)) with a single
// BatchManager initializer that receives all five arguments.
BatchManager_eLSF::BatchManager_eLSF(const FactBatchManager * parent, const char * host,
const char * username,
CommunicationProtocolType protocolType, const char * mpiImpl)
- : BatchManager(parent, host),
- BatchManager_eClient(parent, host, username, protocolType, mpiImpl)
-
+ : BatchManager(parent, host, username, protocolType, mpiImpl)
{
// Nothing to do
}
// Methode pour le controle des jobs : soumet un job au gestionnaire
const JobId BatchManager_eLSF::submitJob(const Job & job)
{
- int status;
Parametre params = job.getParametre();
const std::string workDir = params[WORKDIR];
- const string fileToExecute = params[EXECUTABLE];
- string::size_type p1 = fileToExecute.find_last_of("/");
- string::size_type p2 = fileToExecute.find_last_of(".");
- std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1);
// export input files on cluster
cerr << "Export des fichiers en entree" << endl;
// build batch script for job
cerr << "Construction du script de batch" << endl;
- buildBatchScript(job);
+ string scriptFile = buildSubmissionScript(job);
cerr << "Script envoye" << endl;
- // define name of log file (local)
- string logFile = generateTemporaryFileName("LSF-submitlog");
-
// define command to submit batch
- string subCommand = string("cd ") + workDir + "; bsub < " + fileNameToExecute + "_Batch.sh";
+ string subCommand = string("cd ") + workDir + "; bsub < " + scriptFile;
string command = _protocol.getExecCommand(subCommand, _hostname, _username);
- command += " > ";
- command += logFile;
command += " 2>&1";
cerr << command.c_str() << endl;
- status = system(command.c_str());
- if(status)
- {
- ifstream error_message(logFile.c_str());
- std::string mess;
- std::string temp;
- while(std::getline(error_message, temp))
- mess += temp;
- error_message.close();
- throw EmulationException("Error of connection on remote host, error was: " + mess);
- }
- // read id of submitted job in log file
- char line[128];
- FILE *fp = fopen(logFile.c_str(),"r");
- fgets( line, 128, fp);
- fclose(fp);
+ string output;
+ int status = Utils::getCommandOutput(command, output);
+ cout << output;
+ if (status != 0) throw RunTimeException("Can't submit job, error was: " + output);
- string sline(line);
- int p10 = sline.find("<");
- int p20 = sline.find(">");
- string strjob = sline.substr(p10+1,p20-p10-1);
+ // read id of submitted job in output
+ int p10 = output.find("<");
+ int p20 = output.find(">");
+ string strjob = output.substr(p10+1,p20-p10-1);
JobId id(this, strjob);
return id;
cerr << command.c_str() << endl;
status = system(command.c_str());
if (status)
- throw EmulationException("Error of connection on remote host");
+ throw RunTimeException("Error of connection on remote host");
cerr << "jobId = " << ref << "killed" << endl;
}
// Job control method: puts a queued job on hold.
// Not implemented in this manager: the body throws unconditionally and the
// jobid argument is never read. This change switches the exception from a
// generic EmulationException("Not yet implemented") to a
// NotYetImplementedException carrying the qualified method name.
void BatchManager_eLSF::holdJob(const JobId & jobid)
{
- throw EmulationException("Not yet implemented");
+ throw NotYetImplementedException("BatchManager_eLSF::holdJob");
}
// Job control method: releases a job that was put on hold.
// Not implemented in this manager: the body throws unconditionally and the
// jobid argument is never read. This change switches the exception from a
// generic EmulationException("Not yet implemented") to a
// NotYetImplementedException carrying the qualified method name.
void BatchManager_eLSF::releaseJob(const JobId & jobid)
{
- throw EmulationException("Not yet implemented");
+ throw NotYetImplementedException("BatchManager_eLSF::releaseJob");
}
// Job control method: modifies a queued job (parameters and environment).
// Not implemented in this manager: the body throws unconditionally and none of
// the arguments (jobid, param, env) is read. This change switches the
// exception from a generic EmulationException("Not yet implemented") to a
// NotYetImplementedException carrying the qualified method name.
void BatchManager_eLSF::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env)
{
- throw EmulationException("Not yet implemented");
+ throw NotYetImplementedException("BatchManager_eLSF::alterJob");
}
// Methode pour le controle des jobs : modifie un job en file d'attente
istringstream iss(jobid.getReference());
iss >> id;
- // define name of log file (local)
- string logFile = generateTemporaryFileName(string("LSF-querylog-id") + jobid.getReference());
-
// define command to query batch
string subCommand = string("bjobs ") + iss.str();
string command = _protocol.getExecCommand(subCommand, _hostname, _username);
- command += " > ";
- command += logFile;
cerr << command.c_str() << endl;
- int status = system(command.c_str());
- if (status)
- throw EmulationException("Error of connection on remote host");
- JobInfo_eLSF ji = JobInfo_eLSF(id,logFile);
+ string output;
+ int status = Utils::getCommandOutput(command, output);
+ if (status) throw RunTimeException("Error of connection on remote host");
+
+ JobInfo_eLSF ji = JobInfo_eLSF(id, output);
return ji;
}
// Job control method: tests whether a job is present on the machine.
// Not implemented in this manager: despite the bool return type, the body
// throws unconditionally and never returns a value; the jobid argument is
// never read. This change switches the exception from a generic
// EmulationException("Not yet implemented") to a NotYetImplementedException
// carrying the qualified method name.
bool BatchManager_eLSF::isRunning(const JobId & jobid)
{
- throw EmulationException("Not yet implemented");
+ throw NotYetImplementedException("BatchManager_eLSF::isRunning");
}
- void BatchManager_eLSF::buildBatchScript(const Job & job)
+ std::string BatchManager_eLSF::buildSubmissionScript(const Job & job)
{
-#ifndef WIN32 //TODO: need for porting on Windows
Parametre params = job.getParametre();
// Job Parameters
if (params.find(WORKDIR) != params.end())
workDir = params[WORKDIR].str();
else
- throw EmulationException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job");
+ throw RunTimeException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job");
if (params.find(EXECUTABLE) != params.end())
fileToExecute = params[EXECUTABLE].str();
else
- throw EmulationException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job");
+ throw RunTimeException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job");
// Optional parameters
if (params.find(NBPROC) != params.end())
// Create batch submit file
ofstream tempOutputFile;
- std::string TmpFileName = createAndOpenTemporaryFile("LSF-script", tempOutputFile);
+ std::string TmpFileName = Utils::createAndOpenTemporaryFile("LSF-script", tempOutputFile);
tempOutputFile << "#! /bin/sh -f" << endl ;
+ if (params.find(NAME) != params.end())
+ tempOutputFile << "#BSUB -J " << params[NAME] << endl;
if (queue != "")
tempOutputFile << "#BSUB -q " << queue << endl;
if( edt > 0 )
if( mem > 0 )
tempOutputFile << "#BSUB -M " << mem*1024 << endl ;
tempOutputFile << "#BSUB -n " << nbproc << endl ;
+
+ if (params.find(EXCLUSIVE) != params.end() && params[EXCLUSIVE]) {
+ tempOutputFile << "#BSUB -x" << endl ;
+ }
+
size_t pos = workDir.find("$HOME");
string baseDir;
if( pos != string::npos )
tempOutputFile << "#BSUB -o " << baseDir << "/logs/output.log." << rootNameToExecute << endl ;
tempOutputFile << "#BSUB -e " << baseDir << "/logs/error.log." << rootNameToExecute << endl ;
+ // Define environment for the job
+ Environnement env = job.getEnvironnement();
+ for (Environnement::const_iterator iter = env.begin() ; iter != env.end() ; ++iter) {
+ tempOutputFile << "export " << iter->first << "=" << iter->second << endl;
+ }
+
tempOutputFile << "cd " << workDir << endl ;
// generate nodes file
- tempOutputFile << "NODEFILE=`mktemp nodefile-XXXXXXXXXX` || exit 1" << endl;
+ tempOutputFile << "LIBBATCH_NODEFILE=`mktemp nodefile-XXXXXXXXXX` || exit 1" << endl;
tempOutputFile << "bool=0" << endl;
tempOutputFile << "for i in $LSB_MCPU_HOSTS; do" << endl;
tempOutputFile << " if test $bool = 0; then" << endl;
tempOutputFile << " bool=1" << endl;
tempOutputFile << " else" << endl;
tempOutputFile << " for ((j=0;j<$i;j++)); do" << endl;
- tempOutputFile << " echo $n >> $NODEFILE" << endl;
+ tempOutputFile << " echo $n >> $LIBBATCH_NODEFILE" << endl;
tempOutputFile << " done" << endl;
tempOutputFile << " bool=0" << endl;
tempOutputFile << " fi" << endl;
tempOutputFile << "done" << endl;
-
- // Abstraction of PBS_NODEFILE - TODO
- tempOutputFile << "export LIBBATCH_NODEFILE=$NODEFILE" << endl;
+ tempOutputFile << "export LIBBATCH_NODEFILE" << endl;
// Launch the executable
tempOutputFile << "./" + fileNameToExecute << endl;
// Remove the node file
- tempOutputFile << "rm $NODEFILE" << endl;
+ tempOutputFile << "rm $LIBBATCH_NODEFILE" << endl;
tempOutputFile.flush();
tempOutputFile.close();
- BATCH_CHMOD(TmpFileName.c_str(), 0x1ED);
cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl;
+ string remoteFileName = rootNameToExecute + "_Batch.sh";
int status = _protocol.copyFile(TmpFileName, "", "",
- workDir + "/" + rootNameToExecute + "_Batch.sh",
+ workDir + "/" + remoteFileName,
_hostname, _username);
if (status)
- throw EmulationException("Error of connection on remote host");
-
-#endif
-
+ throw RunTimeException("Error of connection on remote host");
+ return remoteFileName;
}
std::string BatchManager_eLSF::getWallTime(const long edt)
cerr << command.c_str() << endl;
int status = system(command.c_str());
if (status)
- throw EmulationException("Error of launching home command on remote host");
+ throw RunTimeException("Error of launching home command on remote host");
std::ifstream file_home(filelogtemp.c_str());
std::getline(file_home, home);