X-Git-Url: http://git.salome-platform.org/gitweb/?a=blobdiff_plain;f=src%2FLSF%2FBatch_BatchManager_eLSF.cxx;h=9650a92f3981a24f8d5b33c7bd969a63630918e8;hb=22e2a51eb83e8e1ff962a027f67f9e433d30389f;hp=05eb0345f81a9400307801e57030ae1c5106158e;hpb=983b9637d51178045574789ef0429bb5dd3b40d4;p=tools%2Flibbatch.git diff --git a/src/LSF/Batch_BatchManager_eLSF.cxx b/src/LSF/Batch_BatchManager_eLSF.cxx index 05eb034..9650a92 100644 --- a/src/LSF/Batch_BatchManager_eLSF.cxx +++ b/src/LSF/Batch_BatchManager_eLSF.cxx @@ -1,4 +1,4 @@ -// Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE +// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE // // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS @@ -29,24 +29,13 @@ * */ -#include -#include - -#include +#include #include #include -#include -#include - -#include -#include - -#ifdef WIN32 -#include -#else -#include -#endif +#include +#include +#include #include "Batch_BatchManager_eLSF.hxx" #include "Batch_JobInfo_eLSF.hxx" @@ -55,9 +44,9 @@ using namespace std; namespace Batch { BatchManager_eLSF::BatchManager_eLSF(const FactBatchManager * parent, const char * host, + const char * username, CommunicationProtocolType protocolType, const char * mpiImpl) - : BatchManager_eClient(parent, host, protocolType, mpiImpl), - BatchManager(parent, host) + : BatchManager(parent, host, username, protocolType, mpiImpl) { // Nothing to do } @@ -71,13 +60,8 @@ namespace Batch { // Methode pour le controle des jobs : soumet un job au gestionnaire const JobId BatchManager_eLSF::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const std::string workDir = params[WORKDIR]; - const string fileToExecute = params[EXECUTABLE]; - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); // export input files on cluster cerr << "Export des fichiers en entree" << endl; @@ -85,41 +69,24 @@ namespace Batch { // build batch script for job cerr << "Construction du script de batch" << endl; - buildBatchScript(job); + string scriptFile = buildSubmissionScript(job); cerr << "Script envoye" << endl; - // define name of log file (local) - string logFile = generateTemporaryFileName("LSF-submitlog"); - // define command to submit batch - string subCommand = string("cd ") + workDir + "; bsub < " + fileNameToExecute + "_Batch.sh"; + string subCommand = string("cd ") + workDir + "; bsub < " + scriptFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; command += " 2>&1"; cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - { - ifstream error_message(logFile.c_str()); - std::string mess; - std::string temp; - while(std::getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); - string sline(line); - int p10 = sline.find("<"); - int p20 = sline.find(">"); - string strjob = sline.substr(p10+1,p20-p10-1); + // read id of submitted job in output + int p10 = output.find("<"); + int p20 = output.find(">"); + string strjob = output.substr(p10+1,p20-p10-1); JobId id(this, strjob); return id; @@ -146,7 +113,7 @@ namespace Batch { cerr << command.c_str() << endl; status = system(command.c_str()); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); cerr << "jobId = " << ref << "killed" << endl; } @@ -154,20 +121,20 @@ namespace Batch { // Methode pour le controle des jobs : suspend un job en file d'attente void BatchManager_eLSF::holdJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::holdJob"); } // Methode pour le controle des jobs : relache un job suspendu void BatchManager_eLSF::releaseJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::releaseJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente void BatchManager_eLSF::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::alterJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente @@ -189,20 +156,16 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - // define name of log file (local) - string logFile = generateTemporaryFileName(string("LSF-querylog-id") + jobid.getReference()); - // define command to query batch string subCommand = string("bjobs ") + iss.str(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - int status = system(command.c_str()); - if (status) - throw EmulationException("Error of connection on remote host"); - JobInfo_eLSF ji = JobInfo_eLSF(id,logFile); + string output; + int status = Utils::getCommandOutput(command, output); + if (status) throw RunTimeException("Error of connection on remote host"); + + JobInfo_eLSF ji = JobInfo_eLSF(id, output); return ji; } @@ -211,12 +174,11 @@ namespace Batch { // Methode pour le controle des jobs : teste si un job est present en machine bool BatchManager_eLSF::isRunning(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::isRunning"); } - void BatchManager_eLSF::buildBatchScript(const Job & job) + std::string BatchManager_eLSF::buildSubmissionScript(const Job & job) { -#ifndef WIN32 //TODO: need for porting on Windows Parametre params = job.getParametre(); // Job Parameters @@ -231,11 +193,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job"); // Optional parameters if (params.find(NBPROC) != params.end()) @@ -254,9 +216,11 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - std::string TmpFileName = createAndOpenTemporaryFile("LSF-script", tempOutputFile); + std::string TmpFileName = Utils::createAndOpenTemporaryFile("LSF-script", tempOutputFile); tempOutputFile << "#! /bin/sh -f" << endl ; + if (params.find(NAME) != params.end()) + tempOutputFile << "#BSUB -J " << params[NAME] << endl; if (queue != "") tempOutputFile << "#BSUB -q " << queue << endl; if( edt > 0 ) @@ -264,6 +228,11 @@ namespace Batch { if( mem > 0 ) tempOutputFile << "#BSUB -M " << mem*1024 << endl ; tempOutputFile << "#BSUB -n " << nbproc << endl ; + + if (params.find(EXCLUSIVE) != params.end() && params[EXCLUSIVE]) { + tempOutputFile << "#BSUB -x" << endl ; + } + size_t pos = workDir.find("$HOME"); string baseDir; if( pos != string::npos ) @@ -278,10 +247,16 @@ namespace Batch { tempOutputFile << "#BSUB -o " << baseDir << "/logs/output.log." << rootNameToExecute << endl ; tempOutputFile << "#BSUB -e " << baseDir << "/logs/error.log." << rootNameToExecute << endl ; + // Define environment for the job + Environnement env = job.getEnvironnement(); + for (Environnement::const_iterator iter = env.begin() ; iter != env.end() ; ++iter) { + tempOutputFile << "export " << iter->first << "=" << iter->second << endl; + } + tempOutputFile << "cd " << workDir << endl ; // generate nodes file - tempOutputFile << "NODEFILE=`mktemp nodefile-XXXXXXXXXX` || exit 1" << endl; + tempOutputFile << "LIBBATCH_NODEFILE=`mktemp nodefile-XXXXXXXXXX` || exit 1" << endl; tempOutputFile << "bool=0" << endl; tempOutputFile << "for i in $LSB_MCPU_HOSTS; do" << endl; tempOutputFile << " if test $bool = 0; then" << endl; @@ -289,35 +264,31 @@ namespace Batch { tempOutputFile << " bool=1" << endl; tempOutputFile << " else" << endl; tempOutputFile << " for ((j=0;j<$i;j++)); do" << endl; - tempOutputFile << " echo $n >> $NODEFILE" << endl; + tempOutputFile << " echo $n >> $LIBBATCH_NODEFILE" << endl; tempOutputFile << " done" << endl; tempOutputFile << " bool=0" << endl; tempOutputFile << " fi" << endl; tempOutputFile << "done" << endl; - - // Abstraction of PBS_NODEFILE - TODO - tempOutputFile << "export LIBBATCH_NODEFILE=$NODEFILE" << endl; + tempOutputFile << "export LIBBATCH_NODEFILE" << endl; // Launch the executable tempOutputFile << "./" + fileNameToExecute << endl; // Remove the node file - tempOutputFile << "rm $NODEFILE" << endl; + tempOutputFile << "rm $LIBBATCH_NODEFILE" << endl; tempOutputFile.flush(); tempOutputFile.close(); - BATCH_CHMOD(TmpFileName.c_str(), 0x1ED); cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl; + string remoteFileName = rootNameToExecute + "_Batch.sh"; int status = _protocol.copyFile(TmpFileName, "", "", - workDir + "/" + rootNameToExecute + "_Batch.sh", + workDir + "/" + remoteFileName, _hostname, _username); if (status) - throw EmulationException("Error of connection on remote host"); - -#endif - + throw RunTimeException("Error of connection on remote host"); + return remoteFileName; } std::string BatchManager_eLSF::getWallTime(const long edt) @@ -345,7 +316,7 @@ namespace Batch { cerr << command.c_str() << endl; int status = system(command.c_str()); if (status) - throw EmulationException("Error of launching home command on remote host"); + throw RunTimeException("Error of launching home command on remote host"); std::ifstream file_home(filelogtemp.c_str()); std::getline(file_home, home);