From: ribes
Date: Fri, 13 Nov 2009 15:06:39 +0000 (+0000)
Subject: New version of eClient - Adding default logs directory
X-Git-Tag: new_launcher_alpha_091119~8
X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=db6ae3496efb5335aacdf0978aa5199b63c8a10f;p=tools%2Flibbatch.git

New version of eClient - Adding default logs directory
New ePBS Client
First to handle cpu and nodes in submission
---

diff --git a/src/Core/Batch_BatchManager_eClient.cxx b/src/Core/Batch_BatchManager_eClient.cxx
index 6380167..81f03a3 100644
--- a/src/Core/Batch_BatchManager_eClient.cxx
+++ b/src/Core/Batch_BatchManager_eClient.cxx
@@ -83,7 +83,7 @@ namespace Batch {
     Versatile::iterator Vit;
     _username = string(params[USER]);
 
-    string subCommand = string("mkdir -p ") + string(params[TMPDIR]);
+    string subCommand = string("mkdir -p ") + string(params[TMPDIR]) + string("/logs");
     string command = _protocol.getExecCommand(subCommand, _hostname, _username);
     cerr << command.c_str() << endl;
     status = system(command.c_str());
@@ -168,6 +168,17 @@ namespace Batch {
       }
     }
 
+    // Copy logs
+    int status = _protocol.copyFile(string(params[TMPDIR]) + string("/logs"), _hostname, _username,
+                                    directory, "", "");
+    if (status) {
+      std::string mess("Copy logs directory failed ! status is :");
+      ostringstream status_str;
+      status_str << status;
+      mess += status_str.str();
+      cerr << mess << endl;
+    }
+
   }
 
   MpiImpl *BatchManager_eClient::FactoryMpiImpl(string mpiImpl) throw(EmulationException)
diff --git a/src/Core/Batch_Defines.hxx b/src/Core/Batch_Defines.hxx
index ab2a237..587e7d7 100644
--- a/src/Core/Batch_Defines.hxx
+++ b/src/Core/Batch_Defines.hxx
@@ -36,4 +36,10 @@
 # define BATCH_EXPORT
 #endif
 
+#ifdef WIN32
+#define BATCH_CHMOD(name, mode) _chmod(name, mode)
+#else
+#define BATCH_CHMOD(name, mode) chmod(name, mode)
+#endif
+
 #endif
diff --git a/src/Core/Batch_FactBatchManager_eClient.hxx b/src/Core/Batch_FactBatchManager_eClient.hxx
index d047f55..0e4274f 100644
--- a/src/Core/Batch_FactBatchManager_eClient.hxx
+++ b/src/Core/Batch_FactBatchManager_eClient.hxx
@@ -49,7 +49,8 @@ namespace Batch {
 
     virtual Batch::BatchManager_eClient * operator() (const char * hostname,
                                                       CommunicationProtocolType protocolType,
-                                                      const char * mpi) const = 0;
+                                                      const char * mpi,
+                                                      int nb_proc_per_node = 1) const = 0;
 
   protected:
 
diff --git a/src/LSF/Batch_FactBatchManager_eLSF.cxx b/src/LSF/Batch_FactBatchManager_eLSF.cxx
index f799922..afece45 100644
--- a/src/LSF/Batch_FactBatchManager_eLSF.cxx
+++ b/src/LSF/Batch_FactBatchManager_eLSF.cxx
@@ -54,7 +54,8 @@ namespace Batch {
 
   BatchManager_eClient * FactBatchManager_eLSF::operator() (const char * hostname,
                                                             CommunicationProtocolType protocolType,
-                                                            const char * mpiImpl) const
+                                                            const char * mpiImpl,
+                                                            int nb_proc_per_node) const
   {
     // MESSAGE("Building new BatchManager_LSF on host '" << hostname << "'");
     return new BatchManager_eLSF(this, hostname, protocolType, mpiImpl);
diff --git a/src/LSF/Batch_FactBatchManager_eLSF.hxx b/src/LSF/Batch_FactBatchManager_eLSF.hxx
index 9144f64..3f3c604 100644
--- a/src/LSF/Batch_FactBatchManager_eLSF.hxx
+++ b/src/LSF/Batch_FactBatchManager_eLSF.hxx
@@ -47,7 +47,8 @@ namespace Batch {
     virtual BatchManager * operator() (const char * hostname) const;
     virtual BatchManager_eClient * operator() (const char * hostname,
                                                CommunicationProtocolType protocolType,
-                                               const char * mpiImpl) const;
+                                               const char * mpiImpl,
+                                               int nb_proc_per_node = 1) const;
 
   protected:
 
diff --git a/src/PBS/Batch_BatchManager_ePBS.cxx b/src/PBS/Batch_BatchManager_ePBS.cxx
index e676860..8e4d1be 100644
--- a/src/PBS/Batch_BatchManager_ePBS.cxx
+++ b/src/PBS/Batch_BatchManager_ePBS.cxx
@@ -55,11 +55,13 @@ using namespace std;
 namespace Batch {
 
   BatchManager_ePBS::BatchManager_ePBS(const FactBatchManager * parent, const char * host,
-                                       CommunicationProtocolType protocolType, const char * mpiImpl)
+                                       CommunicationProtocolType protocolType, const char * mpiImpl,
+                                       int nb_proc_per_node)
     : BatchManager_eClient(parent, host, protocolType, mpiImpl),
     BatchManager(parent, host)
   {
     // Nothing to do
+    _nb_proc_per_node = nb_proc_per_node;
   }
 
   // Destructeur
@@ -73,7 +75,7 @@ namespace Batch {
   {
     int status;
     Parametre params = job.getParametre();
-    const std::string dirForTmpFiles = params[TMPDIR];
+    const std::string workDir = params[WORKDIR];
     const string fileToExecute = params[EXECUTABLE];
     string::size_type p1 = fileToExecute.find_last_of("/");
     string::size_type p2 = fileToExecute.find_last_of(".");
@@ -89,8 +91,7 @@ namespace Batch {
     string logFile = generateTemporaryFileName("PBS-submitlog");
 
     // define command to submit batch
-    string subCommand = string("cd ") + dirForTmpFiles + "; qsub " +
-      fileNameToExecute + "_Batch.sh";
+    string subCommand = string("cd ") + workDir + "; qsub " + fileNameToExecute + "_Batch.sh";
     string command = _protocol.getExecCommand(subCommand, _hostname, _username);
     command += " > ";
     command += logFile;
@@ -199,6 +200,82 @@ namespace Batch {
   }
 
   void BatchManager_ePBS::buildBatchScript(const Job & job)
+  {
+    // Build the PBS submission script for this job in a local temporary file,
+    // then copy it to <WORKDIR>/<executable root name>_Batch.sh on the remote host.
+    // Throws EmulationException if WORKDIR or EXECUTABLE is missing from the job
+    // parameters, or if the copy to the remote host fails.
+    std::cerr << "BuildBatchScript" << std::endl;
+    Parametre params = job.getParametre();
+
+    // Job Parameters
+    string workDir = "";
+    string fileToExecute = "";
+    int nbproc = 0;
+    int edt = 0;
+    int mem = 0;
+    string queue = ""; // NOTE(review): never assigned in this function, so the queue directive below is not emitted yet
+
+    // Mandatory parameters
+    if (params.find(WORKDIR) != params.end())
+      workDir = params[WORKDIR].str();
+    else
+      throw EmulationException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job");
+    if (params.find(EXECUTABLE) != params.end())
+      fileToExecute = params[EXECUTABLE].str();
+    else
+      throw EmulationException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job");
+
+    // Optional parameters
+    if (params.find(NBPROC) != params.end())
+      nbproc = params[NBPROC];
+    if (params.find(MAXWALLTIME) != params.end())
+      edt = params[MAXWALLTIME];
+    if (params.find(MAXRAMSIZE) != params.end())
+      mem = params[MAXRAMSIZE];
+
+    // Strip the directory (and, for the root name, the extension) from the executable path
+    string::size_type p1 = fileToExecute.find_last_of("/");
+    string::size_type p2 = fileToExecute.find_last_of(".");
+    string rootNameToExecute = fileToExecute.substr(p1+1,p2-p1-1);
+    string fileNameToExecute = fileToExecute.substr(p1+1);
+
+    // Create batch submit file
+    ofstream tempOutputFile;
+    std::string TmpFileName = createAndOpenTemporaryFile("PBS-script", tempOutputFile);
+
+    tempOutputFile << "#! /bin/sh -f" << endl;
+    if (nbproc > 0)
+    {
+      // Ceiling division: nodes needed to hold nbproc processes (assumes _nb_proc_per_node > 0)
+      int nodes_requested = (nbproc + _nb_proc_per_node -1) / _nb_proc_per_node;
+      tempOutputFile << "#PBS -l nodes=" << nodes_requested << endl;
+    }
+    if (queue != "")
+      tempOutputFile << "#PBS -q " << queue << endl;
+    if( edt > 0 )
+      tempOutputFile << "#PBS -l walltime=" << edt*60 << endl;
+    if( mem > 0 )
+      tempOutputFile << "#PBS -l mem=" << mem << "mb" << endl;
+    tempOutputFile << "#PBS -o " << workDir << "/logs/output.log." << rootNameToExecute << endl ;
+    tempOutputFile << "#PBS -e " << workDir << "/logs/error.log." << rootNameToExecute << endl ;
+    tempOutputFile << "cd " << workDir << endl ;
+    tempOutputFile << "./" + fileNameToExecute << endl;
+    tempOutputFile.flush();
+    tempOutputFile.close();
+
+    // 0755 (== 0x1ED): rwxr-xr-x in POSIX permission bits, so the script is executable
+    BATCH_CHMOD(TmpFileName.c_str(), 0755);
+    cerr << "Batch script file generated is: " << TmpFileName.c_str() << endl;
+
+    int status = _protocol.copyFile(TmpFileName, "", "",
+                                    workDir + "/" + rootNameToExecute + "_Batch.sh",
+                                    _hostname, _username);
+    if (status)
+      throw EmulationException("Error of connection on remote host, cannot copy batch submission file");
+  }
+
+  void BatchManager_ePBS::oldbuildBatchScript(const Job & job)
   {
     Parametre params = job.getParametre();
     Environnement env = job.getEnvironnement();
diff --git a/src/PBS/Batch_BatchManager_ePBS.hxx b/src/PBS/Batch_BatchManager_ePBS.hxx
index a2f9457..73b4947 100644
--- a/src/PBS/Batch_BatchManager_ePBS.hxx
+++ b/src/PBS/Batch_BatchManager_ePBS.hxx
@@ -45,7 +45,8 @@ namespace Batch {
   public:
     // Constructeur et destructeur
     BatchManager_ePBS(const FactBatchManager * parent, const char * host="localhost",
-                      CommunicationProtocolType protocolType = SSH, const char * mpiImpl="nompi"); // connexion a la machine host
+                      CommunicationProtocolType protocolType = SSH, const char * mpiImpl="nompi",
+                      int nb_proc_per_node=1); // connects to the machine 'host'
     virtual ~BatchManager_ePBS();
 
     // Recupere le nom du serveur par defaut
@@ -68,8 +69,10 @@ namespace Batch {
 
   protected:
     void buildBatchScript(const Job & job);
+    void oldbuildBatchScript(const Job & job);
 
   private:
+    int _nb_proc_per_node;
 
 #ifdef SWIG
   public:
diff --git a/src/PBS/Batch_FactBatchManager_ePBS.cxx b/src/PBS/Batch_FactBatchManager_ePBS.cxx
index 2984a3c..e017f2f 100644
--- a/src/PBS/Batch_FactBatchManager_ePBS.cxx
+++ b/src/PBS/Batch_FactBatchManager_ePBS.cxx
@@ -58,10 +58,11 @@ namespace Batch {
 
   BatchManager_eClient * FactBatchManager_ePBS::operator() (const char * hostname,
                                                             CommunicationProtocolType protocolType,
-                                                            const char * mpiImpl) const
+                                                            const char * mpiImpl,
+                                                            int nb_proc_per_node) const
   {
     // MESSAGE("Building new BatchManager_PBS on host '" << hostname << "'");
-    return new BatchManager_ePBS(this, hostname, protocolType, mpiImpl);
+    return new BatchManager_ePBS(this, hostname, protocolType, mpiImpl, nb_proc_per_node);
   }
 
 
diff --git a/src/PBS/Batch_FactBatchManager_ePBS.hxx b/src/PBS/Batch_FactBatchManager_ePBS.hxx
index 930552d..3cf5d3a 100644
--- a/src/PBS/Batch_FactBatchManager_ePBS.hxx
+++ b/src/PBS/Batch_FactBatchManager_ePBS.hxx
@@ -52,7 +52,8 @@ namespace Batch {
     virtual BatchManager * operator() (const char * hostname) const;
     virtual BatchManager_eClient * operator() (const char * hostname,
                                                CommunicationProtocolType protocolType,
-                                               const char * mpiImpl) const;
+                                               const char * mpiImpl,
+                                               int nb_proc_per_node = 1) const;
 
   protected:
 
diff --git a/src/SGE/Batch_FactBatchManager_eSGE.cxx b/src/SGE/Batch_FactBatchManager_eSGE.cxx
index 7bc44c0..a1bdcc2 100644
--- a/src/SGE/Batch_FactBatchManager_eSGE.cxx
+++ b/src/SGE/Batch_FactBatchManager_eSGE.cxx
@@ -56,7 +56,8 @@ namespace Batch {
 
   BatchManager_eClient * FactBatchManager_eSGE::operator() (const char * hostname,
                                                             CommunicationProtocolType protocolType,
-                                                            const char * mpiImpl) const
+                                                            const char * mpiImpl,
+                                                            int nb_proc_per_node) const
   {
     // MESSAGE("Building new BatchManager_SGE on host '" << hostname << "'");
     return new BatchManager_eSGE(this, hostname, protocolType, mpiImpl);
diff --git a/src/SGE/Batch_FactBatchManager_eSGE.hxx b/src/SGE/Batch_FactBatchManager_eSGE.hxx
index 6dae360..509220a 100644
--- a/src/SGE/Batch_FactBatchManager_eSGE.hxx
+++ b/src/SGE/Batch_FactBatchManager_eSGE.hxx
@@ -48,7 +48,8 @@ namespace Batch {
     virtual BatchManager * operator() (const char * hostname) const;
     virtual BatchManager_eClient * operator() (const char * hostname,
                                                CommunicationProtocolType protocolType,
-                                               const char * mpiImpl) const;
+                                               const char * mpiImpl,
+                                               int nb_proc_per_node = 1) const;
 
   protected:
 
diff --git a/src/SSH/Batch_FactBatchManager_eSSH.cxx b/src/SSH/Batch_FactBatchManager_eSSH.cxx
index 28b7082..0c8d153 100644
--- a/src/SSH/Batch_FactBatchManager_eSSH.cxx
+++ b/src/SSH/Batch_FactBatchManager_eSSH.cxx
@@ -43,7 +43,8 @@ Batch::FactBatchManager_eSSH::operator() (const char * hostname) const
 Batch::BatchManager_eClient *
 Batch::FactBatchManager_eSSH::operator() (const char * hostname,
                                           CommunicationProtocolType protocolType,
-                                          const char * mpiImpl) const
+                                          const char * mpiImpl,
+                                          int nb_proc_per_node) const
 {
   //protocolType and mpiImpl are ignored.
   std::cerr << "[Batch::FactBatchManager_eSSH] creating new Batch::BatchManager_eSSH with hostname = " << hostname << std::endl;
diff --git a/src/SSH/Batch_FactBatchManager_eSSH.hxx b/src/SSH/Batch_FactBatchManager_eSSH.hxx
index 5014cd4..f3210e6 100644
--- a/src/SSH/Batch_FactBatchManager_eSSH.hxx
+++ b/src/SSH/Batch_FactBatchManager_eSSH.hxx
@@ -49,7 +49,8 @@ namespace Batch {
     virtual BatchManager * operator() (const char * hostname) const; // From FactBacthManager
     virtual BatchManager_eClient * operator() (const char * hostname,
                                                CommunicationProtocolType protocolType,
-                                               const char * mpiImpl) const; // From FactBatchManager_eClient
+                                               const char * mpiImpl,
+                                               int nb_proc_per_node = 1) const; // From FactBatchManager_eClient
   };
 
 }