From: barate Date: Mon, 14 Jan 2013 17:10:45 +0000 (+0000) Subject: Merge class BatchManager_eClient into class BatchManager X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=22e2a51eb83e8e1ff962a027f67f9e433d30389f;p=tools%2Flibbatch.git Merge class BatchManager_eClient into class BatchManager --- diff --git a/doc/Batch.texi b/doc/Batch.texi index d8bc55c..85774d8 100644 --- a/doc/Batch.texi +++ b/doc/Batch.texi @@ -2136,12 +2136,12 @@ class Parametre : public map< string, Versatile > static const string STARTDATE; static const string STATE; static const string TEXT; - static const string TMPDIR; static const string USEDCPUTIME; static const string USEDDISKSIZE; static const string USEDRAMSIZE; static const string USEDWALLTIME; static const string USER; + static const string WORKDIR; protected: // map interne servant a controler le type @@ -2381,12 +2381,6 @@ traduire l'etat reel du job. Par exemple, ce message peut informer l'utilisateur de la raison qui maintient un job dans un etat suspendu ou qui l'empeche de s'executer. -@item TMPDIR : type STRING - -Un chemin d'acces absolu a un repertoire qui sera cree au demarrage du -job et qui isolera le job des autres travaux en cours d'execution sur la -meme machine. - @item USEDCPUTIME : type LONG Le temps de calcul (@i{CPU time}) en secondes reellement consomme par le job. @@ -2409,6 +2403,12 @@ Le nom de l'utilisateur (@i{username}) sous lequel le job devra tourner sur la machine d'execution. Ce parametre est utile lorsque l'utilisateur possede des comptes differents sur les machines sur lequel il soumet et calcule. +@item WORKDIR : type STRING + +Un chemin d'acces a un repertoire sur la machine d'execution qui sera cree au +demarrage du job et qui isolera le job des autres travaux en cours d'execution +sur la meme machine. 
+ @end itemize diff --git a/src/CCC/Batch_BatchManager_eCCC.cxx b/src/CCC/Batch_BatchManager_eCCC.cxx index 83443ee..0d8f509 100644 --- a/src/CCC/Batch_BatchManager_eCCC.cxx +++ b/src/CCC/Batch_BatchManager_eCCC.cxx @@ -47,7 +47,10 @@ #include #endif -#include "Batch_Constants.hxx" +#include +#include +#include + #include "Batch_BatchManager_eCCC.hxx" #include "Batch_JobInfo_eCCC.hxx" @@ -58,8 +61,7 @@ namespace Batch { BatchManager_eCCC::BatchManager_eCCC(const FactBatchManager * parent, const char * host, const char * username, CommunicationProtocolType protocolType, const char * mpiImpl) - : BatchManager(parent, host), - BatchManager_eClient(parent, host, username, protocolType, mpiImpl) + : BatchManager(parent, host, username, protocolType, mpiImpl) { // Nothing to do } @@ -73,7 +75,6 @@ namespace Batch { // Methode pour le controle des jobs : soumet un job au gestionnaire const JobId BatchManager_eCCC::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const std::string workDir = params[WORKDIR]; const string fileToExecute = params[EXECUTABLE]; @@ -90,38 +91,27 @@ namespace Batch { buildBatchScript(job); cerr << "Script envoye" << endl; - // define name of log file (local) - string logFile = generateTemporaryFileName("CCC-submitlog"); - // define command to submit batch string subCommand = string("bash -l -c \\\"cd ") + workDir + "; ccc_msub " + fileNameToExecute + "_Batch.sh\\\""; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; command += " 2>&1"; cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - { - ifstream error_message(logFile.c_str()); - std::string mess; - std::string temp; - while(std::getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - // read id of submitted job in log file - ifstream 
idfile(logFile.c_str()); + // submit job + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); + + // find id of submitted job in output + istringstream idfile(output); string sidj; idfile >> sidj; idfile >> sidj; idfile >> sidj; idfile >> sidj; - idfile.close(); if (sidj.size() == 0) - throw EmulationException("Error in the submission of the job on the remote host"); + throw RunTimeException("Error in the submission of the job on the remote host"); JobId id(this, sidj); return id; @@ -148,7 +138,7 @@ namespace Batch { cerr << command.c_str() << endl; status = system(command.c_str()); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); cerr << "jobId = " << ref << "killed" << endl; } @@ -156,20 +146,20 @@ namespace Batch { // Methode pour le controle des jobs : suspend un job en file d'attente void BatchManager_eCCC::holdJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eCCC::holdJob"); } // Methode pour le controle des jobs : relache un job suspendu void BatchManager_eCCC::releaseJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eCCC::releaseJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente void BatchManager_eCCC::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eCCC::alterJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente @@ -191,20 +181,17 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - // define name of log file (local) - string logFile = 
generateTemporaryFileName(string("CCC-querylog-id") + jobid.getReference()); - // define command to query batch string subCommand = string("bash -l -c \\\"bjobs ") + iss.str() + string("\\\""); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - int status = system(command.c_str()); + + string output; + int status = Utils::getCommandOutput(command, output); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); - JobInfo_eCCC ji = JobInfo_eCCC(id,logFile); + JobInfo_eCCC ji = JobInfo_eCCC(id, output); return ji; } @@ -213,7 +200,7 @@ namespace Batch { // Methode pour le controle des jobs : teste si un job est present en machine bool BatchManager_eCCC::isRunning(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eCCC::isRunning"); } void BatchManager_eCCC::buildBatchScript(const Job & job) @@ -233,11 +220,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[EXECUTABLE] is not defined ! 
Please defined it, cannot submit this job"); // Optional parameters if (params.find(NBPROC) != params.end()) @@ -256,7 +243,7 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - std::string TmpFileName = createAndOpenTemporaryFile("LSF-script", tempOutputFile); + std::string TmpFileName = Utils::createAndOpenTemporaryFile("LSF-script", tempOutputFile); tempOutputFile << "#!/bin/bash" << endl ; if (queue != "") @@ -314,7 +301,7 @@ namespace Batch { workDir + "/" + rootNameToExecute + "_Batch.sh", _hostname, _username); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); #endif @@ -323,19 +310,20 @@ namespace Batch { std::string BatchManager_eCCC::getHomeDir(std::string tmpdir) { std::string home; - std::string filelogtemp = generateTemporaryFileName("gethomedir"); string subCommand = string("echo "); subCommand += tmpdir; - string command = _protocol.getExecCommand(subCommand, _hostname, _username) + " > " + filelogtemp; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); cerr << command.c_str() << endl; - int status = system(command.c_str()); + + string output; + int status = Utils::getCommandOutput(command, output); + if (status) - throw EmulationException("Error of launching home command on remote host"); + throw RunTimeException("Error of launching home command on remote host"); - std::ifstream file_home(filelogtemp.c_str()); + std::istringstream file_home(output); std::getline(file_home, home); - file_home.close(); return home; } diff --git a/src/CCC/Batch_BatchManager_eCCC.hxx b/src/CCC/Batch_BatchManager_eCCC.hxx index de93827..ac8f734 100644 --- a/src/CCC/Batch_BatchManager_eCCC.hxx +++ b/src/CCC/Batch_BatchManager_eCCC.hxx @@ -36,11 +36,11 @@ #include "Batch_JobId.hxx" #include "Batch_JobInfo.hxx" #include "Batch_FactBatchManager.hxx" -#include "Batch_BatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" 
namespace Batch { - class BATCH_EXPORT BatchManager_eCCC : public BatchManager_eClient + class BATCH_EXPORT BatchManager_eCCC : public BatchManager { public: // Constructeur et destructeur diff --git a/src/CCC/Batch_FactBatchManager_eCCC.cxx b/src/CCC/Batch_FactBatchManager_eCCC.cxx index 080d89f..878d980 100644 --- a/src/CCC/Batch_FactBatchManager_eCCC.cxx +++ b/src/CCC/Batch_FactBatchManager_eCCC.cxx @@ -36,7 +36,7 @@ namespace Batch { static FactBatchManager_eCCC sFBM_eCCC; // Constructeur - FactBatchManager_eCCC::FactBatchManager_eCCC() : FactBatchManager_eClient("eCCC") + FactBatchManager_eCCC::FactBatchManager_eCCC() : FactBatchManager("CCC") { // Nothing to do } @@ -47,18 +47,11 @@ namespace Batch { // Nothing to do } - // Functor - BatchManager * FactBatchManager_eCCC::operator() (const char * hostname) const - { - // MESSAGE("Building new BatchManager_CCC on host '" << hostname << "'"); - return new BatchManager_eCCC(this, hostname); - } - - BatchManager_eClient * FactBatchManager_eCCC::operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node) const + BatchManager * FactBatchManager_eCCC::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node) const { // MESSAGE("Building new BatchManager_CCC on host '" << hostname << "'"); return new BatchManager_eCCC(this, hostname, username, protocolType, mpiImpl); diff --git a/src/CCC/Batch_FactBatchManager_eCCC.hxx b/src/CCC/Batch_FactBatchManager_eCCC.hxx index 9070abe..c558801 100644 --- a/src/CCC/Batch_FactBatchManager_eCCC.hxx +++ b/src/CCC/Batch_FactBatchManager_eCCC.hxx @@ -32,29 +32,23 @@ #define _FACTBATCHMANAGER_eCCC_H_ #include "Batch_Defines.hxx" -#include "Batch_BatchManager_eClient.hxx" -#include "Batch_FactBatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" +#include "Batch_FactBatchManager.hxx" namespace Batch { 
- class BATCH_EXPORT FactBatchManager_eCCC : public FactBatchManager_eClient + class BATCH_EXPORT FactBatchManager_eCCC : public FactBatchManager { public: // Constructeur et destructeur FactBatchManager_eCCC(); virtual ~FactBatchManager_eCCC(); - virtual BatchManager * operator() (const char * hostname) const; - virtual BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node = 1) const; - - protected: - - private: - + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node = 1) const; }; } diff --git a/src/CCC/Batch_JobInfo_eCCC.cxx b/src/CCC/Batch_JobInfo_eCCC.cxx index ff56a35..edc646b 100644 --- a/src/CCC/Batch_JobInfo_eCCC.cxx +++ b/src/CCC/Batch_JobInfo_eCCC.cxx @@ -31,14 +31,11 @@ #include #include -#include #include #include "Batch_Constants.hxx" #include "Batch_Parametre.hxx" #include "Batch_Environnement.hxx" -#include "Batch_RunTimeException.hxx" -#include "Batch_APIInternalFailureException.hxx" #include "Batch_JobInfo_eCCC.hxx" using namespace std; @@ -48,7 +45,7 @@ namespace Batch { // Constructeurs - JobInfo_eCCC::JobInfo_eCCC(int id, string logFile) : JobInfo() + JobInfo_eCCC::JobInfo_eCCC(int id, string output) : JobInfo() { // On remplit les membres _param et _env ostringstream oss; @@ -57,7 +54,7 @@ namespace Batch { // read status of job in log file char line[128]; - ifstream fp(logFile.c_str(),ios::in); + istringstream fp(output); fp.getline(line,80,'\n'); string sjobid, username, status; diff --git a/src/CCC/Batch_JobInfo_eCCC.hxx b/src/CCC/Batch_JobInfo_eCCC.hxx index e1d6710..a728cb6 100644 --- a/src/CCC/Batch_JobInfo_eCCC.hxx +++ b/src/CCC/Batch_JobInfo_eCCC.hxx @@ -44,7 +44,7 @@ namespace Batch { public: // Constructeurs et destructeur JobInfo_eCCC() : _running(false) {}; - JobInfo_eCCC(int id,std::string 
logFile); + JobInfo_eCCC(int id,std::string output); virtual ~JobInfo_eCCC(); // Constructeur par recopie diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e88e974..e3ec577 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,7 +54,7 @@ add_subdirectory (CCC) add_subdirectory (LSF) add_subdirectory (PBS) add_subdirectory (SGE) -add_subdirectory (SSH) +#add_subdirectory (SSH) add_subdirectory (LoadLeveler) add_subdirectory (Slurm) diff --git a/src/Core/Batch_BatchManager.cxx b/src/Core/Batch_BatchManager.cxx index 3d0b8d9..2585f7f 100644 --- a/src/Core/Batch_BatchManager.cxx +++ b/src/Core/Batch_BatchManager.cxx @@ -45,6 +45,7 @@ #include "Batch_InvalidArgumentException.hxx" #include "Batch_FactBatchManager.hxx" #include "Batch_BatchManager.hxx" +#include "Batch_Utils.hxx" #ifdef WIN32 #define sleep(seconds) Sleep((seconds)*1000) @@ -54,45 +55,20 @@ using namespace std; namespace Batch { - // Constructeur -// BatchManager::BatchManager(string host) throw(InvalidArgumentException) : _hostname(host), jobid_map() -// { -// // On verifie que le hostname est correct -// if (!gethostbyname(_hostname.c_str())) { // hostname unknown from network -// string msg = "hostname \""; -// msg += _hostname; -// msg += "\" unknown from the network"; -// throw InvalidArgumentException(msg.c_str()); -// } -// } - BatchManager::BatchManager(const FactBatchManager * parent, const char * host) throw(InvalidArgumentException) : _hostname(host), jobid_map(), _parent(parent) + BatchManager::BatchManager(const Batch::FactBatchManager * parent, const char* host, + const char * username, + CommunicationProtocolType protocolType, const char* mpiImpl) + : _hostname(host), jobid_map(), _parent(parent), + _protocol(CommunicationProtocol::getInstance(protocolType)), + _username(username), _mpiImpl(FactoryMpiImpl(mpiImpl)) { - /* -#ifdef WIN32 - WSADATA wsaData; - WSAStartup(MAKEWORD(2, 2), &wsaData); // Initialize Winsock -#endif - - // On verifie que le hostname est correct - 
struct hostent* res = gethostbyname(_hostname.c_str()); - -#ifdef WIN32 - WSACleanup(); // Finalize Winsock -#endif - - if (!res) { // hostname unknown from network - string msg = "hostname \""; - msg += _hostname; - msg += "\" unknown from the network"; - throw InvalidArgumentException(msg.c_str()); - } - */ } + // Destructeur BatchManager::~BatchManager() { - // Nothing to do + delete _mpiImpl; } string BatchManager::__repr__() const @@ -205,4 +181,180 @@ namespace Batch { return state; } + + void BatchManager::exportInputFiles(const Job& job) + { + int status; + Parametre params = job.getParametre(); + const Versatile & V = params[INFILE]; + Versatile::const_iterator Vit; + + // Create remote directories + string logdir = string(params[WORKDIR]) + "/logs"; + status = _protocol.makeDirectory(logdir, _hostname, _username); + if (status) { + std::ostringstream oss; + oss << "Cannot create directory " << logdir << " on host " << _hostname; + oss << ". Return status is " << status; + throw RunTimeException(oss.str()); + } + + // Copy the file to execute into the remote working directory + string executeFile = params[EXECUTABLE]; + if (executeFile.size() != 0) { + status = _protocol.copyFile(executeFile, "", "", + params[WORKDIR], _hostname, _username); + if (status) { + std::ostringstream oss; + oss << "Cannot copy file " << executeFile << " on host " << _hostname; + oss << ". 
Return status is " << status; + throw RunTimeException(oss.str()); + } + +#ifdef WIN32 + // On Windows, we make the remote file executable afterward because + // pscp does not preserve access permissions on files + + string remoteExec = string(params[EXECUTABLE]); + remoteExec = remoteExec.substr(remoteExec.rfind("\\") + 1, remoteExec.length()); + remoteExec = string(params[WORKDIR]) + "/" + executable; + + string subCommand = string("chmod u+x ") + remoteExec; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + cerr << command.c_str() << endl; + status = system(command.c_str()); + if (status) { + std::ostringstream oss; + oss << "Cannot change permissions of file " << remoteExec << " on host " << _hostname; + oss << ". Return status is " << status; + throw RunTimeException(oss.str()); + } +#endif + } + + // Copy input files into the remote working directory + for (Vit=V.begin() ; Vit!=V.end() ; Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple inputFile = cpt; + status = _protocol.copyFile(inputFile.getLocal(), "", "", + inputFile.getRemote(), _hostname, _username); + if (status) { + std::ostringstream oss; + oss << "Cannot copy file " << inputFile.getLocal() << " on host " << _hostname; + oss << ". Return status is " << status; + throw RunTimeException(oss.str()); + } + } + + } + + void BatchManager::importOutputFiles( const Job & job, const string directory ) + { + Parametre params = job.getParametre(); + const Versatile & V = params[OUTFILE]; + Versatile::const_iterator Vit; + + // Create local result directory + int status = CommunicationProtocol::getInstance(SH).makeDirectory(directory, "", ""); + if (status) { + string mess("Directory creation failed. 
Status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + + for(Vit=V.begin(); Vit!=V.end(); Vit++) { + CoupleType cpt = *static_cast< CoupleType * >(*Vit); + Couple outputFile = cpt; + string localPath = outputFile.getLocal(); + if (!Utils::isAbsolutePath(localPath)) { + localPath = directory + "/" + localPath; + } + status = _protocol.copyFile(outputFile.getRemote(), _hostname, _username, + localPath, "", ""); + if (status) { + // Try to get what we can (logs files) + // throw BatchException("Error of connection on remote host"); + std::string mess("Copy command failed ! status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + } + + // Copy logs + status = _protocol.copyFile(string(params[WORKDIR]) + string("/logs"), _hostname, _username, + directory, "", ""); + if (status) { + std::string mess("Copy logs directory failed ! status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + + } + + bool BatchManager::importDumpStateFile( const Job & job, const string directory ) + { + Parametre params = job.getParametre(); + + // Create local result directory + int status = CommunicationProtocol::getInstance(SH).makeDirectory(directory, "", ""); + if (status) { + string mess("Directory creation failed. Status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + } + + bool ret = true; + status = _protocol.copyFile(string(params[WORKDIR]) + string("/dumpState*.xml"), _hostname, _username, + directory, "", ""); + if (status) { + // Try to get what we can (logs files) + // throw BatchException("Error of connection on remote host"); + std::string mess("Copy command failed ! 
status is :"); + ostringstream status_str; + status_str << status; + mess += status_str.str(); + cerr << mess << endl; + ret = false; + } + return ret; + } + + MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) + { + if(mpiImpl == "lam") + return new MpiImpl_LAM(); + else if(mpiImpl == "mpich1") + return new MpiImpl_MPICH1(); + else if(mpiImpl == "mpich2") + return new MpiImpl_MPICH2(); + else if(mpiImpl == "openmpi") + return new MpiImpl_OPENMPI(); + else if(mpiImpl == "ompi") + return new MpiImpl_OMPI(); + else if(mpiImpl == "slurm") + return new MpiImpl_SLURM(); + else if(mpiImpl == "prun") + return new MpiImpl_PRUN(); + else if(mpiImpl == "nompi") + return NULL; + else{ + ostringstream oss; + oss << mpiImpl << " : not yet implemented"; + throw RunTimeException(oss.str().c_str()); + } + } + + const CommunicationProtocol & BatchManager::getProtocol() const + { + return _protocol; + } + } diff --git a/src/Core/Batch_BatchManager.hxx b/src/Core/Batch_BatchManager.hxx index 1deb9ff..12086f7 100644 --- a/src/Core/Batch_BatchManager.hxx +++ b/src/Core/Batch_BatchManager.hxx @@ -39,6 +39,8 @@ #include "Batch_JobId.hxx" #include "Batch_JobInfo.hxx" #include "Batch_InvalidArgumentException.hxx" +#include "Batch_CommunicationProtocol.hxx" +#include "Batch_MpiImpl.hxx" namespace Batch { @@ -51,8 +53,9 @@ namespace Batch { { public: // Constructeur et destructeur - //BatchManager(std::string host="localhost") throw(InvalidArgumentException); // connexion a la machine host - BatchManager(const Batch::FactBatchManager * parent, const char * host="localhost") throw(InvalidArgumentException); // connexion a la machine host + BatchManager(const Batch::FactBatchManager * parent, const char * host = "localhost", + const char * username = "", + CommunicationProtocolType protocolType = SSH, const char * mpiImpl = "nompi"); virtual ~BatchManager(); virtual std::string __repr__() const; @@ -72,12 +75,23 @@ namespace Batch { virtual const Batch::JobId addJob(const Batch::Job & job, 
const std::string reference) = 0; // ajoute un nouveau job sans le soumettre virtual std::string waitForJobEnd(const Batch::JobId & jobid, long timeout = -1, long initSleepTime = 1, long maxSleepTime = 600); + virtual void importOutputFiles( const Job & job, const std::string directory ); + bool importDumpStateFile( const Job & job, const std::string directory ); + + // Get the underlying communication protocol + const CommunicationProtocol & getProtocol() const; protected: std::string _hostname; // serveur ou tourne le BatchManager // std::map< const std::string, const Batch::JobId * > jobid_map; // table des jobs deja soumis std::map< std::string, const Batch::JobId * > jobid_map; // table des jobs deja soumis const Batch::FactBatchManager * _parent; + const CommunicationProtocol & _protocol; // protocol to access _hostname + const std::string _username; // username to access _hostname + MpiImpl *_mpiImpl; // Mpi implementation to launch executable in batch script + + MpiImpl* FactoryMpiImpl(std::string mpiImpl); + void exportInputFiles(const Job & job); private: diff --git a/src/Core/Batch_BatchManager_eClient.cxx b/src/Core/Batch_BatchManager_eClient.cxx deleted file mode 100644 index 92162b2..0000000 --- a/src/Core/Batch_BatchManager_eClient.cxx +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE -// -// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* -* BatchManager_eLSF.cxx : emulation of LSF client -* -* Auteur : Bernard SECHER - CEA DEN -* Mail : mailto:bernard.secher@cea.fr -* Date : Thu Apr 24 10:17:22 2008 -* Projet : PAL Salome -* -*/ - -#include -#include - -#include -#include -#include -#include - -#ifdef WIN32 -#include -#include -#else -#include -#endif - -#include - -#include "Batch_Constants.hxx" -#include "Batch_BatchManager_eClient.hxx" -#include "Batch_RunTimeException.hxx" -#include "Batch_Utils.hxx" - -#ifdef MSVC -#define EXISTS(path) _access_s(path, 0) == 0 -#else -#define EXISTS(path) access(path, F_OK) == 0 -#endif - -using namespace std; - - -namespace Batch { - - BatchManager_eClient::BatchManager_eClient(const Batch::FactBatchManager * parent, const char* host, - const char * username, - CommunicationProtocolType protocolType, const char* mpiImpl) - : BatchManager(parent, host), _protocol(CommunicationProtocol::getInstance(protocolType)), - _username(username) - { - // instanciation of mpi implementation needed to launch executable in batch script - _mpiImpl = FactoryMpiImpl(mpiImpl); - } - - // Destructeur - BatchManager_eClient::~BatchManager_eClient() - { - if (_mpiImpl) - delete _mpiImpl; - } - - void BatchManager_eClient::exportInputFiles(const Job& job) - { - int status; - Parametre params = job.getParametre(); - const Versatile & V = params[INFILE]; - Versatile::const_iterator Vit; - - status = _protocol.makeDirectory(string(params[TMPDIR]) + "/logs", _hostname, _username); - if(status) { - std::ostringstream oss; - oss << status; - std::string ex_mess("Error of connection on remote host ! 
status = "); - ex_mess += oss.str(); - throw EmulationException(ex_mess.c_str()); - } - - // Second step : copy fileToExecute into - // batch tmp files directory - string executeFile = params[EXECUTABLE]; - if (executeFile.size() != 0) { - status = _protocol.copyFile(executeFile, "", "", - params[TMPDIR], _hostname, _username); - if(status) { - std::ostringstream oss; - oss << status; - std::string ex_mess("Error of connection on remote host ! status = "); - ex_mess += oss.str(); - throw EmulationException(ex_mess.c_str()); - } - -#ifdef WIN32 - // On Windows, we make the remote file executable afterward because - // pscp does not preserve access permissions on files - - string executable = string(params[EXECUTABLE]); - executable = executable.substr(executable.rfind("\\") + 1,executable.length()); - - string subCommand = string("chmod u+x ") + string(params[TMPDIR]) + "/" + executable; - string command = _protocol.getExecCommand(subCommand, _hostname, _username); - cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) { - std::ostringstream oss; - oss << status; - std::string ex_mess("Error of connection on remote host ! status = "); - ex_mess += oss.str(); - throw EmulationException(ex_mess.c_str()); - } -#endif - } - - // Third step : copy filesToExportList into - // batch tmp files directory - for(Vit=V.begin(); Vit!=V.end(); Vit++) { - CoupleType cpt = *static_cast< CoupleType * >(*Vit); - Couple inputFile = cpt; - status = _protocol.copyFile(inputFile.getLocal(), "", "", - inputFile.getRemote(), _hostname, _username); - if(status) { - std::ostringstream oss; - oss << status; - std::string ex_mess("Error of connection on remote host ! 
status = "); - ex_mess += oss.str(); - throw EmulationException(ex_mess.c_str()); - } - } - - } - - void BatchManager_eClient::importOutputFiles( const Job & job, const string directory ) - { - Parametre params = job.getParametre(); - const Versatile & V = params[OUTFILE]; - Versatile::const_iterator Vit; - - // Create local result directory - int status = CommunicationProtocol::getInstance(SH).makeDirectory(directory, "", ""); - if (status) { - string mess("Directory creation failed. Status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } - - for(Vit=V.begin(); Vit!=V.end(); Vit++) { - CoupleType cpt = *static_cast< CoupleType * >(*Vit); - Couple outputFile = cpt; - string localPath = outputFile.getLocal(); - if (!Utils::isAbsolutePath(localPath)) { - localPath = directory + "/" + localPath; - } - status = _protocol.copyFile(outputFile.getRemote(), _hostname, _username, - localPath, "", ""); - if (status) { - // Try to get what we can (logs files) - // throw BatchException("Error of connection on remote host"); - std::string mess("Copy command failed ! status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } - } - - // Copy logs - status = _protocol.copyFile(string(params[TMPDIR]) + string("/logs"), _hostname, _username, - directory, "", ""); - if (status) { - std::string mess("Copy logs directory failed ! status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } - - } - - bool BatchManager_eClient::importDumpStateFile( const Job & job, const string directory ) - { - Parametre params = job.getParametre(); - - // Create local result directory - int status = CommunicationProtocol::getInstance(SH).makeDirectory(directory, "", ""); - if (status) { - string mess("Directory creation failed. 
Status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - } - - bool ret = true; - status = _protocol.copyFile(string(params[TMPDIR]) + string("/dumpState*.xml"), _hostname, _username, - directory, "", ""); - if (status) { - // Try to get what we can (logs files) - // throw BatchException("Error of connection on remote host"); - std::string mess("Copy command failed ! status is :"); - ostringstream status_str; - status_str << status; - mess += status_str.str(); - cerr << mess << endl; - ret = false; - } - return ret; - } - - MpiImpl *BatchManager_eClient::FactoryMpiImpl(string mpiImpl) - { - if(mpiImpl == "lam") - return new MpiImpl_LAM(); - else if(mpiImpl == "mpich1") - return new MpiImpl_MPICH1(); - else if(mpiImpl == "mpich2") - return new MpiImpl_MPICH2(); - else if(mpiImpl == "openmpi") - return new MpiImpl_OPENMPI(); - else if(mpiImpl == "ompi") - return new MpiImpl_OMPI(); - else if(mpiImpl == "slurm") - return new MpiImpl_SLURM(); - else if(mpiImpl == "prun") - return new MpiImpl_PRUN(); - else if(mpiImpl == "nompi") - return NULL; - else{ - ostringstream oss; - oss << mpiImpl << " : not yet implemented"; - throw EmulationException(oss.str().c_str()); - } - } - - /** - * This method generates a temporary file name with the pattern "/-XXXXXX" where - * is the directory for temporary files (see BatchManager_eClient::getTmpDir()) and the - * X's are replaced by random characters. Note that this method is less secure than - * BatchManager_eClient::createAndOpenTemporaryFile, so use the latter whenever possible. - * \param prefix the prefix to use for the temporary file. - * \return a name usable for a temporary file. 
- */ - string BatchManager_eClient::generateTemporaryFileName(const string & prefix) - { - string fileName = getTmpDir() + "/" + prefix + "-XXXXXX"; - char randstr[7]; - - do { - sprintf(randstr, "%06d", rand() % 1000000); - fileName.replace(fileName.size()-6, 6, randstr); - } while (EXISTS(fileName.c_str())); - - return fileName; - } - - /** - * This method creates a temporary file and opens an output stream to write into this file. - * The file is created with the pattern "/-XXXXXX" where is the directory - * for temporary files (see BatchManager_eClient::getTmpDir()) and the X's are replaced by random - * characters. The caller is responsible for closing and deleting the file when it is no more used. - * \param prefix the prefix to use for the temporary file. - * \param outputStream an output stream that will be opened for writing in the temporary file. If - * the stream is already open, it will be closed first. - * \return the name of the created file. - */ - string BatchManager_eClient::createAndOpenTemporaryFile(const string & prefix, ofstream & outputStream) - { - if (outputStream.is_open()) - outputStream.close(); - -#ifdef WIN32 - - string fileName = generateTemporaryFileName(prefix); - // Open the file as binary to avoid problems with Windows newlines - outputStream.open(fileName.c_str(), ios_base::binary | ios_base::out); - -#else - - string fileName = getTmpDir() + "/" + prefix + "-XXXXXX"; - char * buf = new char[fileName.size()+1]; - fileName.copy(buf, fileName.size()); - buf[fileName.size()] = '\0'; - - int fd = mkstemp(buf); - if (fd == -1) { - delete[] buf; - throw RunTimeException(string("Can't create temporary file ") + fileName); - } - fileName = buf; - delete[] buf; - - outputStream.open(fileName.c_str()); - close(fd); // Close the file descriptor so that the file is not opened twice - -#endif - - if (outputStream.fail()) - throw RunTimeException(string("Can't open temporary file ") + fileName); - - return fileName; - } - - /** - * This method 
finds the name of the directory to use for temporary files in libBatch. This name - * is /libBatch--XXXXXX. is found by looking for environment - * variables TEMP, TMP, TEMPDIR, TMPDIR, and defaults to "/tmp" if none of them is defined. - * is found by looking for environment variables USER and USERNAME, and defaults to - * "unknown". XXXXXX represents random characters. The directory name is generated only once for - * each BatchManager_eClient instance, and the directory is created at this moment. Subsequent - * calls will always return the same path and the existence of the directory will not be - * rechecked. - * \return the name of the directory to use for temporary files. - */ - const std::string & BatchManager_eClient::getTmpDir() - { - if (tmpDirName.empty()) { - const char * baseDir = getenv("TEMP"); - if (baseDir == NULL) baseDir = getenv("TMP"); - if (baseDir == NULL) baseDir = getenv("TEMPDIR"); - if (baseDir == NULL) baseDir = getenv("TMPDIR"); - if (baseDir == NULL) baseDir = "/tmp"; - - const char * userName = getenv("USER"); - if (userName == NULL) userName = getenv("USERNAME"); - if (userName == NULL) userName = "unknown"; - - string baseName = string(baseDir) + "/libBatch-" + userName + "-XXXXXX"; - srand(time(NULL)); - -#ifdef WIN32 - - char randstr[7]; - do { - sprintf(randstr, "%06d", rand() % 1000000); - baseName.replace(baseName.size()-6, 6, randstr); - } while (EXISTS(baseName.c_str())); - if (_mkdir(baseName.c_str()) != 0) - throw RunTimeException(string("Can't create temporary directory ") + baseName); - tmpDirName = baseName; - -#else - - char * buf = new char[baseName.size() + 1]; - baseName.copy(buf, baseName.size()); - buf[baseName.size()] = '\0'; - if (mkdtemp(buf) == NULL) { - delete[] buf; - throw RunTimeException(string("Can't create temporary directory ") + baseName); - } - tmpDirName = buf; - delete[] buf; - -#endif - - } - - return tmpDirName; - } - -} diff --git a/src/Core/Batch_BatchManager_eClient.hxx 
b/src/Core/Batch_BatchManager_eClient.hxx deleted file mode 100644 index bc4396b..0000000 --- a/src/Core/Batch_BatchManager_eClient.hxx +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE -// -// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager_eLSF.hxx : emulation of client - * - * Auteur : Bernard SECHER - CEA DEN - * Mail : mailto:bernard.secher@cea.fr - * Date : Thu Apr 24 10:17:22 2008 - * Projet : PAL Salome - * - */ - -#ifndef _BATCHMANAGER_eClient_H_ -#define _BATCHMANAGER_eClient_H_ - -#include "Batch_Defines.hxx" -#include "Batch_MpiImpl.hxx" -#include "Batch_BatchManager.hxx" -#include "Batch_EmulationException.hxx" -#include "Batch_CommunicationProtocol.hxx" - -#include - -namespace Batch { - - class Job; - - class BATCH_EXPORT BatchManager_eClient : virtual public BatchManager - { - public: - // Constructeur et destructeur - BatchManager_eClient(const Batch::FactBatchManager * parent, const char* host="localhost", - const char * username="", - CommunicationProtocolType protocolType = SSH, const char* mpiImpl="mpich1"); - virtual 
~BatchManager_eClient(); - virtual void importOutputFiles( const Job & job, const std::string directory ); - bool importDumpStateFile( const Job & job, const std::string directory ); - - protected: - const CommunicationProtocol & _protocol; // protocol to access _hostname - const std::string _username; // username to access _hostname - MpiImpl *_mpiImpl; // Mpi implementation to launch executable in batch script - - std::string generateTemporaryFileName(const std::string & prefix); - std::string createAndOpenTemporaryFile(const std::string & prefix, std::ofstream & outputStream); - MpiImpl* FactoryMpiImpl(std::string mpiImpl); - void exportInputFiles(const Job & job); - const std::string & getTmpDir(); - - private: - std::string tmpDirName; // Path to the directory for temporary files - - }; - -} - -#endif diff --git a/src/Core/Batch_Constants.cxx b/src/Core/Batch_Constants.cxx index 778a3c7..fc04add 100644 --- a/src/Core/Batch_Constants.cxx +++ b/src/Core/Batch_Constants.cxx @@ -42,7 +42,6 @@ namespace Batch { def_Constant(ENDTIME); def_Constant(EUSER); def_Constant(EXECUTABLE); - def_Constant(EXECUTIONHOST); def_Constant(EXITCODE); def_Constant(HOLD); def_Constant(ID); @@ -63,7 +62,6 @@ namespace Batch { def_Constant(STARTTIME); def_Constant(STATE); def_Constant(TEXT); - def_Constant(TMPDIR); def_Constant(USEDCPUTIME); def_Constant(USEDDISKSIZE); def_Constant(USEDRAMSIZE); diff --git a/src/Core/Batch_Constants.hxx b/src/Core/Batch_Constants.hxx index 332a163..b8c1b96 100644 --- a/src/Core/Batch_Constants.hxx +++ b/src/Core/Batch_Constants.hxx @@ -53,7 +53,6 @@ namespace Batch { decl_extern_Constant(ENDTIME); decl_extern_Constant(EUSER); decl_extern_Constant(EXECUTABLE); - decl_extern_Constant(EXECUTIONHOST); decl_extern_Constant(EXITCODE); decl_extern_Constant(HOLD); decl_extern_Constant(ID); @@ -74,7 +73,6 @@ namespace Batch { decl_extern_Constant(STARTTIME); decl_extern_Constant(STATE); decl_extern_Constant(TEXT); - decl_extern_Constant(TMPDIR); 
decl_extern_Constant(USEDCPUTIME); decl_extern_Constant(USEDDISKSIZE); decl_extern_Constant(USEDRAMSIZE); diff --git a/src/Core/Batch_EmulationException.cxx b/src/Core/Batch_EmulationException.cxx deleted file mode 100644 index 627655c..0000000 --- a/src/Core/Batch_EmulationException.cxx +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE -// -// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * EmulationException.cxx : - * - * Author : Renaud BARATE - EDF R&D - * Date : April 2009 - * - */ - -#include "Batch_EmulationException.hxx" -using namespace std; - -namespace Batch { - -} diff --git a/src/Core/Batch_EmulationException.hxx b/src/Core/Batch_EmulationException.hxx deleted file mode 100644 index a34e82b..0000000 --- a/src/Core/Batch_EmulationException.hxx +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE -// -// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the 
terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * EmulationException.hxx : - * - * Author : Renaud BARATE - EDF R&D - * Date : April 2009 - * - */ - -#ifndef _EMULATIONEXCEPTION_H_ -#define _EMULATIONEXCEPTION_H_ - -#include "Batch_Defines.hxx" -#include "Batch_GenericException.hxx" - -namespace Batch { - - class BATCH_EXPORT EmulationException : public GenericException - { - public: - // Constructor - EmulationException(const std::string & ch = "undefined") - : GenericException("EmulationException", ch) {} - }; - -} - -#endif diff --git a/src/Core/Batch_FactBatchManager.hxx b/src/Core/Batch_FactBatchManager.hxx index a51a596..01b9df4 100644 --- a/src/Core/Batch_FactBatchManager.hxx +++ b/src/Core/Batch_FactBatchManager.hxx @@ -31,10 +31,10 @@ #ifndef _FACTBATCHMANAGER_H_ #define _FACTBATCHMANAGER_H_ -#include "Batch_Defines.hxx" - #include -#include + +#include "Batch_Defines.hxx" +#include "Batch_CommunicationProtocol.hxx" namespace Batch { @@ -47,15 +47,17 @@ namespace Batch { FactBatchManager(const std::string & type); virtual ~FactBatchManager(); - virtual Batch::BatchManager * operator() (const char * hostname) const = 0; + virtual Batch::BatchManager * operator() (const char * hostname, + const char * username = "", + CommunicationProtocolType protocolType = SSH, + const char * mpi = "nompi", + int 
nb_proc_per_node = 1) const = 0; std::string getType() const; std::string __repr__() const; protected: std::string type; - private: - }; } diff --git a/src/Core/Batch_FactBatchManager_eClient.cxx b/src/Core/Batch_FactBatchManager_eClient.cxx deleted file mode 100644 index 69a571c..0000000 --- a/src/Core/Batch_FactBatchManager_eClient.cxx +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE -// -// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * FactBatchManager_eClient.cxx : emulation of client - * - * Auteur : Bernard SECHER - CEA DEN - * Mail : mailto:bernard.secher@cea.fr - * Date : Thu Apr 24 10:17:22 2008 - * Projet : PAL Salome - * - */ - -#include -#include -#include "Batch_FactBatchManager_eClient.hxx" -using namespace std; - -namespace Batch { - - // Constructeur - FactBatchManager_eClient::FactBatchManager_eClient(const string & _t) : FactBatchManager(_t) - { - } - - // Destructeur - FactBatchManager_eClient::~FactBatchManager_eClient() - { - // Nothing to do - } - -} diff --git a/src/Core/Batch_FactBatchManager_eClient.hxx b/src/Core/Batch_FactBatchManager_eClient.hxx deleted file mode 100644 index e475929..0000000 --- a/src/Core/Batch_FactBatchManager_eClient.hxx +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE -// -// Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * FactBatchManager_eClient.hxx : emulation of client - * - * Auteur : Bernard SECHER - CEA DEN - * Mail : mailto:bernard.secher@cea.fr - * Date : Thu Apr 24 10:17:22 2008 - * Projet : PAL Salome - * - */ - -#ifndef _FACTBATCHMANAGER_ECLIENT_H_ -#define _FACTBATCHMANAGER_ECLIENT_H_ - -#include - -#include "Batch_FactBatchManager.hxx" -#include "Batch_BatchManager_eClient.hxx" -#include "Batch_CommunicationProtocol.hxx" - -namespace Batch { - - class BATCH_EXPORT FactBatchManager_eClient : public FactBatchManager - { - public: - // Constructeur et destructeur - FactBatchManager_eClient(const std::string & type); - virtual ~FactBatchManager_eClient(); - - virtual Batch::BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpi = "nompi", - int nb_proc_per_node = 1) const = 0; - - protected: - - private: - - }; - -} - -#endif diff --git a/src/Core/Batch_ParameterTypeMap.cxx b/src/Core/Batch_ParameterTypeMap.cxx index dcda3bb..7a14261 100644 --- a/src/Core/Batch_ParameterTypeMap.cxx +++ b/src/Core/Batch_ParameterTypeMap.cxx @@ -55,7 +55,6 @@ namespace Batch { addParameter("ENDTIME", LONG, 1); addParameter("EUSER", STRING, 1); addParameter("EXECUTABLE", STRING, 1); - addParameter("EXECUTIONHOST", STRING, 0); addParameter("EXITCODE", LONG, 1); addParameter("HOLD", LONG, 1); addParameter("ID", STRING, 1); @@ -76,7 +75,6 @@ namespace Batch { addParameter("STARTTIME", LONG, 1); addParameter("STATE", STRING, 1); addParameter("TEXT", STRING, 1); - addParameter("TMPDIR", STRING, 1); addParameter("USEDCPUTIME", LONG, 1); addParameter("USEDDISKSIZE", LONG, 1); 
addParameter("USEDRAMSIZE", LONG, 1); diff --git a/src/Core/Batch_Utils.cxx b/src/Core/Batch_Utils.cxx index e3c864a..5d173bc 100644 --- a/src/Core/Batch_Utils.cxx +++ b/src/Core/Batch_Utils.cxx @@ -26,10 +26,15 @@ * Author : Renaud BARATE - EDF R&D */ +#include #include +#include +#include +#include #include #include "Batch_Utils.hxx" +#include "Batch_RunTimeException.hxx" #ifdef MSVC #define popen _popen @@ -68,4 +73,56 @@ bool Utils::isAbsolutePath(const string & path) return path[0] == '/'; } +string Utils::createAndOpenTemporaryFile(const string & prefix, ofstream & outputStream) +{ + if (outputStream.is_open()) + outputStream.close(); + + // Find directory for temporary files + const char * tmpDirName = getenv("TEMP"); + if (tmpDirName == NULL) tmpDirName = getenv("TMP"); + if (tmpDirName == NULL) tmpDirName = getenv("TEMPDIR"); + if (tmpDirName == NULL) tmpDirName = getenv("TMPDIR"); + if (tmpDirName == NULL) tmpDirName = "/tmp"; + + string fileName = (string)tmpDirName + "/libbatch-" + prefix + "-XXXXXX"; + +#ifdef WIN32 + + char randstr[7]; + srand(time(NULL)); + + do { + sprintf(randstr, "%06d", rand() % 1000000); + fileName.replace(fileName.size()-6, 6, randstr); + } while (EXISTS(fileName.c_str())); + + // Open the file as binary to avoid problems with Windows newlines + outputStream.open(fileName.c_str(), ios_base::binary | ios_base::out); + +#else + + char * buf = new char[fileName.size()+1]; + fileName.copy(buf, fileName.size()); + buf[fileName.size()] = '\0'; + + int fd = mkstemp(buf); + if (fd == -1) { + delete[] buf; + throw RunTimeException(string("Can't create temporary file ") + fileName); + } + fileName = buf; + delete[] buf; + + outputStream.open(fileName.c_str()); + close(fd); // Close the file descriptor so that the file is not opened twice + +#endif + + if (outputStream.fail()) + throw RunTimeException(string("Can't open temporary file ") + fileName); + + return fileName; +} + } diff --git a/src/Core/Batch_Utils.hxx 
b/src/Core/Batch_Utils.hxx index c14273b..e0cdb23 100644 --- a/src/Core/Batch_Utils.hxx +++ b/src/Core/Batch_Utils.hxx @@ -48,6 +48,21 @@ public: */ static bool isAbsolutePath(const std::string & path); + /** + * Create a temporary file and open an output stream to write into this file. + * The file is created with the pattern "<tmpdir>/libbatch-<prefix>-XXXXXX" where <tmpdir> is the + * directory for temporary files and the X's are replaced by random characters. The caller is + * responsible for closing and deleting the file when it is no more used. + * <tmpdir> is found by looking for environment variables TEMP, TMP, TEMPDIR, TMPDIR, and defaults + * to "/tmp" if none of them is defined. + * \param prefix the prefix to use for the temporary file. + * \param outputStream an output stream that will be opened for writing in the temporary file. If + * the stream is already open, it will be closed first. + * \return the name of the created file. + */ + static std::string createAndOpenTemporaryFile(const std::string & prefix, + std::ofstream & outputStream); + private: // No instanciation possible as this class provides only static methods diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt index 8b0136a..aa7bf74 100644 --- a/src/Core/CMakeLists.txt +++ b/src/Core/CMakeLists.txt @@ -21,7 +21,6 @@ # SET(CLASS_LIST Core/Batch_APIInternalFailureException - Core/Batch_BatchManager_eClient Core/Batch_BatchManager Core/Batch_BatchManagerCatalog Core/Batch_BoolType @@ -33,9 +32,7 @@ SET(CLASS_LIST Core/Batch_APIInternalFailureException Core/Batch_CoupleType Core/Batch_Date Core/Batch_DateType - Core/Batch_EmulationException Core/Batch_Environnement - Core/Batch_FactBatchManager_eClient Core/Batch_FactBatchManager Core/Batch_GenericException Core/Batch_GenericType diff --git a/src/LSF/Batch_BatchManager_eLSF.cxx b/src/LSF/Batch_BatchManager_eLSF.cxx index 560b994..9650a92 100644 --- a/src/LSF/Batch_BatchManager_eLSF.cxx +++ b/src/LSF/Batch_BatchManager_eLSF.cxx @@ -35,6 +35,7 @@ #include #include
+#include #include "Batch_BatchManager_eLSF.hxx" #include "Batch_JobInfo_eLSF.hxx" @@ -45,9 +46,7 @@ namespace Batch { BatchManager_eLSF::BatchManager_eLSF(const FactBatchManager * parent, const char * host, const char * username, CommunicationProtocolType protocolType, const char * mpiImpl) - : BatchManager(parent, host), - BatchManager_eClient(parent, host, username, protocolType, mpiImpl) - + : BatchManager(parent, host, username, protocolType, mpiImpl) { // Nothing to do } @@ -82,7 +81,7 @@ namespace Batch { string output; int status = Utils::getCommandOutput(command, output); cout << output; - if (status != 0) throw EmulationException("Can't submit job, error was: " + output); + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // read id of submitted job in output int p10 = output.find("<"); @@ -114,7 +113,7 @@ namespace Batch { cerr << command.c_str() << endl; status = system(command.c_str()); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); cerr << "jobId = " << ref << "killed" << endl; } @@ -122,20 +121,20 @@ namespace Batch { // Methode pour le controle des jobs : suspend un job en file d'attente void BatchManager_eLSF::holdJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::holdJob"); } // Methode pour le controle des jobs : relache un job suspendu void BatchManager_eLSF::releaseJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::releaseJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente void BatchManager_eLSF::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::alterJob"); } // Methode pour le controle des 
jobs : modifie un job en file d'attente @@ -164,7 +163,7 @@ namespace Batch { string output; int status = Utils::getCommandOutput(command, output); - if (status) throw EmulationException("Error of connection on remote host"); + if (status) throw RunTimeException("Error of connection on remote host"); JobInfo_eLSF ji = JobInfo_eLSF(id, output); return ji; @@ -175,7 +174,7 @@ namespace Batch { // Methode pour le controle des jobs : teste si un job est present en machine bool BatchManager_eLSF::isRunning(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eLSF::isRunning"); } std::string BatchManager_eLSF::buildSubmissionScript(const Job & job) @@ -194,11 +193,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job"); // Optional parameters if (params.find(NBPROC) != params.end()) @@ -217,7 +216,7 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - std::string TmpFileName = createAndOpenTemporaryFile("LSF-script", tempOutputFile); + std::string TmpFileName = Utils::createAndOpenTemporaryFile("LSF-script", tempOutputFile); tempOutputFile << "#! 
/bin/sh -f" << endl ; if (params.find(NAME) != params.end()) @@ -288,7 +287,7 @@ namespace Batch { workDir + "/" + remoteFileName, _hostname, _username); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); return remoteFileName; } @@ -317,7 +316,7 @@ namespace Batch { cerr << command.c_str() << endl; int status = system(command.c_str()); if (status) - throw EmulationException("Error of launching home command on remote host"); + throw RunTimeException("Error of launching home command on remote host"); std::ifstream file_home(filelogtemp.c_str()); std::getline(file_home, home); diff --git a/src/LSF/Batch_BatchManager_eLSF.hxx b/src/LSF/Batch_BatchManager_eLSF.hxx index 57fa621..cc4d48f 100644 --- a/src/LSF/Batch_BatchManager_eLSF.hxx +++ b/src/LSF/Batch_BatchManager_eLSF.hxx @@ -36,11 +36,11 @@ #include "Batch_JobId.hxx" #include "Batch_JobInfo.hxx" #include "Batch_FactBatchManager.hxx" -#include "Batch_BatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" namespace Batch { - class BATCH_EXPORT BatchManager_eLSF : public BatchManager_eClient + class BATCH_EXPORT BatchManager_eLSF : public BatchManager { public: // Constructeur et destructeur diff --git a/src/LSF/Batch_FactBatchManager_eLSF.cxx b/src/LSF/Batch_FactBatchManager_eLSF.cxx index e5387bf..ee2585e 100644 --- a/src/LSF/Batch_FactBatchManager_eLSF.cxx +++ b/src/LSF/Batch_FactBatchManager_eLSF.cxx @@ -36,7 +36,7 @@ namespace Batch { static FactBatchManager_eLSF sFBM_eLSF; // Constructeur - FactBatchManager_eLSF::FactBatchManager_eLSF() : FactBatchManager_eClient("eLSF") + FactBatchManager_eLSF::FactBatchManager_eLSF() : FactBatchManager("LSF") { // Nothing to do } @@ -47,18 +47,11 @@ namespace Batch { // Nothing to do } - // Functor - BatchManager * FactBatchManager_eLSF::operator() (const char * hostname) const - { - // MESSAGE("Building new BatchManager_LSF on host '" << hostname << "'"); - return new 
BatchManager_eLSF(this, hostname); - } - - BatchManager_eClient * FactBatchManager_eLSF::operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node) const + BatchManager * FactBatchManager_eLSF::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node) const { // MESSAGE("Building new BatchManager_LSF on host '" << hostname << "'"); return new BatchManager_eLSF(this, hostname, username, protocolType, mpiImpl); diff --git a/src/LSF/Batch_FactBatchManager_eLSF.hxx b/src/LSF/Batch_FactBatchManager_eLSF.hxx index 6668068..9997521 100644 --- a/src/LSF/Batch_FactBatchManager_eLSF.hxx +++ b/src/LSF/Batch_FactBatchManager_eLSF.hxx @@ -32,28 +32,23 @@ #define _FACTBATCHMANAGER_eLSF_H_ #include "Batch_Defines.hxx" -#include "Batch_BatchManager_eClient.hxx" -#include "Batch_FactBatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" +#include "Batch_FactBatchManager.hxx" namespace Batch { - class BATCH_EXPORT FactBatchManager_eLSF : public FactBatchManager_eClient + class BATCH_EXPORT FactBatchManager_eLSF : public FactBatchManager { public: - // Constructeur et destructeur + FactBatchManager_eLSF(); virtual ~FactBatchManager_eLSF(); - virtual BatchManager * operator() (const char * hostname) const; - virtual BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node = 1) const; - - protected: - - private: + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node = 1) const; }; diff --git a/src/LSF/CMakeLists.txt b/src/LSF/CMakeLists.txt index 53602e9..7675329 100644 --- a/src/LSF/CMakeLists.txt +++ b/src/LSF/CMakeLists.txt @@ -25,14 +25,14 @@ SET(CLASS_LIST 
LSF/Batch_BatchManager_eLSF LSF/Batch_JobInfo_eLSF ) -IF (BUILD_LSF_INTERFACE AND LSF_FOUND) - SET(CLASS_LIST ${CLASS_LIST} - LSF/Batch_BatchManager_LSF - LSF/Batch_FactBatchManager_LSF - LSF/Batch_Job_LSF - LSF/Batch_JobInfo_LSF - ) -ENDIF (BUILD_LSF_INTERFACE AND LSF_FOUND) +#IF (BUILD_LSF_INTERFACE AND LSF_FOUND) +# SET(CLASS_LIST ${CLASS_LIST} +# LSF/Batch_BatchManager_LSF +# LSF/Batch_FactBatchManager_LSF +# LSF/Batch_Job_LSF +# LSF/Batch_JobInfo_LSF +# ) +#ENDIF (BUILD_LSF_INTERFACE AND LSF_FOUND) APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST}) diff --git a/src/LSF/Test/Test_eLSF.cxx b/src/LSF/Test/Test_eLSF.cxx index 420e1ad..98aa060 100644 --- a/src/LSF/Test/Test_eLSF.cxx +++ b/src/LSF/Test/Test_eLSF.cxx @@ -35,9 +35,7 @@ #include #include #include -#include #include -#include #include @@ -95,7 +93,6 @@ int main(int argc, char** argv) p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh"); p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh"); p[OUTFILE] = Couple("result.txt", "tmp/Batch/result.txt"); - p[TMPDIR] = "tmp/Batch/"; p[NBPROC] = 1; p[MAXWALLTIME] = 1; p[MAXRAMSIZE] = 128; @@ -112,8 +109,8 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type ePBS on localhost - FactBatchManager_eClient * fbm = (FactBatchManager_eClient *)(c("eLSF")); - BatchManager_eClient * bm = (*fbm)(host.c_str(), user.c_str(), protocol); + FactBatchManager * fbm = c("LSF"); + BatchManager * bm = (*fbm)(host.c_str(), user.c_str(), protocol); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job); diff --git a/src/LoadLeveler/Batch_BatchManager_eLL.cxx b/src/LoadLeveler/Batch_BatchManager_eLL.cxx index 91ba2ca..d377fab 100644 --- a/src/LoadLeveler/Batch_BatchManager_eLL.cxx +++ b/src/LoadLeveler/Batch_BatchManager_eLL.cxx @@ -29,9 +29,11 @@ #include #include #include +#include #include #include +#include #include "Batch_FactBatchManager_eLL.hxx" #include "Batch_BatchManager_eLL.hxx" @@ -45,8 +47,7 
@@ namespace Batch { const char * username, CommunicationProtocolType protocolType, const char * mpiImpl, int nb_proc_per_node) - : BatchManager(parent, host), - BatchManager_eClient(parent, host, username, protocolType, mpiImpl), + : BatchManager(parent, host, username, protocolType, mpiImpl), _nb_proc_per_node(nb_proc_per_node) { // Nothing to do @@ -60,7 +61,6 @@ namespace Batch { // Method to submit a job to the batch manager const JobId BatchManager_eLL::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const string workDir = params[WORKDIR]; @@ -70,34 +70,23 @@ namespace Batch { // build command file to submit the job and copy it on the server string cmdFile = buildCommandFile(job); - // define name of log file (local) - string logFile = generateTemporaryFileName("LL-submitlog"); - // define command to submit batch string subCommand = string("cd ") + workDir + "; llsubmit " + cmdFile; string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - status = system(command.c_str()); - if (status) - { - ifstream error_message(logFile.c_str()); - string mess; - string temp; - while(getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - // read id of submitted job in log file + // submit job + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); + + // find id of submitted job in output string jobref; - ifstream idfile(logFile.c_str()); + istringstream idfile(output); string line; while (idfile && line.compare(0, 9, "llsubmit:") != 0) getline(idfile, line); - idfile.close(); if (line.compare(0, 9, "llsubmit:") == 0) { string::size_type p1 = line.find_first_of("\""); @@ -106,7 +95,7 @@ namespace Batch { jobref = 
line.substr(p1 + 1, p2 - p1 - 1); } if (jobref.size() == 0) - throw EmulationException("Error in the submission of the job on the remote host"); + throw RunTimeException("Error in the submission of the job on the remote host"); JobId id(this, jobref); return id; @@ -129,11 +118,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); + throw RunTimeException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); + throw RunTimeException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); string::size_type p1 = fileToExecute.find_last_of("/"); string::size_type p2 = fileToExecute.find_last_of("."); @@ -142,7 +131,7 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - string tmpFileName = createAndOpenTemporaryFile("LL-script", tempOutputFile); + string tmpFileName = Utils::createAndOpenTemporaryFile("LL-script", tempOutputFile); tempOutputFile << "#!/bin/bash" << endl; tempOutputFile << "# @ output = " << workDir << "/logs/output.log." 
<< rootNameToExecute << endl; @@ -217,7 +206,7 @@ namespace Batch { workDir + "/" + remoteFileName, _hostname, _username); if (status) - throw EmulationException("Cannot copy command file on host " + _hostname); + throw RunTimeException("Cannot copy command file on host " + _hostname); return remoteFileName; } @@ -231,7 +220,7 @@ namespace Batch { int status = system(command.c_str()); if (status) - throw EmulationException("Can't delete job " + jobid.getReference()); + throw RunTimeException("Can't delete job " + jobid.getReference()); cerr << "job " << jobid.getReference() << " killed" << endl; } @@ -263,20 +252,16 @@ namespace Batch { JobInfo BatchManager_eLL::queryJob(const JobId & jobid) { - // define name of log file (local) - string logFile = generateTemporaryFileName("LL-querylog-" + jobid.getReference()); - // define command to query batch string subCommand = "llq -f %st " + jobid.getReference(); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - int status = system(command.c_str()); + string output; + int status = Utils::getCommandOutput(command, output); if (status != 0) - throw EmulationException("Can't query job " + jobid.getReference()); + throw RunTimeException("Can't query job " + jobid.getReference()); - JobInfo_eLL jobinfo = JobInfo_eLL(jobid.getReference(), logFile); + JobInfo_eLL jobinfo = JobInfo_eLL(jobid.getReference(), output); return jobinfo; } diff --git a/src/LoadLeveler/Batch_BatchManager_eLL.hxx b/src/LoadLeveler/Batch_BatchManager_eLL.hxx index 6516c28..1b7ea39 100644 --- a/src/LoadLeveler/Batch_BatchManager_eLL.hxx +++ b/src/LoadLeveler/Batch_BatchManager_eLL.hxx @@ -35,11 +35,11 @@ #include "Batch_JobId.hxx" #include "Batch_JobInfo.hxx" #include "Batch_FactBatchManager.hxx" -#include "Batch_BatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" namespace Batch { - class BATCH_EXPORT BatchManager_eLL : public 
BatchManager_eClient + class BATCH_EXPORT BatchManager_eLL : public BatchManager { public: BatchManager_eLL(const FactBatchManager * parent, const char * host = "localhost", diff --git a/src/LoadLeveler/Batch_FactBatchManager_eLL.cxx b/src/LoadLeveler/Batch_FactBatchManager_eLL.cxx index c983f82..0fa4c7d 100644 --- a/src/LoadLeveler/Batch_FactBatchManager_eLL.cxx +++ b/src/LoadLeveler/Batch_FactBatchManager_eLL.cxx @@ -36,7 +36,7 @@ namespace Batch { static FactBatchManager_eLL sFBM_eLL; - FactBatchManager_eLL::FactBatchManager_eLL() : FactBatchManager_eClient("eLL") + FactBatchManager_eLL::FactBatchManager_eLL() : FactBatchManager("LL") { // Add specific parameters ParameterTypeMap::getInstance().addParameter(LL_JOBTYPE, STRING, 1); @@ -47,18 +47,11 @@ namespace Batch { // Nothing to do } - // Functor - BatchManager * FactBatchManager_eLL::operator() (const char * hostname) const - { - // MESSAGE("Building new BatchManager_eLL on host '" << hostname << "'"); - return new BatchManager_eLL(this, hostname); - } - - BatchManager_eClient * FactBatchManager_eLL::operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node) const + BatchManager * FactBatchManager_eLL::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node) const { // MESSAGE("Building new BatchManager_eLL on host '" << hostname << "'"); return new BatchManager_eLL(this, hostname, username, protocolType, mpiImpl, nb_proc_per_node); diff --git a/src/LoadLeveler/Batch_FactBatchManager_eLL.hxx b/src/LoadLeveler/Batch_FactBatchManager_eLL.hxx index a37cb6e..4d6d591 100644 --- a/src/LoadLeveler/Batch_FactBatchManager_eLL.hxx +++ b/src/LoadLeveler/Batch_FactBatchManager_eLL.hxx @@ -32,8 +32,8 @@ #include #include -#include "Batch_BatchManager_eClient.hxx" -#include "Batch_FactBatchManager_eClient.hxx" +#include 
"Batch_BatchManager.hxx" +#include "Batch_FactBatchManager.hxx" namespace Batch { @@ -41,20 +41,18 @@ namespace Batch { class BatchManager_eLL; - class BATCH_EXPORT FactBatchManager_eLL : public FactBatchManager_eClient + class BATCH_EXPORT FactBatchManager_eLL : public FactBatchManager { public: // Constructeur et destructeur FactBatchManager_eLL(); virtual ~FactBatchManager_eLL(); - virtual BatchManager * operator() (const char * hostname) const; - virtual BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node = 1) const; - + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node = 1) const; }; } diff --git a/src/LoadLeveler/Batch_JobInfo_eLL.cxx b/src/LoadLeveler/Batch_JobInfo_eLL.cxx index 13ab39c..59ab7d2 100644 --- a/src/LoadLeveler/Batch_JobInfo_eLL.cxx +++ b/src/LoadLeveler/Batch_JobInfo_eLL.cxx @@ -27,7 +27,6 @@ */ #include -#include #include #include @@ -39,19 +38,18 @@ using namespace std; namespace Batch { - JobInfo_eLL::JobInfo_eLL(const std::string & id, const std::string & logFile) + JobInfo_eLL::JobInfo_eLL(const std::string & id, const std::string & output) : JobInfo() { _param[ID] = id; // read log file - ifstream log(logFile.c_str()); + istringstream log(output); string line; // status should be on the third line for (int i=0 ; i<3 ; i++) getline(log, line); - log.close(); string status; istringstream iss(line); iss >> status; diff --git a/src/LoadLeveler/Batch_JobInfo_eLL.hxx b/src/LoadLeveler/Batch_JobInfo_eLL.hxx index a4ec9f1..ab89129 100644 --- a/src/LoadLeveler/Batch_JobInfo_eLL.hxx +++ b/src/LoadLeveler/Batch_JobInfo_eLL.hxx @@ -38,7 +38,7 @@ namespace Batch { class JobInfo_eLL : public JobInfo { public: - JobInfo_eLL(const std::string & id, const std::string & logFile); + JobInfo_eLL(const std::string & id, 
const std::string & output); virtual ~JobInfo_eLL(); }; diff --git a/src/LoadLeveler/Test/Test_eLL.cxx b/src/LoadLeveler/Test/Test_eLL.cxx index 62156d7..8f897cc 100644 --- a/src/LoadLeveler/Test/Test_eLL.cxx +++ b/src/LoadLeveler/Test/Test_eLL.cxx @@ -35,10 +35,8 @@ #include #include #include -#include #include #include -#include #include @@ -98,7 +96,6 @@ int main(int argc, char** argv) p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh"); p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh"); p[OUTFILE] = Couple("result.txt", "tmp/Batch/result.txt"); - p[TMPDIR] = "tmp/Batch/"; p[NBPROC] = 1; p[MAXWALLTIME] = 1; p[MAXRAMSIZE] = 50; @@ -117,8 +114,8 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type ePBS on localhost - FactBatchManager_eClient * fbm = (FactBatchManager_eClient *)(c("eLL")); - BatchManager_eClient * bm = (*fbm)(host.c_str(), user.c_str(), protocol); + FactBatchManager * fbm = c("LL"); + BatchManager * bm = (*fbm)(host.c_str(), user.c_str(), protocol); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job); diff --git a/src/Local/Batch_BatchManager_Local.cxx b/src/Local/Batch_BatchManager_Local.cxx index 2098e1c..f09b2cd 100644 --- a/src/Local/Batch_BatchManager_Local.cxx +++ b/src/Local/Batch_BatchManager_Local.cxx @@ -62,10 +62,11 @@ namespace Batch { // Constructeur - BatchManager_Local::BatchManager_Local(const FactBatchManager * parent, const char * host, - CommunicationProtocolType protocolType) - : BatchManager(parent, host), _connect(0), - _protocol(CommunicationProtocol::getInstance(protocolType)), + BatchManager_Local::BatchManager_Local(const Batch::FactBatchManager * parent, const char * host, + const char * username, + CommunicationProtocolType protocolType, const char * mpiImpl, + int nb_proc_per_node) + : BatchManager(parent, host, username, protocolType, mpiImpl), _connect(0), _idCounter(0) { pthread_mutex_init(&_threads_mutex, NULL); @@ -90,14 
+91,12 @@ namespace Batch { pthread_cond_destroy(&_threadSyncCondition); } - const CommunicationProtocol & BatchManager_Local::getProtocol() const - { - return _protocol; - } - // Methode pour le controle des jobs : soumet un job au gestionnaire const JobId BatchManager_Local::submitJob(const Job & job) { + // export input files in the working directory of the execution host + exportInputFiles(job); + Job_Local jobLocal = job; Id id = _idCounter++; ThreadAdapter * p_ta = new ThreadAdapter(*this, job, id); @@ -283,7 +282,11 @@ namespace Batch { if (strlen(drive) > 0) exec_sub_cmd << drive << " && "; #endif - exec_sub_cmd << "cd " << param[WORKDIR] << " && " << param[EXECUTABLE]; + string fileToExecute = param[EXECUTABLE].str(); + string::size_type p1 = fileToExecute.find_last_of("/"); + string fileNameToExecute = fileToExecute.substr(p1+1); + + exec_sub_cmd << "cd " << param[WORKDIR] << " && ./" << fileNameToExecute; if (param.find(ARGUMENTS) != param.end()) { Versatile V = param[ARGUMENTS]; @@ -320,7 +323,7 @@ namespace Batch { user = string(it->second); } - return _protocol.getExecCommandArgs(exec_sub_cmd.str(), param[EXECUTIONHOST], user); + return _protocol.getExecCommandArgs(exec_sub_cmd.str(), _hostname, user); } @@ -359,71 +362,6 @@ namespace Batch { pthread_cleanup_push(BatchManager_Local::setFailedOnCancel, arg); pthread_cleanup_push(BatchManager_Local::kill_child_on_exit, static_cast (&child)); - - // Le code retour cumule (ORed) de tous les appels - // Nul en cas de reussite de l'ensemble des operations - int rc = 0; - - // Cette table contient la liste des fichiers a detruire a la fin du processus - std::vector files_to_delete; - - - - // On copie les fichiers d'entree pour le fils - const Parametre param = p_ta->_job.getParametre(); - Parametre::const_iterator it; - - // On initialise la variable workdir a la valeur du Current Working Directory - char * cwd = -#ifdef WIN32 - _getcwd(NULL, 0); -#else - new char [PATH_MAX]; - getcwd(cwd, PATH_MAX); 
-#endif - string workdir = cwd; - delete [] cwd; - - if ( (it = param.find(WORKDIR)) != param.end() ) { - workdir = static_cast( (*it).second ); - } - - string executionhost = string(param[EXECUTIONHOST]); - string user; - if ( (it = param.find(USER)) != param.end() ) { - user = string(it->second); - } - - if ( (it = param.find(INFILE)) != param.end() ) { - Versatile V = (*it).second; - Versatile::iterator Vit; - - for(Vit=V.begin(); Vit!=V.end(); Vit++) { - CoupleType cpt = *static_cast< CoupleType * >(*Vit); - Couple cp = cpt; - string local = cp.getLocal(); - string remote = cp.getRemote(); - - std::cerr << workdir << std::endl; - std::cerr << remote << std::endl; - - int status = p_ta->getBatchManager().getProtocol().copyFile(local, "", "", - workdir + "/" + remote, - executionhost, user); - if (status) { - // Echec de la copie - rc |= 1; - } else { - // On enregistre le fichier comme etant a detruire - files_to_delete.push_back(workdir + "/" + remote); - } - - } - } - - - - // On forke/exec un nouveau process pour pouvoir controler le fils // (plus finement qu'avec un appel system) // int rc = system(commande.c_str()); @@ -445,47 +383,6 @@ namespace Batch { } #endif - - // On copie les fichiers de sortie du fils - if ( (it = param.find(OUTFILE)) != param.end() ) { - Versatile V = (*it).second; - Versatile::iterator Vit; - - for(Vit=V.begin(); Vit!=V.end(); Vit++) { - CoupleType cpt = *static_cast< CoupleType * >(*Vit); - Couple cp = cpt; - string local = cp.getLocal(); - string remote = cp.getRemote(); - - int status = p_ta->getBatchManager().getProtocol().copyFile(workdir + "/" + remote, - executionhost, user, - local, "", ""); - if (status) { - // Echec de la copie - rc |= 1; - } else { - // On enregistre le fichier comme etant a detruire - files_to_delete.push_back(workdir + "/" + remote); - } - - } - } - - // On efface les fichiers d'entree et de sortie du fils si les copies precedentes ont reussi - // ou si la creation du fils n'a pu avoir lieu - if ( (rc 
== 0) || (child < 0) ) { - std::vector::const_iterator it; - for(it=files_to_delete.begin(); it!=files_to_delete.end(); it++) { - p_ta->getBatchManager().getProtocol().removeFile(*it, executionhost, user); -/* string remove_cmd = p_ta->getBatchManager().remove_command(user, executionhost, *it); - UNDER_LOCK( cout << "Removing : " << remove_cmd << endl ); -#ifdef WIN32 - remove_cmd = string("\"") + remove_cmd + string("\""); -#endif - system(remove_cmd.c_str());*/ - } - } - pthread_mutex_lock(&p_ta->_bm._threads_mutex); // Set the job state to FINISHED or FAILED diff --git a/src/Local/Batch_BatchManager_Local.hxx b/src/Local/Batch_BatchManager_Local.hxx index fcc2f90..ab73038 100644 --- a/src/Local/Batch_BatchManager_Local.hxx +++ b/src/Local/Batch_BatchManager_Local.hxx @@ -116,18 +116,16 @@ namespace Batch { public: - // Constructeur et destructeur - BatchManager_Local(const FactBatchManager * parent, - const char * host="localhost", - CommunicationProtocolType protocolType = SSH); // connexion a la machine host + + BatchManager_Local(const Batch::FactBatchManager * parent, const char * host = "localhost", + const char * username = "", + CommunicationProtocolType protocolType = SSH, const char * mpiImpl = "nompi", + int nb_proc_per_node = 1); virtual ~BatchManager_Local(); // Recupere le nom du serveur par defaut // static string BatchManager_Local::getDefaultServer(); - // Get the underlying communication protocol - const CommunicationProtocol & getProtocol() const; - // Methodes pour le controle des jobs virtual const JobId submitJob(const Job & job); // soumet un job au gestionnaire virtual void deleteJob(const JobId & jobid); // retire un job du gestionnaire @@ -149,8 +147,6 @@ namespace Batch { pthread_mutex_t _threads_mutex; std::map _threads; - const CommunicationProtocol & _protocol; - // Methode qui renvoie la commande a executer std::vector exec_command(const Parametre & param) const; diff --git a/src/Local/Batch_FactBatchManager_Local.cxx 
b/src/Local/Batch_FactBatchManager_Local.cxx index 16f6f47..157a1b9 100644 --- a/src/Local/Batch_FactBatchManager_Local.cxx +++ b/src/Local/Batch_FactBatchManager_Local.cxx @@ -39,38 +39,25 @@ namespace Batch { -#ifdef HAS_SH - static FactBatchManager_Local sFBM_Local_SH("SH", SH); -#endif + static FactBatchManager_Local sFBM_Local; -#ifdef HAS_RSH - static FactBatchManager_Local sFBM_Local_RSH("RSH", RSH); -#endif - -#ifdef HAS_SSH - static FactBatchManager_Local sFBM_Local_SSH("SSH", SSH); -#endif - - // Constructeur - FactBatchManager_Local::FactBatchManager_Local(const char * name, - CommunicationProtocolType protocolType) - : FactBatchManager(name), - _protocolType(protocolType) + FactBatchManager_Local::FactBatchManager_Local() + : FactBatchManager("LOCAL") { - // Nothing to do } - // Destructeur FactBatchManager_Local::~FactBatchManager_Local() { - // Nothing to do } - // Functor - BatchManager * FactBatchManager_Local::operator() (const char * hostname) const + BatchManager * FactBatchManager_Local::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpi, + int nb_proc_per_node) const { - // MESSAGE("Building new BatchManager_Local on host '" << hostname << "'"); - return new BatchManager_Local(this, hostname, _protocolType); + return new BatchManager_Local(this, hostname, username, + protocolType, mpi, nb_proc_per_node); } } diff --git a/src/Local/Batch_FactBatchManager_Local.hxx b/src/Local/Batch_FactBatchManager_Local.hxx index 4b01a4b..cdbdc9c 100644 --- a/src/Local/Batch_FactBatchManager_Local.hxx +++ b/src/Local/Batch_FactBatchManager_Local.hxx @@ -43,17 +43,15 @@ namespace Batch { class FactBatchManager_Local : public FactBatchManager { public: - // Constructeur et destructeur - FactBatchManager_Local(const char * name, CommunicationProtocolType protocolType); - virtual ~FactBatchManager_Local(); - - virtual BatchManager * operator() (const char * hostname) const; - protected: - - 
CommunicationProtocolType _protocolType; + FactBatchManager_Local(); + virtual ~FactBatchManager_Local(); - private: + virtual BatchManager * operator() (const char * hostname, + const char * username = "", + CommunicationProtocolType protocolType = SSH, + const char * mpi = "nompi", + int nb_proc_per_node = 1) const; }; diff --git a/src/Local/Batch_Job_Local.cxx b/src/Local/Batch_Job_Local.cxx index 474c6df..28355bc 100644 --- a/src/Local/Batch_Job_Local.cxx +++ b/src/Local/Batch_Job_Local.cxx @@ -46,15 +46,9 @@ namespace Batch { Job_Local::Job_Local(const Job & job) : _command(), _param(job.getParametre()), _env(job.getEnvironnement()) { - // On positionne le nom du EXECUTIONHOST a "localhost" s'il n'est pas precise - if ( _param.find(EXECUTIONHOST) == _param.end() ) { - _param[EXECUTIONHOST] = "localhost"; - } - // On convertit les objets Parametre et Environnement en liste chainee d'attributs + operateur addEnvironnement( _env ); addParametre ( _param ); - } diff --git a/src/Local/Test/Test_Local_RSH.cxx b/src/Local/Test/Test_Local_RSH.cxx index 83622c3..2bf22bd 100644 --- a/src/Local/Test/Test_Local_RSH.cxx +++ b/src/Local/Test/Test_Local_RSH.cxx @@ -64,14 +64,12 @@ int main(int argc, char** argv) Job job; // ... and its parameters ... Parametre p; - p[EXECUTABLE] = "source copied-test-script.sh"; + p[EXECUTABLE] = "test-script.sh"; p[NAME] = "Test_Local_RSH"; p[WORKDIR] = workdir; - p[INFILE] = Couple("seta.sh", "copied-seta.sh"); - p[INFILE] += Couple("setb.sh", "copied-setb.sh"); - p[INFILE] += Couple("test-script.sh", "copied-test-script.sh"); - p[OUTFILE] = Couple("result.txt", "orig-result.txt"); - p[EXECUTIONHOST] = exechost; + p[INFILE] = Couple("seta.sh", workdir + "/copied-seta.sh"); + p[INFILE] += Couple("setb.sh", workdir + "/copied-setb.sh"); + p[OUTFILE] = Couple("result.txt", workdir + "/orig-result.txt"); p[USER] = user; job.setParametre(p); // ... 
and its environment @@ -83,12 +81,12 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type Local_RSH on localhost - FactBatchManager * fbm = c("RSH"); + FactBatchManager * fbm = c("LOCAL"); if (fbm == NULL) { cerr << "Can't get RSH batch manager factory" << endl; return 1; } - BatchManager * bm = (*fbm)("localhost"); + BatchManager * bm = (*fbm)(exechost.c_str(), user.c_str(), RSH); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job); @@ -97,13 +95,18 @@ int main(int argc, char** argv) // Wait for the end of the job string state = bm->waitForJobEnd(jobid, timeout); - if (state != FINISHED && state != FAILED) { - cerr << "Error: Job not finished after timeout" << endl; + if (state == FINISHED) { + cout << "Job " << jobid.__repr__() << " is done" << endl; + bm->importOutputFiles(job, "resultdir/seconddirname"); + } else if (state == FAILED) { + cerr << "Job " << jobid.__repr__() << " finished in error" << endl; + bm->importOutputFiles(job, "resultdir/seconddirname"); + return 1; + } else { + cerr << "Timeout while executing job" << endl; return 1; } - cout << "Job " << jobid.__repr__() << " is done" << endl; - } catch (GenericException e) { cerr << "Error: " << e << endl; return 1; @@ -115,7 +118,7 @@ int main(int argc, char** argv) // test the result file string exp = "c = 12"; string res; - ifstream f("result.txt"); + ifstream f("resultdir/seconddirname/result.txt"); getline(f, res); f.close(); diff --git a/src/Local/Test/Test_Local_SH.cxx b/src/Local/Test/Test_Local_SH.cxx index 5c3c2e7..3734249 100644 --- a/src/Local/Test/Test_Local_SH.cxx +++ b/src/Local/Test/Test_Local_SH.cxx @@ -63,16 +63,15 @@ int main(int argc, char** argv) Job job; // ... and its parameters ... 
Parametre p; - p[EXECUTABLE] = string("./copied-") + EXEC_TEST_NAME; + p[EXECUTABLE] = EXEC_TEST_NAME; p[ARGUMENTS] = "copied-seta.sh"; p[ARGUMENTS] += "copied-setb.sh"; p[ARGUMENTS] += "orig-result.txt"; p[NAME] = "Test_Local_SH"; p[WORKDIR] = workdir; - p[INFILE] = Couple("seta.sh", "copied-seta.sh"); - p[INFILE] += Couple("setb.sh", "copied-setb.sh"); - p[INFILE] += Couple(EXEC_TEST_NAME, string("copied-") + EXEC_TEST_NAME); - p[OUTFILE] = Couple("result.txt", "orig-result.txt"); + p[INFILE] = Couple("seta.sh", workdir + "/copied-seta.sh"); + p[INFILE] += Couple("setb.sh", workdir + "/copied-setb.sh"); + p[OUTFILE] = Couple("result.txt", workdir + "/orig-result.txt"); job.setParametre(p); // ... and its environment Environnement e; @@ -83,12 +82,12 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type Local_SH on localhost - FactBatchManager * fbm = c("SH"); + FactBatchManager * fbm = c("LOCAL"); if (fbm == NULL) { cerr << "Can't get SH batch manager factory" << endl; return 1; } - BatchManager * bm = (*fbm)("localhost"); + BatchManager * bm = (*fbm)("localhost", "", SH); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job); @@ -97,13 +96,18 @@ int main(int argc, char** argv) // Wait for the end of the job string state = bm->waitForJobEnd(jobid, timeout); - if (state != FINISHED && state != FAILED) { - cerr << "Error: Job not finished after timeout" << endl; + if (state == FINISHED) { + cout << "Job " << jobid.__repr__() << " is done" << endl; + bm->importOutputFiles(job, "resultdir/seconddirname"); + } else if (state == FAILED) { + cerr << "Job " << jobid.__repr__() << " finished in error" << endl; + bm->importOutputFiles(job, "resultdir/seconddirname"); + return 1; + } else { + cerr << "Timeout while executing job" << endl; return 1; } - cout << "Job " << jobid.__repr__() << " is done" << endl; - } catch (GenericException e) { cerr << "Error: " << e << endl; return 1; 
@@ -115,7 +119,7 @@ int main(int argc, char** argv) // test the result file string exp = "c = 12"; string res; - ifstream f("result.txt"); + ifstream f("resultdir/seconddirname/result.txt"); getline(f, res); f.close(); diff --git a/src/Local/Test/Test_Local_SSH.cxx b/src/Local/Test/Test_Local_SSH.cxx index b82740e..673a894 100644 --- a/src/Local/Test/Test_Local_SSH.cxx +++ b/src/Local/Test/Test_Local_SSH.cxx @@ -65,14 +65,12 @@ int main(int argc, char** argv) Job job; // ... and its parameters ... Parametre p; - p[EXECUTABLE] = "source copied-test-script.sh"; + p[EXECUTABLE] = "test-script.sh"; p[NAME] = "Test_Local_SSH"; p[WORKDIR] = workdir; - p[INFILE] = Couple("seta.sh", "copied-seta.sh"); - p[INFILE] += Couple("setb.sh", "copied-setb.sh"); - p[INFILE] += Couple("test-script.sh", "copied-test-script.sh"); - p[OUTFILE] = Couple("result.txt", "orig-result.txt"); - p[EXECUTIONHOST] = exechost; + p[INFILE] = Couple("seta.sh", workdir + "/copied-seta.sh"); + p[INFILE] += Couple("setb.sh", workdir + "/copied-setb.sh"); + p[OUTFILE] = Couple("result.txt", workdir + "/orig-result.txt"); p[USER] = user; job.setParametre(p); // ... 
and its environment (SSH_AUTH_SOCK env var is important for ssh agent authentication) @@ -86,12 +84,12 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type Local_SSH on localhost - FactBatchManager * fbm = c("SSH"); + FactBatchManager * fbm = c("LOCAL"); if (fbm == NULL) { cerr << "Can't get SSH batch manager factory" << endl; return 1; } - BatchManager * bm = (*fbm)("localhost"); + BatchManager * bm = (*fbm)(exechost.c_str(), user.c_str(), SSH); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job); @@ -100,13 +98,18 @@ int main(int argc, char** argv) // Wait for the end of the job string state = bm->waitForJobEnd(jobid, timeout); - if (state != FINISHED && state != FAILED) { - cerr << "Error: Job not finished after timeout" << endl; + if (state == FINISHED) { + cout << "Job " << jobid.__repr__() << " is done" << endl; + bm->importOutputFiles(job, "resultdir/seconddirname"); + } else if (state == FAILED) { + cerr << "Job " << jobid.__repr__() << " finished in error" << endl; + bm->importOutputFiles(job, "resultdir/seconddirname"); + return 1; + } else { + cerr << "Timeout while executing job" << endl; return 1; } - cout << "Job " << jobid.__repr__() << " is done" << endl; - } catch (GenericException e) { cerr << "Error: " << e << endl; return 1; @@ -118,7 +121,7 @@ int main(int argc, char** argv) // test the result file string exp = "c = 12"; string res; - ifstream f("result.txt"); + ifstream f("resultdir/seconddirname/result.txt"); getline(f, res); f.close(); diff --git a/src/Local/Test/test-script.sh b/src/Local/Test/test-script.sh index afd53de..bc7704a 100755 --- a/src/Local/Test/test-script.sh +++ b/src/Local/Test/test-script.sh @@ -1,7 +1,7 @@ #!/bin/sh -source copied-seta.sh -source copied-setb.sh +. ./copied-seta.sh +. 
./copied-setb.sh c=`expr $a "*" $b` diff --git a/src/PBS/Batch_BatchManager_ePBS.cxx b/src/PBS/Batch_BatchManager_ePBS.cxx index 2fbfb9e..ee8c624 100644 --- a/src/PBS/Batch_BatchManager_ePBS.cxx +++ b/src/PBS/Batch_BatchManager_ePBS.cxx @@ -35,6 +35,7 @@ #include #include +#include #include "Batch_BatchManager_ePBS.hxx" #include "Batch_JobInfo_ePBS.hxx" @@ -47,8 +48,7 @@ namespace Batch { const char * username, CommunicationProtocolType protocolType, const char * mpiImpl, int nb_proc_per_node) - : BatchManager(parent, host), - BatchManager_eClient(parent, host, username, protocolType, mpiImpl), + : BatchManager(parent, host, username, protocolType, mpiImpl), _nb_proc_per_node(nb_proc_per_node) { // Nothing to do @@ -82,7 +82,7 @@ namespace Batch { string output; int status = Utils::getCommandOutput(command, output); cout << output; - if (status != 0) throw EmulationException("Can't submit job, error was: " + output); + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // normally output contains only id of submitted job, we just need to remove the final \n string jobref = output.substr(0, output.size() - 1); @@ -112,7 +112,7 @@ namespace Batch { cerr << command.c_str() << endl; status = system(command.c_str()); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); cerr << "jobId = " << ref << "killed" << endl; } @@ -120,20 +120,20 @@ namespace Batch { // Methode pour le controle des jobs : suspend un job en file d'attente void BatchManager_ePBS::holdJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_ePBS::holdJob"); } // Methode pour le controle des jobs : relache un job suspendu void BatchManager_ePBS::releaseJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_ePBS::releaseJob"); } // Methode pour 
le controle des jobs : modifie un job en file d'attente void BatchManager_ePBS::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_ePBS::alterJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente @@ -163,7 +163,7 @@ namespace Batch { string output; int status = Utils::getCommandOutput(command, output); if(status && status != 153 && status != 256*153) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); JobInfo_ePBS ji = JobInfo_ePBS(id, output); return ji; @@ -172,7 +172,7 @@ namespace Batch { // Methode pour le controle des jobs : teste si un job est present en machine bool BatchManager_ePBS::isRunning(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_ePBS::isRunning"); } std::string BatchManager_ePBS::buildSubmissionScript(const Job & job) @@ -192,11 +192,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[EXECUTABLE] is not defined ! 
Please defined it, cannot submit this job"); // Optional parameters if (params.find(NBPROC) != params.end()) @@ -215,7 +215,7 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - std::string TmpFileName = createAndOpenTemporaryFile("PBS-script", tempOutputFile); + std::string TmpFileName = Utils::createAndOpenTemporaryFile("PBS-script", tempOutputFile); tempOutputFile << "#! /bin/sh -f" << endl; if (params.find(NAME) != params.end()) { @@ -285,7 +285,7 @@ namespace Batch { workDir + "/" + remoteFileName, _hostname, _username); if (status) - throw EmulationException("Error of connection on remote host, cannot copy batch submission file"); + throw RunTimeException("Error of connection on remote host, cannot copy batch submission file"); return remoteFileName; } } diff --git a/src/PBS/Batch_BatchManager_ePBS.hxx b/src/PBS/Batch_BatchManager_ePBS.hxx index a32fcc2..325f4ba 100644 --- a/src/PBS/Batch_BatchManager_ePBS.hxx +++ b/src/PBS/Batch_BatchManager_ePBS.hxx @@ -36,11 +36,11 @@ #include "Batch_JobId.hxx" #include "Batch_JobInfo.hxx" #include "Batch_FactBatchManager.hxx" -#include "Batch_BatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" namespace Batch { - class BATCH_EXPORT BatchManager_ePBS : public BatchManager_eClient + class BATCH_EXPORT BatchManager_ePBS : public BatchManager { public: // Constructeur et destructeur diff --git a/src/PBS/Batch_FactBatchManager_ePBS.cxx b/src/PBS/Batch_FactBatchManager_ePBS.cxx index 415ac58..32f711a 100644 --- a/src/PBS/Batch_FactBatchManager_ePBS.cxx +++ b/src/PBS/Batch_FactBatchManager_ePBS.cxx @@ -28,17 +28,15 @@ * */ -#include #include "Batch_BatchManager_ePBS.hxx" #include "Batch_FactBatchManager_ePBS.hxx" -//#include "utilities.h" namespace Batch { static FactBatchManager_ePBS sFBM_ePBS; // Constructeur - FactBatchManager_ePBS::FactBatchManager_ePBS() : FactBatchManager_eClient("ePBS") + FactBatchManager_ePBS::FactBatchManager_ePBS() : FactBatchManager("PBS") { // Nothing to do } @@ 
-49,22 +47,13 @@ namespace Batch { // Nothing to do } - // Functor - BatchManager * FactBatchManager_ePBS::operator() (const char * hostname) const + BatchManager * FactBatchManager_ePBS::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node) const { - // MESSAGE("Building new BatchManager_PBS on host '" << hostname << "'"); - return new BatchManager_ePBS(this, hostname); - } - - BatchManager_eClient * FactBatchManager_ePBS::operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node) const - { - // MESSAGE("Building new BatchManager_PBS on host '" << hostname << "'"); return new BatchManager_ePBS(this, hostname, username, protocolType, mpiImpl, nb_proc_per_node); } - } diff --git a/src/PBS/Batch_FactBatchManager_ePBS.hxx b/src/PBS/Batch_FactBatchManager_ePBS.hxx index a379d5b..2e0f49e 100644 --- a/src/PBS/Batch_FactBatchManager_ePBS.hxx +++ b/src/PBS/Batch_FactBatchManager_ePBS.hxx @@ -33,32 +33,25 @@ #include "Batch_Defines.hxx" -#include -#include -#include "Batch_BatchManager_eClient.hxx" -#include "Batch_FactBatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" +#include "Batch_FactBatchManager.hxx" namespace Batch { class BatchManager_ePBS; - class BATCH_EXPORT FactBatchManager_ePBS : public FactBatchManager_eClient + class BATCH_EXPORT FactBatchManager_ePBS : public FactBatchManager { public: // Constructeur et destructeur FactBatchManager_ePBS(); virtual ~FactBatchManager_ePBS(); - virtual BatchManager * operator() (const char * hostname) const; - virtual BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node = 1) const; - - protected: - - private: + virtual BatchManager * operator() (const char * hostname, + const char * username, + 
CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node = 1) const; }; diff --git a/src/PBS/CMakeLists.txt b/src/PBS/CMakeLists.txt index bf57649..a543f75 100644 --- a/src/PBS/CMakeLists.txt +++ b/src/PBS/CMakeLists.txt @@ -25,14 +25,14 @@ SET(CLASS_LIST PBS/Batch_BatchManager_ePBS PBS/Batch_JobInfo_ePBS ) -IF (BUILD_PBS_INTERFACE AND PBS_FOUND) - SET(CLASS_LIST ${CLASS_LIST} - PBS/Batch_BatchManager_PBS - PBS/Batch_FactBatchManager_PBS - PBS/Batch_Job_PBS - PBS/Batch_JobInfo_PBS - ) -ENDIF (BUILD_PBS_INTERFACE AND PBS_FOUND) +#IF (BUILD_PBS_INTERFACE AND PBS_FOUND) +# SET(CLASS_LIST ${CLASS_LIST} +# PBS/Batch_BatchManager_PBS +# PBS/Batch_FactBatchManager_PBS +# PBS/Batch_Job_PBS +# PBS/Batch_JobInfo_PBS +# ) +#ENDIF (BUILD_PBS_INTERFACE AND PBS_FOUND) APPEND_CLASSES_TO_SRC_FILES(${CLASS_LIST}) diff --git a/src/PBS/Test/CMakeLists.txt b/src/PBS/Test/CMakeLists.txt index bbe5574..b03ee03 100644 --- a/src/PBS/Test/CMakeLists.txt +++ b/src/PBS/Test/CMakeLists.txt @@ -43,8 +43,8 @@ IF (HAS_RSH) ADD_TEST(ePBS_RSH Test_ePBS RSH) ENDIF (HAS_RSH) -IF (BUILD_PBS_INTERFACE AND PBS_FOUND) - add_executable(Test_PBS Test_PBS.cxx) - target_link_libraries(Test_PBS Batch SimpleParser) - ADD_TEST(PBS Test_PBS) -ENDIF (BUILD_PBS_INTERFACE AND PBS_FOUND) +#IF (BUILD_PBS_INTERFACE AND PBS_FOUND) +# add_executable(Test_PBS Test_PBS.cxx) +# target_link_libraries(Test_PBS Batch SimpleParser) +# ADD_TEST(PBS Test_PBS) +#ENDIF (BUILD_PBS_INTERFACE AND PBS_FOUND) diff --git a/src/PBS/Test/Test_ePBS.cxx b/src/PBS/Test/Test_ePBS.cxx index c016089..424beae 100644 --- a/src/PBS/Test/Test_ePBS.cxx +++ b/src/PBS/Test/Test_ePBS.cxx @@ -35,9 +35,7 @@ #include #include #include -#include #include -#include #include @@ -96,7 +94,6 @@ int main(int argc, char** argv) p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh"); p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh"); p[OUTFILE] = Couple("result.txt", "tmp/Batch/result.txt"); - p[TMPDIR] = "tmp/Batch/"; p[NBPROC] = 1; 
p[MAXWALLTIME] = 1; p[MAXRAMSIZE] = 128; @@ -113,8 +110,8 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type ePBS on localhost - FactBatchManager_eClient * fbm = (FactBatchManager_eClient *)(c("ePBS")); - BatchManager_eClient * bm = (*fbm)(host.c_str(), user.c_str(), protocol, "nompi", 8); + FactBatchManager * fbm = c("PBS"); + BatchManager * bm = (*fbm)(host.c_str(), user.c_str(), protocol, "nompi", 8); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job); diff --git a/src/Python/Test/Test_Python_Local_SH.py b/src/Python/Test/Test_Python_Local_SH.py index a6dae7d..d327324 100644 --- a/src/Python/Test/Test_Python_Local_SH.py +++ b/src/Python/Test/Test_Python_Local_SH.py @@ -42,13 +42,13 @@ def work(): job = Job() # ... and its parameters ... p = {} - p[EXECUTABLE] = './copied-' + config.EXEC_TEST_NAME + p[EXECUTABLE] = config.EXEC_TEST_FULL_PATH p[ARGUMENTS] = ["copied-seta.sh", "copied-setb.sh", "orig-result.txt"]; p[NAME] = 'Test_Python_Local_SH' p[WORKDIR] = config.TEST_LOCAL_SH_WORK_DIR - p[INFILE] = [('seta.sh', 'copied-seta.sh'), ('setb.sh', 'copied-setb.sh'), - (config.EXEC_TEST_FULL_PATH, 'copied-' + config.EXEC_TEST_NAME)] - p[OUTFILE] = [('result.txt', 'orig-result.txt')] + p[INFILE] = [('seta.sh', p[WORKDIR] + '/copied-seta.sh'), + ('setb.sh', p[WORKDIR] + '/copied-setb.sh')] + p[OUTFILE] = [('result.txt', p[WORKDIR] + '/orig-result.txt')] job.setParametre(p) # ... 
and its environment e = {} @@ -58,8 +58,8 @@ def work(): # Get the catalog c = BatchManagerCatalog.getInstance() - # Create a BatchManager of type Local_SSH on localhost - bm = c('SH')('localhost') + # Create a BatchManager of type Local_SH on localhost + bm = c('LOCAL')('localhost', '', SH) # Submit the job to the BatchManager jobid = bm.submitJob(job) @@ -71,15 +71,25 @@ def work(): # Wait for the end of the job state = bm.waitForJobEnd(jobid, config.TEST_LOCAL_SH_TIMEOUT); + + if state == FINISHED: + print "Job", jobid, "is done" + bm.importOutputFiles(job, "resultdir/seconddirname") + elif state == FAILED: + print "Job", jobid, " finished in error" + bm.importOutputFiles(job, "resultdir/seconddirname") + return 1 + else: + print "Timeout while executing job" + return 1 + if state != FINISHED and state != FAILED: print "Error: Job not finished after timeout" return 1; - print "Job", jobid, "is done" - # test the result file exp = "c = 12" - f = open('result.txt') + f = open('resultdir/seconddirname/result.txt') res = f.read().strip() print "result found : %s, expected : %s" % (res, exp) diff --git a/src/Python/libBatch_Swig.i b/src/Python/libBatch_Swig.i index ff36cfd..3abc72b 100644 --- a/src/Python/libBatch_Swig.i +++ b/src/Python/libBatch_Swig.i @@ -65,10 +65,8 @@ #include "Batch_CommunicationProtocol.hxx" #include "Batch_BatchManager.hxx" -#include "Batch_BatchManager_eClient.hxx" #include "Batch_BatchManagerCatalog.hxx" #include "Batch_FactBatchManager.hxx" -#include "Batch_FactBatchManager_eClient.hxx" %} /* Les classes exportees en Python */ @@ -84,10 +82,8 @@ %include Batch_CommunicationProtocol.hxx %include Batch_BatchManager.hxx -%include Batch_BatchManager_eClient.hxx %include Batch_BatchManagerCatalog.hxx %include Batch_FactBatchManager.hxx -%include Batch_FactBatchManager_eClient.hxx %include Batch_Constants.hxx diff --git a/src/Python/libBatch_Swig_typemap.i b/src/Python/libBatch_Swig_typemap.i index 27a2ca0..355a36d 100644 --- 
a/src/Python/libBatch_Swig_typemap.i +++ b/src/Python/libBatch_Swig_typemap.i @@ -277,12 +277,3 @@ static bool initEnvironment(Batch::Environnement & newEnv, PyObject * input) bool res = initEnvironment($1, $input); if (!res) return NULL; } - -// Dynamic cast to FactBatchManager_eClient if necessary -%typemap(out) Batch::FactBatchManager * -{ - if(dynamic_cast($1)) - $result=SWIG_NewPointerObj((void*)$1,$descriptor(Batch::FactBatchManager_eClient *),$owner); - else - $result=SWIG_NewPointerObj((void*)$1,$descriptor(Batch::FactBatchManager *),$owner); -} diff --git a/src/SGE/Batch_BatchManager_eSGE.cxx b/src/SGE/Batch_BatchManager_eSGE.cxx index e576631..0bb4fbb 100644 --- a/src/SGE/Batch_BatchManager_eSGE.cxx +++ b/src/SGE/Batch_BatchManager_eSGE.cxx @@ -46,7 +46,10 @@ #include #endif -#include "Batch_Constants.hxx" +#include +#include +#include + #include "Batch_BatchManager_eSGE.hxx" #include "Batch_JobInfo_eSGE.hxx" @@ -57,8 +60,7 @@ namespace Batch { BatchManager_eSGE::BatchManager_eSGE(const FactBatchManager * parent, const char * host, const char * username, CommunicationProtocolType protocolType, const char * mpiImpl) - : BatchManager(parent, host), - BatchManager_eClient(parent, host, username, protocolType, mpiImpl) + : BatchManager(parent, host, username, protocolType, mpiImpl) { // Nothing to do } @@ -72,7 +74,6 @@ namespace Batch { // Methode pour le controle des jobs : soumet un job au gestionnaire const JobId BatchManager_eSGE::submitJob(const Job & job) { - int status; Parametre params = job.getParametre(); const std::string workDir = params[WORKDIR]; const string fileToExecute = params[EXECUTABLE]; @@ -86,36 +87,21 @@ namespace Batch { // build batch script for job buildBatchScript(job); - // define name of log file (local) - string logFile = generateTemporaryFileName("SGE-submitlog"); - // define command to submit batch string subCommand = string("bash -l -c \\\"cd ") + workDir + "; qsub " + fileNameToExecute + "_Batch.sh\\\""; string command = 
_protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; command += " 2>&1"; cerr << command.c_str() << endl; - status = system(command.c_str()); - if(status) - { - ifstream error_message(logFile.c_str()); - std::string mess; - std::string temp; - while(std::getline(error_message, temp)) - mess += temp; - error_message.close(); - throw EmulationException("Error of connection on remote host, error was: " + mess); - } - - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); + // submit job + string output; + int status = Utils::getCommandOutput(command, output); + cout << output; + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); + + // find id of submitted job in output string strjob; - istringstream iss(line); + istringstream iss(output); iss >> strjob >> strjob >> strjob; JobId id(this, strjob); @@ -143,7 +129,7 @@ namespace Batch { cerr << command.c_str() << endl; status = system(command.c_str()); if(status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); cerr << "jobId = " << ref << "killed" << endl; } @@ -151,20 +137,20 @@ namespace Batch { // Methode pour le controle des jobs : suspend un job en file d'attente void BatchManager_eSGE::holdJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eSGE::holdJob"); } // Methode pour le controle des jobs : relache un job suspendu void BatchManager_eSGE::releaseJob(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eSGE::releaseJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente void BatchManager_eSGE::alterJob(const JobId & jobid, const Parametre & param, const Environnement & env) { - throw 
EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eSGE::alterJob"); } // Methode pour le controle des jobs : modifie un job en file d'attente @@ -186,27 +172,24 @@ namespace Batch { istringstream iss(jobid.getReference()); iss >> id; - // define name of log file (local) - string logFile = generateTemporaryFileName(string("SGE-querylog-id") + jobid.getReference()); - // define command to query batch string subCommand = string("bash -l -c \\\"qstat | grep ") + iss.str() + string("\\\""); string command = _protocol.getExecCommand(subCommand, _hostname, _username); - command += " > "; - command += logFile; cerr << command.c_str() << endl; - int status = system(command.c_str()); + + string output; + int status = Utils::getCommandOutput(command, output); if (status && status != 256) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); - JobInfo_eSGE ji = JobInfo_eSGE(id,logFile); + JobInfo_eSGE ji = JobInfo_eSGE(id, output); return ji; } // Methode pour le controle des jobs : teste si un job est present en machine bool BatchManager_eSGE::isRunning(const JobId & jobid) { - throw EmulationException("Not yet implemented"); + throw NotYetImplementedException("BatchManager_eSGE::isRunning"); } void BatchManager_eSGE::buildBatchScript(const Job & job) @@ -228,11 +211,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[WORKDIR] is not defined ! Please defined it, cannot submit this job"); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined ! Please defined it, cannot submit this job"); + throw RunTimeException("params[EXECUTABLE] is not defined ! 
Please defined it, cannot submit this job"); // Optional parameters if (params.find(NBPROC) != params.end()) @@ -251,7 +234,7 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - std::string TmpFileName = createAndOpenTemporaryFile("SGE-script", tempOutputFile); + std::string TmpFileName = Utils::createAndOpenTemporaryFile("SGE-script", tempOutputFile); tempOutputFile << "#! /bin/sh -f" << endl; if (queue != "") @@ -280,7 +263,7 @@ namespace Batch { workDir + "/" + rootNameToExecute + "_Batch.sh", _hostname, _username); if (status) - throw EmulationException("Error of connection on remote host"); + throw RunTimeException("Error of connection on remote host"); #endif //WIN32 } diff --git a/src/SGE/Batch_BatchManager_eSGE.hxx b/src/SGE/Batch_BatchManager_eSGE.hxx index 3d95501..d432f40 100644 --- a/src/SGE/Batch_BatchManager_eSGE.hxx +++ b/src/SGE/Batch_BatchManager_eSGE.hxx @@ -36,11 +36,11 @@ #include "Batch_JobId.hxx" #include "Batch_JobInfo.hxx" #include "Batch_FactBatchManager.hxx" -#include "Batch_BatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" namespace Batch { - class BATCH_EXPORT BatchManager_eSGE : public BatchManager_eClient + class BATCH_EXPORT BatchManager_eSGE : public BatchManager { public: // Constructeur et destructeur diff --git a/src/SGE/Batch_FactBatchManager_eSGE.cxx b/src/SGE/Batch_FactBatchManager_eSGE.cxx index 19b575e..4dc7020 100644 --- a/src/SGE/Batch_FactBatchManager_eSGE.cxx +++ b/src/SGE/Batch_FactBatchManager_eSGE.cxx @@ -36,7 +36,7 @@ namespace Batch { static FactBatchManager_eSGE sFBM_eSGE; // Constructeur - FactBatchManager_eSGE::FactBatchManager_eSGE() : FactBatchManager_eClient("eSGE") + FactBatchManager_eSGE::FactBatchManager_eSGE() : FactBatchManager("SGE") { // Nothing to do } @@ -47,22 +47,14 @@ namespace Batch { // Nothing to do } - // Functor - BatchManager * FactBatchManager_eSGE::operator() (const char * hostname) const - { - // MESSAGE("Building new BatchManager_SGE on host '" << 
hostname << "'"); - return new BatchManager_eSGE(this, hostname); - } - - BatchManager_eClient * FactBatchManager_eSGE::operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node) const + BatchManager * FactBatchManager_eSGE::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node) const { // MESSAGE("Building new BatchManager_SGE on host '" << hostname << "'"); return new BatchManager_eSGE(this, hostname, username, protocolType, mpiImpl); } - } diff --git a/src/SGE/Batch_FactBatchManager_eSGE.hxx b/src/SGE/Batch_FactBatchManager_eSGE.hxx index 84983b6..4de6ac8 100644 --- a/src/SGE/Batch_FactBatchManager_eSGE.hxx +++ b/src/SGE/Batch_FactBatchManager_eSGE.hxx @@ -33,28 +33,23 @@ #include "Batch_Defines.hxx" -#include "Batch_BatchManager_eClient.hxx" -#include "Batch_FactBatchManager_eClient.hxx" +#include "Batch_BatchManager.hxx" +#include "Batch_FactBatchManager.hxx" namespace Batch { - class BATCH_EXPORT FactBatchManager_eSGE : public FactBatchManager_eClient + class BATCH_EXPORT FactBatchManager_eSGE : public FactBatchManager { public: // Constructeur et destructeur FactBatchManager_eSGE(); virtual ~FactBatchManager_eSGE(); - virtual BatchManager * operator() (const char * hostname) const; - virtual BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node = 1) const; - - protected: - - private: + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node = 1) const; }; diff --git a/src/SGE/Batch_JobInfo_eSGE.cxx b/src/SGE/Batch_JobInfo_eSGE.cxx index 7760544..a15475e 100644 --- a/src/SGE/Batch_JobInfo_eSGE.cxx +++ b/src/SGE/Batch_JobInfo_eSGE.cxx @@ -31,7 
+31,6 @@ #include #include -#include #include #include "Batch_Constants.hxx" @@ -48,7 +47,7 @@ namespace Batch { // Constructeurs - JobInfo_eSGE::JobInfo_eSGE(int id, string logFile) : JobInfo() + JobInfo_eSGE::JobInfo_eSGE(int id, const std::string & output) : JobInfo() { // On remplit les membres _param et _env ostringstream oss; @@ -57,7 +56,7 @@ namespace Batch { // read of log file char line[128]; - ifstream fp(logFile.c_str(),ios::in); + istringstream fp(output); string status; string sline; diff --git a/src/SGE/Batch_JobInfo_eSGE.hxx b/src/SGE/Batch_JobInfo_eSGE.hxx index 27c1038..c6d6f40 100644 --- a/src/SGE/Batch_JobInfo_eSGE.hxx +++ b/src/SGE/Batch_JobInfo_eSGE.hxx @@ -44,7 +44,7 @@ namespace Batch { public: // Constructeurs et destructeur JobInfo_eSGE() : _running(false) {}; - JobInfo_eSGE(int id,std::string logFile); + JobInfo_eSGE(int id, const std::string & output); virtual ~JobInfo_eSGE(); // Constructeur par recopie diff --git a/src/Slurm/Batch_BatchManager_eSlurm.cxx b/src/Slurm/Batch_BatchManager_eSlurm.cxx index 556012d..d63f185 100644 --- a/src/Slurm/Batch_BatchManager_eSlurm.cxx +++ b/src/Slurm/Batch_BatchManager_eSlurm.cxx @@ -46,9 +46,7 @@ namespace Batch { CommunicationProtocolType protocolType, const char * mpiImpl, int nb_proc_per_node) - : BatchManager(parent, host), - BatchManager_eClient(parent, host, username, protocolType, mpiImpl), - _nb_proc_per_node(nb_proc_per_node) + : BatchManager(parent, host, username, protocolType, mpiImpl) { } @@ -78,13 +76,13 @@ namespace Batch { string output; int status = Utils::getCommandOutput(command, output); cout << output; - if (status != 0) throw EmulationException("Can't submit job, error was: " + output); + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // find id of submitted job in output string search = "Submitted batch job "; string::size_type pos = output.find(search); if (pos == string::npos) - throw EmulationException("Error in the submission of the job 
on the remote host"); + throw RunTimeException("Error in the submission of the job on the remote host"); pos += search.size(); string::size_type endl_pos = output.find('\n', pos); string::size_type count = (endl_pos == string::npos)? string::npos : endl_pos - pos; @@ -111,11 +109,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); + throw RunTimeException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); + throw RunTimeException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); string::size_type p1 = fileToExecute.find_last_of("/"); string::size_type p2 = fileToExecute.find_last_of("."); @@ -124,7 +122,7 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - string tmpFileName = createAndOpenTemporaryFile("slurm-script", tempOutputFile); + string tmpFileName = Utils::createAndOpenTemporaryFile("slurm-script", tempOutputFile); tempOutputFile << "#!/bin/bash" << endl; tempOutputFile << "#SBATCH --output=" << workDir << "/logs/output.log." 
<< rootNameToExecute << endl; @@ -181,7 +179,7 @@ namespace Batch { workDir + "/" + remoteFileName, _hostname, _username); if (status) - throw EmulationException("Cannot copy command file on host " + _hostname); + throw RunTimeException("Cannot copy command file on host " + _hostname); return remoteFileName; } @@ -195,7 +193,7 @@ namespace Batch { int status = system(command.c_str()); if (status) - throw EmulationException("Can't delete job " + jobid.getReference()); + throw RunTimeException("Can't delete job " + jobid.getReference()); cerr << "job " << jobid.getReference() << " killed" << endl; } diff --git a/src/Slurm/Batch_BatchManager_eSlurm.hxx b/src/Slurm/Batch_BatchManager_eSlurm.hxx index 839ff1e..b086863 100644 --- a/src/Slurm/Batch_BatchManager_eSlurm.hxx +++ b/src/Slurm/Batch_BatchManager_eSlurm.hxx @@ -35,11 +35,11 @@ #include #include #include -#include +#include namespace Batch { - class BATCH_EXPORT BatchManager_eSlurm : public BatchManager_eClient + class BATCH_EXPORT BatchManager_eSlurm : public BatchManager { public: @@ -66,8 +66,6 @@ namespace Batch { std::string buildCommandFile(const Job & job); - int _nb_proc_per_node; - }; } diff --git a/src/Slurm/Batch_FactBatchManager_eSlurm.cxx b/src/Slurm/Batch_FactBatchManager_eSlurm.cxx index 36a217a..9fd77ab 100644 --- a/src/Slurm/Batch_FactBatchManager_eSlurm.cxx +++ b/src/Slurm/Batch_FactBatchManager_eSlurm.cxx @@ -35,7 +35,7 @@ namespace Batch { static FactBatchManager_eSlurm sFBM_eSlurm; - FactBatchManager_eSlurm::FactBatchManager_eSlurm() : FactBatchManager_eClient("eSLURM") + FactBatchManager_eSlurm::FactBatchManager_eSlurm() : FactBatchManager("SLURM") { } @@ -43,19 +43,12 @@ namespace Batch { { } - BatchManager * FactBatchManager_eSlurm::operator() (const char * hostname) const + BatchManager * FactBatchManager_eSlurm::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node) const { - // 
MESSAGE("Building new BatchManager_eSlurm on host '" << hostname << "'"); - return new BatchManager_eSlurm(this, hostname); - } - - BatchManager_eClient * FactBatchManager_eSlurm::operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node) const - { - // MESSAGE("Building new BatchManager_eSlurm on host '" << hostname << "'"); return new BatchManager_eSlurm(this, hostname, username, protocolType, mpiImpl, nb_proc_per_node); } diff --git a/src/Slurm/Batch_FactBatchManager_eSlurm.hxx b/src/Slurm/Batch_FactBatchManager_eSlurm.hxx index 969763e..7eace42 100644 --- a/src/Slurm/Batch_FactBatchManager_eSlurm.hxx +++ b/src/Slurm/Batch_FactBatchManager_eSlurm.hxx @@ -30,25 +30,23 @@ #define _FACTBATCHMANAGER_ESLURM_H_ #include -#include -#include -#include +#include +#include namespace Batch { - class BATCH_EXPORT FactBatchManager_eSlurm : public FactBatchManager_eClient + class BATCH_EXPORT FactBatchManager_eSlurm : public FactBatchManager { public: FactBatchManager_eSlurm(); virtual ~FactBatchManager_eSlurm(); - virtual BatchManager * operator() (const char * hostname) const; - virtual BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node = 1) const; + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node = 1) const; }; diff --git a/src/Slurm/Test/Test_eSlurm.cxx b/src/Slurm/Test/Test_eSlurm.cxx index 38b5d15..41e2e46 100644 --- a/src/Slurm/Test/Test_eSlurm.cxx +++ b/src/Slurm/Test/Test_eSlurm.cxx @@ -35,9 +35,7 @@ #include #include #include -#include #include -#include #include @@ -95,7 +93,6 @@ int main(int argc, char** argv) p[INFILE] = Couple("seta.sh", "tmp/Batch/seta.sh"); p[INFILE] += Couple("setb.sh", "tmp/Batch/setb.sh"); p[OUTFILE] = 
Couple("result.txt", "tmp/Batch/result.txt"); - p[TMPDIR] = "tmp/Batch/"; p[NBPROC] = 1; p[MAXWALLTIME] = 1; p[MAXRAMSIZE] = 50; @@ -112,8 +109,8 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type ePBS on localhost - FactBatchManager_eClient * fbm = (FactBatchManager_eClient *)(c("eSLURM")); - BatchManager_eClient * bm = (*fbm)(host.c_str(), user.c_str(), protocol); + FactBatchManager * fbm = c("SLURM"); + BatchManager * bm = (*fbm)(host.c_str(), user.c_str(), protocol); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job); diff --git a/src/Vishnu/Batch_BatchManager_eVishnu.cxx b/src/Vishnu/Batch_BatchManager_eVishnu.cxx index 9c39013..61ec2fd 100644 --- a/src/Vishnu/Batch_BatchManager_eVishnu.cxx +++ b/src/Vishnu/Batch_BatchManager_eVishnu.cxx @@ -52,9 +52,8 @@ namespace Batch { CommunicationProtocolType protocolType, const char * mpiImpl, int nb_proc_per_node) - : BatchManager(parent, host), - // Force SH protocol for Vishnu - BatchManager_eClient(parent, host, username, SH, mpiImpl), + : // Force SH protocol for Vishnu + BatchManager(parent, host, username, SH, mpiImpl), _nb_proc_per_node(nb_proc_per_node) { } @@ -93,19 +92,19 @@ namespace Batch { string output; int status = Utils::getCommandOutput(command, output); cout << output; - if (status != 0) throw EmulationException("Can't submit job, error was: " + output); + if (status != 0) throw RunTimeException("Can't submit job, error was: " + output); // find id of submitted job in output string search = "Job Id : "; string::size_type pos = output.find(search); if (pos == string::npos) - throw EmulationException("Error in the submission of the job on the remote host"); + throw RunTimeException("Error in the submission of the job on the remote host"); pos += search.size(); string::size_type endl_pos = output.find('\n', pos); string::size_type count = (endl_pos == string::npos)? 
string::npos : endl_pos - pos; string jobref = output.substr(pos, count); if (jobref.size() == 0) - throw EmulationException("Error in the submission of the job on the remote host"); + throw RunTimeException("Error in the submission of the job on the remote host"); JobId id(this, jobref); return id; @@ -161,7 +160,7 @@ namespace Batch { int status = Utils::getCommandOutput(command, output); cout << output; if (status != 0) - throw EmulationException("Can't copy input files, error was: " + output); + throw RunTimeException("Can't copy input files, error was: " + output); } /** @@ -181,11 +180,11 @@ namespace Batch { if (params.find(WORKDIR) != params.end()) workDir = params[WORKDIR].str(); else - throw EmulationException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); + throw RunTimeException("params[WORKDIR] is not defined. Please define it, cannot submit this job."); if (params.find(EXECUTABLE) != params.end()) fileToExecute = params[EXECUTABLE].str(); else - throw EmulationException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); + throw RunTimeException("params[EXECUTABLE] is not defined. Please define it, cannot submit this job."); string::size_type p1 = fileToExecute.find_last_of("/"); string::size_type p2 = fileToExecute.find_last_of("."); @@ -194,7 +193,7 @@ namespace Batch { // Create batch submit file ofstream tempOutputFile; - string tmpFileName = createAndOpenTemporaryFile("vishnu-script", tempOutputFile); + string tmpFileName = Utils::createAndOpenTemporaryFile("vishnu-script", tempOutputFile); tempOutputFile << "#!/bin/sh" << endl; tempOutputFile << "#% vishnu_output=" << workDir << "/logs/output.log." 
<< rootNameToExecute << endl; @@ -248,7 +247,7 @@ namespace Batch { int status = system(command.c_str()); if (status) - throw EmulationException("Can't delete job " + jobid.getReference()); + throw RunTimeException("Can't delete job " + jobid.getReference()); cerr << "job " << jobid.getReference() << " killed" << endl; } @@ -291,7 +290,7 @@ namespace Batch { string output; int status = Utils::getCommandOutput(command, output); if (status != 0) - throw EmulationException("Can't query job " + jobid.getReference()); + throw RunTimeException("Can't query job " + jobid.getReference()); JobInfo_eVishnu jobinfo = JobInfo_eVishnu(jobid.getReference(), output); return jobinfo; } @@ -310,7 +309,7 @@ namespace Batch { string absdir = (Utils::isAbsolutePath(directory))? directory : cwd + "/" + directory; int status = CommunicationProtocol::getInstance(SH).makeDirectory(absdir, "", ""); if (status != 0) { - throw EmulationException("Can't create result directory"); + throw RunTimeException("Can't create result directory"); } string subCommand = string("export OMNIORB_CONFIG=$VISHNU_CONFIG_FILE; "); @@ -347,7 +346,7 @@ namespace Batch { status = Utils::getCommandOutput(command, output); cout << output; if (status != 0) - throw EmulationException("Can't import output files, error was: " + output); + throw RunTimeException("Can't import output files, error was: " + output); } } diff --git a/src/Vishnu/Batch_BatchManager_eVishnu.hxx b/src/Vishnu/Batch_BatchManager_eVishnu.hxx index 094547a..373a92e 100644 --- a/src/Vishnu/Batch_BatchManager_eVishnu.hxx +++ b/src/Vishnu/Batch_BatchManager_eVishnu.hxx @@ -35,11 +35,11 @@ #include #include #include -#include +#include namespace Batch { - class BATCH_EXPORT BatchManager_eVishnu : public BatchManager_eClient + class BATCH_EXPORT BatchManager_eVishnu : public BatchManager { public: diff --git a/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx b/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx index 3f4fc4f..5b8652c 100644 --- 
a/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx +++ b/src/Vishnu/Batch_FactBatchManager_eVishnu.cxx @@ -35,7 +35,7 @@ namespace Batch { static FactBatchManager_eVishnu sFBM_eVishnu; - FactBatchManager_eVishnu::FactBatchManager_eVishnu() : FactBatchManager_eClient("eVISHNU") + FactBatchManager_eVishnu::FactBatchManager_eVishnu() : FactBatchManager("VISHNU") { } @@ -43,17 +43,11 @@ namespace Batch { { } - BatchManager * FactBatchManager_eVishnu::operator() (const char * hostname) const - { - // MESSAGE("Building new BatchManager_eVishnu on host '" << hostname << "'"); - return new BatchManager_eVishnu(this, hostname); - } - - BatchManager_eClient * FactBatchManager_eVishnu::operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * mpiImpl, - int nb_proc_per_node) const + BatchManager * FactBatchManager_eVishnu::operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node) const { // MESSAGE("Building new BatchManager_eVishnu on host '" << hostname << "'"); return new BatchManager_eVishnu(this, hostname, username, protocolType, mpiImpl, nb_proc_per_node); diff --git a/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx b/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx index 5bfaaeb..bee9b98 100644 --- a/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx +++ b/src/Vishnu/Batch_FactBatchManager_eVishnu.hxx @@ -31,24 +31,23 @@ #include #include -#include -#include +#include +#include namespace Batch { - class BATCH_EXPORT FactBatchManager_eVishnu : public FactBatchManager_eClient + class BATCH_EXPORT FactBatchManager_eVishnu : public FactBatchManager { public: FactBatchManager_eVishnu(); virtual ~FactBatchManager_eVishnu(); - virtual BatchManager * operator() (const char * hostname) const; - virtual BatchManager_eClient * operator() (const char * hostname, - const char * username, - CommunicationProtocolType protocolType, - const char * 
mpiImpl, - int nb_proc_per_node = 1) const; + virtual BatchManager * operator() (const char * hostname, + const char * username, + CommunicationProtocolType protocolType, + const char * mpiImpl, + int nb_proc_per_node = 1) const; }; diff --git a/src/Vishnu/Test/Test_eVishnu.cxx b/src/Vishnu/Test/Test_eVishnu.cxx index f6c48e1..0ef9d12 100644 --- a/src/Vishnu/Test/Test_eVishnu.cxx +++ b/src/Vishnu/Test/Test_eVishnu.cxx @@ -35,9 +35,7 @@ #include #include #include -#include #include -#include #include @@ -86,8 +84,8 @@ int main(int argc, char** argv) BatchManagerCatalog& c = BatchManagerCatalog::getInstance(); // Create a BatchManager of type ePBS on localhost - FactBatchManager_eClient * fbm = (FactBatchManager_eClient *)(c("eVISHNU")); - BatchManager_eClient * bm = (*fbm)(host.c_str(), user.c_str(), SH); + FactBatchManager * fbm = c("VISHNU"); + BatchManager * bm = (*fbm)(host.c_str(), user.c_str(), SH); // Submit the job to the BatchManager JobId jobid = bm->submitJob(job);