From 2f0ffd4ac11a91d89b8d252bf031181ae4fd3ce3 Mon Sep 17 00:00:00 2001 From: ribes Date: Mon, 18 Feb 2008 10:30:18 +0000 Subject: [PATCH] - Adding walltime and mem options to PBS batch --- idl/SALOME_ContainerManager.idl | 17 ++- src/Launcher/BatchLight_BatchManager_PBS.cxx | 6 + src/Launcher/BatchLight_Job.cxx | 145 ++++++++++++++++--- src/Launcher/BatchLight_Job.hxx | 12 +- src/Launcher/SALOME_Launcher.cxx | 27 +++- src/Launcher/SALOME_Launcher.hxx | 2 +- 6 files changed, 174 insertions(+), 35 deletions(-) diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index c53d27a9c..7fcc66cf6 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -68,6 +68,21 @@ struct MachineParameters enum policy {P_FIRST,P_CYCL,P_BEST}; typedef policy ResPolicy; +/*! + Structure used for Salome Batch Job parameters +*/ +struct BatchParameters +{ + string batch_directory; // Where batch command will be launched + // and log files will be created + string expected_during_time; // Time for the batch + // has to be like this : hh:mm + string mem; // Minimum of memory needed + // has to be like : 32gb or 512mb + + long nb_proc; // Number of processors requested +}; + /*! 
\brief Interface of the %salomelauncher This interface is used for interaction with the unique instance of SalomeLauncher @@ -77,7 +92,7 @@ struct MachineParameters long submitSalomeJob( in string fileToExecute, in FilesList filesToExport, in FilesList filesToImport, - in long NumberOfProcessors, + in BatchParameters batch_params, in MachineParameters params ) raises (SALOME::SALOME_Exception); string querySalomeJob( in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); void deleteSalomeJob( in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); diff --git a/src/Launcher/BatchLight_BatchManager_PBS.cxx b/src/Launcher/BatchLight_BatchManager_PBS.cxx index 0fd52131a..6a573c1b8 100644 --- a/src/Launcher/BatchLight_BatchManager_PBS.cxx +++ b/src/Launcher/BatchLight_BatchManager_PBS.cxx @@ -311,6 +311,8 @@ namespace BatchLight { BEGIN_OF("BatchManager_PBS::buildSalomeBatchScript"); int status; const int nbproc = job->getNbProc(); + std::string edt = job->getExpectedDuringTime(); + std::string mem = job->getMemory(); const std::string dirForTmpFiles = job->getDirForTmpFiles(); const char *fileToExecute = job->getFileToExecute(); string::size_type p1 = string(fileToExecute).find_last_of("/"); @@ -340,6 +342,10 @@ namespace BatchLight { tempOutputFile << "#! /bin/sh -f" << endl ; tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; + if (edt != "") + tempOutputFile << "#PBS -l walltime=" << edt << ":00" << endl ; + if (mem != "") + tempOutputFile << "#PBS -l mem=" << mem << endl ; // In some systems qsub does not correctly expand env variables // like PBS_O_HOME for #PBS directives.... 
//tempOutputFile << "#PBS -o /$PBS_O_HOME/" << dirForTmpFiles << "/runSalome.output.log.${PBS_JOBID}" << endl ; diff --git a/src/Launcher/BatchLight_Job.cxx b/src/Launcher/BatchLight_Job.cxx index 9e70bbd37..1980054a9 100644 --- a/src/Launcher/BatchLight_Job.cxx +++ b/src/Launcher/BatchLight_Job.cxx @@ -27,33 +27,134 @@ */ #include "BatchLight_Job.hxx" +#include <sstream> + using namespace std; +using namespace BatchLight; + +Job::Job(const char *fileToExecute, + const Engines::FilesList& filesToExport, + const Engines::FilesList& filesToImport, + const Engines::BatchParameters& batch_params) : _fileToExecute(fileToExecute), + _filesToExport(filesToExport), + _filesToImport(filesToImport), + _batch_params(batch_params) +{ + _dirForTmpFiles = "/tmp/default_batch_tmp_directory"; + std::string _fileNameToExecute = ""; +} + +Job::~Job() +{ + MESSAGE("Job destructor"); +} + +void +Job::addFileToImportList(std::string file_name) +{ + CORBA::ULong lgth = _filesToImport.length(); + _filesToImport.length(lgth+1); + _filesToImport[lgth] = CORBA::string_dup(file_name.c_str()); +} + +const std::string +Job::getExpectedDuringTime() +{ + std::string str(_batch_params.expected_during_time); + return str; +} + +const std::string +Job::getMemory() +{ + std::string str(_batch_params.mem); + return str; +} + +bool +Job::check() { + bool rtn = true; + INFOS("Warning : batch_directory option is not currently implemented"); + INFOS("Warning : currently these informations are only in the PBS batch manager"); + INFOS("Job parameters are :"); + INFOS("Directory : $HOME/Batch/$date"); -namespace BatchLight { - - // Constructeur - Job::Job(const char *fileToExecute, - const Engines::FilesList& filesToExport, - const Engines::FilesList& filesToImport, - const int nbproc) : _fileToExecute(fileToExecute), - _filesToExport(filesToExport), - _filesToImport(filesToImport), - _nbproc(nbproc) - { - _dirForTmpFiles = "/tmp/default_batch_tmp_directory"; - std::string _fileNameToExecute = ""; + // check 
expected_during_time (check the format) + std::string edt_info; + std::string edt_value = _batch_params.expected_during_time.in(); + if (edt_value != "") { + std::string begin_edt_value = edt_value.substr(0, 2); + std::string mid_edt_value = edt_value.substr(2, 1); + std::string end_edt_value = edt_value.substr(3); + + long value; + std::istringstream iss(begin_edt_value); + if (!(iss >> value)) { + edt_info = "Error on definition ! : " + edt_value; + rtn = false; + } + else if (value < 0) { + edt_info = "Error on definition time is negative ! : " + value; + rtn = false; + } + std::istringstream iss_2(end_edt_value); + if (!(iss_2 >> value)) { + edt_info = "Error on definition ! : " + edt_value; + rtn = false; + } + else if (value < 0) { + edt_info = "Error on definition time is negative ! : " + value; + rtn = false; + } + if (mid_edt_value != ":") { + edt_info = "Error on definition ! :" + edt_value; + rtn = false; + } } + else { + edt_info = "No value given"; + } + INFOS("Expected during time : " << edt_info); - Job::~Job() - { - MESSAGE("Job destructor"); + // check memory (check the format) + std::string mem_info; + std::string mem_value = _batch_params.mem.in(); + if (mem_value != "") { + std::string begin_mem_value = mem_value.substr(0, mem_value.length()-2); + long re_mem_value; + std::istringstream iss(begin_mem_value); + if (!(iss >> re_mem_value)) { + mem_info = "Error on definition ! : " + mem_value; + rtn = false; + } + else if (re_mem_value <= 0) { + mem_info = "Error on definition memory is negative ! : " + mem_value; + rtn = false; + } + std::string end_mem_value = mem_value.substr(mem_value.length()-2); + if (end_mem_value != "gb" and end_mem_value != "mb") { + mem_info = "Error on definition, type is bad ! 
" + mem_value; + rtn = false; + } + } + else { + mem_info = "No value given"; } + INFOS("Memory : " << mem_info); - void - Job::addFileToImportList(std::string file_name) - { - CORBA::ULong lgth = _filesToImport.length(); - _filesToImport.length(lgth+1); - _filesToImport[lgth] = CORBA::string_dup(file_name.c_str()); + // check nb_proc + std::string nb_proc_info; + ostringstream nb_proc_value; + nb_proc_value << _batch_params.nb_proc; + if(_batch_params.nb_proc <= 0) { + nb_proc_info = "Bad value ! nb_proc = "; + nb_proc_info += nb_proc_value.str(); + rtn = false; } + else { + nb_proc_info = nb_proc_value.str(); + } + INFOS("Nb of processors : " << nb_proc_info); + + return rtn; } diff --git a/src/Launcher/BatchLight_Job.hxx b/src/Launcher/BatchLight_Job.hxx index 12b5711db..e17e790b9 100644 --- a/src/Launcher/BatchLight_Job.hxx +++ b/src/Launcher/BatchLight_Job.hxx @@ -39,23 +39,29 @@ namespace BatchLight { { public: // Constructeurs et destructeur - Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc); + Job(const char *fileToExecute, + const Engines::FilesList& filesToExport, + const Engines::FilesList& filesToImport, + const Engines::BatchParameters& batch_params); virtual ~Job(); const char *getFileToExecute() const { return _fileToExecute; } const Engines::FilesList getFilesToExportList() const { return _filesToExport; } const Engines::FilesList getFilesToImportList() const { return _filesToImport; } void addFileToImportList(std::string file_name); - const int getNbProc() const { return _nbproc; } + const CORBA::Long getNbProc() const { return _batch_params.nb_proc; } + const std::string getExpectedDuringTime(); + const std::string getMemory(); const std::string getDirForTmpFiles() const { return _dirForTmpFiles;} void setDirForTmpFiles(std::string dirForTmpFiles) {_dirForTmpFiles = dirForTmpFiles; SCRUTE(_dirForTmpFiles);} + bool check(); protected: const char* _fileToExecute; const 
Engines::FilesList _filesToExport; Engines::FilesList _filesToImport; - const int _nbproc; + Engines::BatchParameters _batch_params; std::string _dirForTmpFiles; // Tmp directory on the server private: diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index 3b73b7c02..a507c010d 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -120,7 +120,7 @@ CORBA::Long SALOME_Launcher::getPID() CORBA::Long SALOME_Launcher::submitSalomeJob( const char * fileToExecute , const Engines::FilesList& filesToExport , const Engines::FilesList& filesToImport , - const CORBA::Long NumberOfProcessors , + const Engines::BatchParameters& batch_params, const Engines::MachineParameters& params) { MESSAGE("BEGIN OF SALOME_Launcher::submitSalomeJob"); @@ -128,22 +128,33 @@ CORBA::Long SALOME_Launcher::submitSalomeJob( const char * fileToExecute , try{ // find a cluster matching the structure params Engines::CompoList aCompoList ; - Engines::MachineList *aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + Engines::MachineList *aMachineList = _ResManager->GetFittingResources(params, aCompoList); + if (aMachineList->length() == 0) + throw SALOME_Exception("No resources have been found with your parameters"); + const Engines::MachineParameters* p = _ResManager->GetMachineParameters((*aMachineList)[0]); string clustername(p->alias); - + INFOS("Choose cluster" << clustername); + // search batch manager for that cluster in map or instanciate one std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); - SCRUTE(clustername); if(it == _batchmap.end()) - _batchmap[clustername] = FactoryBatchManager( p ); + { + _batchmap[clustername] = FactoryBatchManager(p); + // TODO: Add a test for the cluster ! 
+ } - // submit job on cluster - BatchLight::Job* job = new BatchLight::Job( fileToExecute, filesToExport, filesToImport, NumberOfProcessors ); + // create and submit job on cluster + BatchLight::Job* job = new BatchLight::Job(fileToExecute, filesToExport, filesToImport, batch_params); + bool res = job->check(); + if (!res) { + delete job; + throw SALOME_Exception("Job parameters are bad (see informations above)"); + } jobId = _batchmap[clustername]->submitJob(job); } catch(const SALOME_Exception &ex){ - MESSAGE(ex.what()); + INFOS(ex.what()); THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::INTERNAL_ERROR); } return jobId; diff --git a/src/Launcher/SALOME_Launcher.hxx b/src/Launcher/SALOME_Launcher.hxx index 08570fe72..4bc5d65ce 100644 --- a/src/Launcher/SALOME_Launcher.hxx +++ b/src/Launcher/SALOME_Launcher.hxx @@ -55,7 +55,7 @@ public: CORBA::Long submitSalomeJob(const char * fileToExecute , const Engines::FilesList& filesToExport , const Engines::FilesList& filesToImport , - const CORBA::Long NumberOfProcessors , + const Engines::BatchParameters& batch_params, const Engines::MachineParameters& params); char* querySalomeJob( const CORBA::Long jobId, const Engines::MachineParameters& params); -- 2.39.2