From 8ab245caa8db966ffda5214cbc758a554c04fe24 Mon Sep 17 00:00:00 2001 From: ribes Date: Tue, 17 Nov 2009 09:27:42 +0000 Subject: [PATCH] - Correct use of nb_proc_per_node - Generate CatalogResources - Adding queue support --- idl/SALOME_ContainerManager.idl | 8 +++++- src/Launcher/Launcher.cxx | 4 ++- src/Launcher/Launcher_Job.cxx | 35 ++++++++++++++++++++++++ src/Launcher/Launcher_Job.hxx | 6 +++++ src/Launcher/Launcher_Job_Command.cxx | 21 ++++----------- src/Launcher/Launcher_Job_Command.hxx | 2 +- src/Launcher/Launcher_Job_YACSFile.cxx | 37 ++++++++++++++------------ src/Launcher/SALOME_Launcher.cxx | 5 ++++ 8 files changed, 82 insertions(+), 36 deletions(-) diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index f7d1869e6..7fb7004d5 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -150,9 +150,15 @@ struct JobParameters this case, default value of the selected resource will be used. */ string maximum_during_time; - MachineParameters resource_required; + // Memory is expressed in megabytes -> mem_mb // Number of Processors -> nb_node + MachineParameters resource_required; + + /*! + Name of the batch queue chosen - optional + */ + string queue; }; /*! 
\brief Interface of the %salomelauncher diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index 405a5d910..28c274115 100644 --- a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -480,6 +480,8 @@ Batch::BatchManager_eClient *Launcher_cpp::FactoryBatchManager(ParserResourcesTy Batch::CommunicationProtocolType protocol; Batch::FactBatchManager_eClient* fact; + int nb_proc_per_node = params.DataForSort._nbOfProcPerNode; + hostname = params.Alias; switch(params.Protocol){ case rsh: @@ -549,7 +551,7 @@ Batch::BatchManager_eClient *Launcher_cpp::FactoryBatchManager(ParserResourcesTy #endif throw LauncherException("no batchmanager for that cluster"); } - return (*fact)(hostname.c_str(), protocol, mpi.c_str()); + return (*fact)(hostname.c_str(), protocol, mpi.c_str(), nb_proc_per_node); #else throw LauncherException("Method Launcher_cpp::FactoryBatchManager is not available " "(libBatch was not present at compilation time)"); diff --git a/src/Launcher/Launcher_Job.cxx b/src/Launcher/Launcher_Job.cxx index d45b8be51..df4372f98 100644 --- a/src/Launcher/Launcher_Job.cxx +++ b/src/Launcher/Launcher_Job.cxx @@ -39,6 +39,7 @@ Launcher::Job::Job() _machine_required_params.mem_mb = -1; _machine_required_params.parallelLib = ""; _machine_required_params.nb_component_nodes = -1; + _queue = ""; #ifdef WITH_LIBBATCH _batch_job = new Batch::Job(); @@ -163,6 +164,12 @@ Launcher::Job::setMachineRequiredParams(const machineParams & machine_required_p _machine_required_params = machine_required_params; } +void +Launcher::Job::setQueue(const std::string & queue) +{ + _queue = queue; +} + std::string Launcher::Job::getWorkDirectory() { @@ -205,6 +212,12 @@ Launcher::Job::getMachineRequiredParams() return _machine_required_params; } +std::string +Launcher::Job::getQueue() +{ + return _queue; +} + void Launcher::Job::checkMaximumDuringTime(const std::string & maximum_during_time) { @@ -270,6 +283,24 @@ Launcher::Job::convertMaximumDuringTime(const std::string & 
edt) return ret; } +std::string +Launcher::Job::getLaunchDate() +{ + time_t rawtime; + time(&rawtime); + std::string launch_date = ctime(&rawtime); + int i = 0 ; + for (;i < launch_date.size(); i++) + if (launch_date[i] == '/' or + launch_date[i] == '-' or + launch_date[i] == ':' or + launch_date[i] == ' ') + launch_date[i] = '_'; + launch_date.erase(--launch_date.end()); // Last character is a \n + + return launch_date; +} + std::string Launcher::Job::updateJobState() { @@ -388,6 +419,10 @@ Launcher::Job::common_job_params() if (_maximum_during_time_in_second != -1) params[MAXWALLTIME] = _maximum_during_time_in_second; + // Queue + if (_queue != "") + params[QUEUE] = _queue; + return params; } diff --git a/src/Launcher/Launcher_Job.hxx b/src/Launcher/Launcher_Job.hxx index 1f90b75db..1c719e0ca 100644 --- a/src/Launcher/Launcher_Job.hxx +++ b/src/Launcher/Launcher_Job.hxx @@ -25,6 +25,8 @@ #include "ResourcesManager.hxx" #include +#include +#include #include #include @@ -66,6 +68,7 @@ namespace Launcher void add_out_file(const std::string & file); void setMaximumDuringTime(const std::string & maximum_during_time); void setMachineRequiredParams(const machineParams & machine_required_params); + void setQueue(const std::string & queue); std::string getWorkDirectory(); std::string getLocalDirectory(); @@ -74,6 +77,7 @@ namespace Launcher const std::list & get_out_files(); std::string getMaximumDuringTime(); machineParams getMachineRequiredParams(); + std::string getQueue(); std::string updateJobState(); @@ -83,6 +87,7 @@ namespace Launcher // Helps long convertMaximumDuringTime(const std::string & maximum_during_time); + std::string getLaunchDate(); // Abstract class virtual void update_job() = 0; @@ -102,6 +107,7 @@ namespace Launcher std::string _maximum_during_time; long _maximum_during_time_in_second; machineParams _machine_required_params; + std::string _queue; #ifdef WITH_LIBBATCH // Connection with LIBBATCH diff --git a/src/Launcher/Launcher_Job_Command.cxx 
b/src/Launcher/Launcher_Job_Command.cxx index 13e5634b0..464581720 100644 --- a/src/Launcher/Launcher_Job_Command.cxx +++ b/src/Launcher/Launcher_Job_Command.cxx @@ -85,19 +85,20 @@ Launcher::Job_Command::update_job() } // log - std::string log_file = "command.log"; + std::string launch_date = getLaunchDate(); + std::string log_file = "command_" + launch_date + ".log"; std::string log_local_file = _result_directory + "/" + log_file; std::string log_remote_file = _work_directory + "/" + log_file; params[OUTFILE] += Batch::Couple(log_local_file, log_remote_file); - params[EXECUTABLE] = buildCommandScript(params); + params[EXECUTABLE] = buildCommandScript(params, launch_date); _batch_job->setParametre(params); #endif } #ifdef WITH_LIBBATCH std::string -Launcher::Job_Command::buildCommandScript(Batch::Parametre params) +Launcher::Job_Command::buildCommandScript(Batch::Parametre params, std::string launch_date) { // parameters std::string work_directory = params[WORKDIR].str(); @@ -108,18 +109,6 @@ Launcher::Job_Command::buildCommandScript(Batch::Parametre params) std::string command_name = _command.substr(p1+1,p2-p1-1); std::string command_file_name = _command.substr(p1+1); - time_t rawtime; - time(&rawtime); - std::string launch_date = ctime(&rawtime); - int i = 0 ; - for (;i < launch_date.size(); i++) - if (launch_date[i] == '/' or - launch_date[i] == '-' or - launch_date[i] == ':' or - launch_date[i] == ' ') - launch_date[i] = '_'; - launch_date.erase(--launch_date.end()); // Last caracter is a \n - std::string launch_date_port_file = launch_date; std::string launch_script = "/tmp/runCommand_" + command_name + "_" + launch_date + ".sh"; std::ofstream launch_script_stream; @@ -133,7 +122,7 @@ Launcher::Job_Command::buildCommandScript(Batch::Parametre params) std::string::size_type last = _env_file.find_last_of("/"); launch_script_stream << "source " << _env_file.substr(last+1) << std::endl; } - launch_script_stream << "./" << command_file_name << " > command.log 
2>&1" << std::endl; + launch_script_stream << "./" << command_file_name << " > command_" << launch_date << ".log 2>&1" << std::endl; // Return launch_script_stream.flush(); diff --git a/src/Launcher/Launcher_Job_Command.hxx b/src/Launcher/Launcher_Job_Command.hxx index 318c1db19..9a7a8cb80 100644 --- a/src/Launcher/Launcher_Job_Command.hxx +++ b/src/Launcher/Launcher_Job_Command.hxx @@ -46,7 +46,7 @@ namespace Launcher #ifdef WITH_LIBBATCH protected: - std::string buildCommandScript(Batch::Parametre params); + std::string buildCommandScript(Batch::Parametre params, std::string launch_date); #endif private: diff --git a/src/Launcher/Launcher_Job_YACSFile.cxx b/src/Launcher/Launcher_Job_YACSFile.cxx index b685cb2a0..ff99d9cde 100644 --- a/src/Launcher/Launcher_Job_YACSFile.cxx +++ b/src/Launcher/Launcher_Job_YACSFile.cxx @@ -20,8 +20,6 @@ #include "Launcher_Job_YACSFile.hxx" -#include -#include Launcher::Job_YACSFile::Job_YACSFile(const std::string & yacs_file) { @@ -93,18 +91,7 @@ Launcher::Job_YACSFile::buildSalomeCouplingScript(Batch::Parametre params) std::string::size_type p1 = _yacs_file.find_last_of("/"); std::string::size_type p2 = _yacs_file.find_last_of("."); std::string yacs_file_name = _yacs_file.substr(p1+1,p2-p1-1); - - time_t rawtime; - time(&rawtime); - std::string launch_date = ctime(&rawtime); - int i = 0 ; - for (;i < launch_date.size(); i++) - if (launch_date[i] == '/' or - launch_date[i] == '-' or - launch_date[i] == ':' or - launch_date[i] == ' ') - launch_date[i] = '_'; - launch_date.erase(--launch_date.end()); // Last caracter is a \n + std::string launch_date = getLaunchDate(); std::string launch_date_port_file = launch_date; std::string launch_script = "/tmp/runSalome_" + yacs_file_name + "_" + launch_date + ".sh"; @@ -117,10 +104,26 @@ Launcher::Job_YACSFile::buildSalomeCouplingScript(Batch::Parametre params) launch_script_stream << "export SALOME_TMP_DIR=" << work_directory << "/logs" << std::endl; // -- Generates Catalog Resources - // 
TODO + std::string machine_protocol = "ssh"; + if (_machine_definition.Protocol == rsh) + machine_protocol = "rsh"; + launch_script_stream << "CATALOG_FILE=" << work_directory << "/CatalogResources_" << launch_date << ".xml" << std::endl; + launch_script_stream << "export USER_CATALOG_RESOURCES_FILE=" << "$CATALOG_FILE" << std::endl; + + launch_script_stream << "echo '' > $CATALOG_FILE" << std::endl; + launch_script_stream << "echo '' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "cat $LIBBATCH_NODEFILE | sort -u | while read host" << std::endl; + launch_script_stream << "do" << std::endl; + launch_script_stream << "echo '> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo ' userName=\"" << _machine_definition.UserName << "\"' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo ' appliPath=\"" << _machine_definition.AppliPath << "\"' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo '/>' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "done" << std::endl; + launch_script_stream << "echo '' >> $CATALOG_FILE" << std::endl; // Launch SALOME with an appli - launch_script_stream << _machine_definition.AppliPath << "/runAppli --terminal --ns-port-log=" << launch_date_port_file << " > logs/salome.log 2>&1" << std::endl; + launch_script_stream << _machine_definition.AppliPath << "/runAppli --terminal --ns-port-log=" << launch_date_port_file << " > logs/salome_" << launch_date << ".log 2>&1" << std::endl; launch_script_stream << "current=0\n" << "stop=20\n" << "while ! 
test -f " << _machine_definition.AppliPath << "/" << launch_date_port_file << "\n" @@ -133,7 +136,7 @@ Launcher::Job_YACSFile::buildSalomeCouplingScript(Batch::Parametre params) << " fi\n" << "done\n" << "port=`cat " << _machine_definition.AppliPath << "/" << launch_date_port_file << "`\n"; - launch_script_stream << _machine_definition.AppliPath << "/runSession driver " << yacs_file_name << ".xml > logs/yacs.log 2>&1" << std::endl; + launch_script_stream << _machine_definition.AppliPath << "/runSession driver " << yacs_file_name << ".xml > logs/yacs_" << launch_date << ".log 2>&1" << std::endl; launch_script_stream << _machine_definition.AppliPath << "/runSession killSalomeWithPort.py $port" << std::endl; // Return diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index 924724b9d..761ef3912 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -258,6 +258,11 @@ SALOME_Launcher::createJob(const Engines::JobParameters & job_parameters) INFOS(ex.msg.c_str()); THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::INTERNAL_ERROR); } + + // Queue + std::string queue = job_parameters.queue.in(); + if (queue != "") + new_job->setQueue(queue); // Resources requirements try -- 2.39.2