From 17ab24d3f5a6a3f0c0613ff389a73c85cf21b449 Mon Sep 17 00:00:00 2001 From: barate Date: Wed, 3 Apr 2013 08:29:00 +0000 Subject: [PATCH] Integrate developments from N. Toukourou at INRIA (OAR and CooRM support) --- idl/SALOME_Launcher.idl | 5 ++ src/Launcher/BatchTest.cxx | 12 ++++ src/Launcher/Launcher.cxx | 30 ++++++++ src/Launcher/Launcher.hxx | 5 ++ src/Launcher/Launcher_Job.cxx | 46 +++++++++++++ src/Launcher/Launcher_Job.hxx | 15 ++++ src/Launcher/Launcher_Job_SALOME.cxx | 4 +- src/Launcher/SALOME_Launcher.cxx | 68 +++++++++++++++++-- src/Launcher/SALOME_Launcher.hxx | 1 + .../SALOME_ResourcesCatalog_Parser.cxx | 8 +++ .../SALOME_ResourcesCatalog_Parser.hxx | 2 +- 11 files changed, 188 insertions(+), 8 deletions(-) diff --git a/idl/SALOME_Launcher.idl b/idl/SALOME_Launcher.idl index e57471b88..fb90010bc 100644 --- a/idl/SALOME_Launcher.idl +++ b/idl/SALOME_Launcher.idl @@ -77,6 +77,10 @@ struct JobParameters Specific parameters for each type of job - optional */ Engines::ParameterList specific_parameters; + + // Parameters for COORM + string launcher_file; + string launcher_args; }; struct JobDescription @@ -101,6 +105,7 @@ interface SalomeLauncher long createJob (in Engines::JobParameters job_parameters) raises (SALOME::SALOME_Exception); void launchJob (in long job_id) raises (SALOME::SALOME_Exception); string getJobState (in long job_id) raises (SALOME::SALOME_Exception); + string getAssignedHostnames (in long job_id) raises (SALOME::SALOME_Exception); // Get names or ids of hosts assigned to the job void getJobResults(in long job_id, in string directory) raises (SALOME::SALOME_Exception); boolean getJobDumpState(in long job_id, in string directory) raises (SALOME::SALOME_Exception); void stopJob (in long job_id) raises (SALOME::SALOME_Exception); diff --git a/src/Launcher/BatchTest.cxx b/src/Launcher/BatchTest.cxx index cf566544b..1ff5422fb 100644 --- a/src/Launcher/BatchTest.cxx +++ b/src/Launcher/BatchTest.cxx @@ -307,6 +307,18 @@ BatchTest::test_jobsubmit_simple() result = "OK"; return result; } + if (batch_type == "oar") + { + INFOS("test_jobsubmit_simple not yet implemented for oar... return OK"); + result = "OK"; + return result; + } + if (batch_type == "coorm") + { + INFOS("test_jobsubmit_simple not yet implemented for coorm... return OK"); + result = "OK"; + return result; + } if (batch_type != "pbs") { result += "Batch type unknown ! : " + batch_type; diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index cb883ffe7..d42b3a3ba 100644 --- a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -184,6 +184,30 @@ Launcher_cpp::getJobState(int job_id) return state.c_str(); } +//============================================================================= +/*! + * Get job assigned hostnames + */ +//============================================================================= +const char * +Launcher_cpp::getAssignedHostnames(int job_id) +{ + LAUNCHER_MESSAGE("Get job assigned hostnames"); + + // Check if job exist + std::map::const_iterator it_job = _launcher_job_map.find(job_id); + if (it_job == _launcher_job_map.end()) + { + LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id); + throw LauncherException("Cannot find the job, is it created ?"); + } + + Launcher::Job * job = it_job->second; + std::string assigned_hostnames = job->getAssignedHostnames(); + + return assigned_hostnames.c_str(); +} + //============================================================================= /*! * Get Job result - the result directory could be changed @@ -439,6 +463,12 @@ Launcher_cpp::FactoryBatchManager(ParserResourcesType& params) case vishnu: bmType = "VISHNU"; break; + case oar: + bmType = "OAR"; + break; + case coorm: + bmType = "COORM"; + break; default: LAUNCHER_MESSAGE("Bad batch description of the resource: Batch = " << params.Batch); throw LauncherException("No batchmanager for that cluster - Bad batch description of the resource"); diff --git a/src/Launcher/Launcher.hxx b/src/Launcher/Launcher.hxx index 48895566c..228355c30 100644 --- a/src/Launcher/Launcher.hxx +++ b/src/Launcher/Launcher.hxx @@ -48,6 +48,10 @@ struct batchParams{ std::string expected_during_time; std::string mem; unsigned long nb_proc; + + // Parameters for COORM + std::string launcher_file; + std::string launcher_args; }; class LAUNCHER_EXPORT Launcher_cpp @@ -61,6 +65,7 @@ public: void createJob(Launcher::Job * new_job); void launchJob(int job_id); const char * getJobState(int job_id); + const char * getAssignedHostnames(int job_id); // Get names or ids of hosts assigned to the job void getJobResults(int job_id, std::string directory); bool getJobDumpState(int job_id, std::string directory); void stopJob(int job_id); diff --git a/src/Launcher/Launcher_Job.cxx b/src/Launcher/Launcher_Job.cxx index 10c65c2c3..c9d52da1e 100644 --- a/src/Launcher/Launcher_Job.cxx +++ b/src/Launcher/Launcher_Job.cxx @@ -53,6 +53,10 @@ Launcher::Job::Job() _queue = ""; _job_type = ""; + // Parameters for COORM + _launcher_file = ""; + _launcher_args = ""; + #ifdef WITH_LIBBATCH _batch_job = new Batch::Job(); #endif @@ -149,6 +153,13 @@ Launcher::Job::getState() return _state; } +// Get names or ids of hosts assigned to the job +std::string +Launcher::Job::getAssignedHostnames() +{ + return _assigned_hostnames; +} + void Launcher::Job::setNumber(const int & number) { @@ -270,6 +281,18 @@ Launcher::Job::setMaximumDuration(const std::string & maximum_duration) _maximum_duration = maximum_duration; } +// For COORM +void +Launcher::Job::setLauncherFile(const std::string & launcher_file) +{ + _launcher_file = launcher_file; +} +void +Launcher::Job::setLauncherArgs(const std::string & launcher_args) +{ + _launcher_args = launcher_args; +} + void Launcher::Job::setResourceRequiredParams(const resourceParams & resource_required_params) { @@ -319,6 +342,18 @@ Launcher::Job::getMaximumDuration() return _maximum_duration; } +// For COORM +std::string +Launcher::Job::getLauncherFile() +{ + return _launcher_file; +} +std::string +Launcher::Job::getLauncherArgs() +{ + return _launcher_args; +} + resourceParams Launcher::Job::getResourceRequiredParams() { @@ -434,6 +469,7 @@ Launcher::Job::updateJobState() Batch::JobInfo job_info = _batch_job_id.queryJob(); Batch::Parametre par = job_info.getParametre(); _state = par[Batch::STATE].str(); + _assigned_hostnames = par[Batch::ASSIGNEDHOSTNAMES].str(); LAUNCHER_MESSAGE("State received is: " << par[Batch::STATE].str()); } #endif @@ -476,6 +512,10 @@ Launcher::Job::common_job_params() } params[Batch::WORKDIR] = _work_directory; + // Parameters for COORM + params[Batch::LAUNCHER_FILE] = _launcher_file; + params[Batch::LAUNCHER_ARGS] = _launcher_args; + // If result_directory is not defined, we use HOME environnement if (_result_directory == "") _result_directory = getenv("HOME"); @@ -571,6 +611,9 @@ Launcher::Job::addToXmlDocument(xmlNodePtr root_node) xmlNewChild(node, NULL, xmlCharStrdup("local_directory"), xmlCharStrdup(getLocalDirectory().c_str())); xmlNewChild(node, NULL, xmlCharStrdup("result_directory"), xmlCharStrdup(getResultDirectory().c_str())); + // Parameters for COORM + xmlNewChild(node, NULL, xmlCharStrdup("launcher_file"), xmlCharStrdup(getLauncherFile().c_str())); + // Files xmlNodePtr files_node = xmlNewChild(node, NULL, xmlCharStrdup("files"), NULL); std::list in_files = get_in_files(); @@ -605,6 +648,9 @@ Launcher::Job::addToXmlDocument(xmlNodePtr root_node) xmlNewChild(node, NULL, xmlCharStrdup("maximum_duration"), xmlCharStrdup(getMaximumDuration().c_str())); xmlNewChild(node, NULL, xmlCharStrdup("queue"), xmlCharStrdup(getQueue().c_str())); + // For COORM + xmlNewChild(node, NULL, xmlCharStrdup("launcher_args"), xmlCharStrdup(getLauncherArgs().c_str())); + // Specific parameters part xmlNodePtr specific_parameters_node = xmlNewChild(node, NULL, xmlCharStrdup("specific_parameters"), NULL); std::map specific_parameters = getSpecificParameters(); diff --git a/src/Launcher/Launcher_Job.hxx b/src/Launcher/Launcher_Job.hxx index 5b45c8543..f1f973254 100644 --- a/src/Launcher/Launcher_Job.hxx +++ b/src/Launcher/Launcher_Job.hxx @@ -57,6 +57,8 @@ namespace Launcher // State of a Job: CREATED, IN_PROCESS, QUEUED, RUNNING, PAUSED, FINISHED, ERROR void setState(const std::string & state); std::string getState(); + // Get names or ids of hosts assigned to the job + std::string getAssignedHostnames(); void setNumber(const int & number); int getNumber(); @@ -77,6 +79,10 @@ namespace Launcher void setQueue(const std::string & queue); void setEnvFile(const std::string & env_file); + // For COORM + void setLauncherFile(const std::string & launcher_file); + void setLauncherArgs(const std::string & launcher_args); + std::string getJobName(); std::string getJobFile(); std::string getWorkDirectory(); @@ -90,6 +96,10 @@ namespace Launcher std::string getEnvFile(); std::string getJobType(); + // For COORM + std::string getLauncherFile(); + std::string getLauncherArgs(); + std::string updateJobState(); void addSpecificParameter(const std::string & name, @@ -120,6 +130,7 @@ namespace Launcher std::string _job_type; std::string _state; + std::string _assigned_hostnames; // Assigned hostnames std::string _launch_date; std::string _env_file; @@ -141,6 +152,10 @@ namespace Launcher resourceParams _resource_required_params; std::string _queue; + // Parameters for COORM + std::string _launcher_file; + std::string _launcher_args; + #ifdef WITH_LIBBATCH // Connection with LIBBATCH public: diff --git a/src/Launcher/Launcher_Job_SALOME.cxx b/src/Launcher/Launcher_Job_SALOME.cxx index fd2810d9a..fe38e09b6 100644 --- a/src/Launcher/Launcher_Job_SALOME.cxx +++ b/src/Launcher/Launcher_Job_SALOME.cxx @@ -95,7 +95,9 @@ Launcher::Job_SALOME::buildSalomeScript(Batch::Parametre params) launch_script_stream << "echo '' >> $CATALOG_FILE" << std::endl; launch_script_stream << "cat $LIBBATCH_NODEFILE | sort | uniq -c | while read nbproc host" << std::endl; launch_script_stream << "do" << std::endl; - launch_script_stream << "echo '> $CATALOG_FILE" << std::endl; launch_script_stream << "echo ' userName=\"" << _resource_definition.UserName << "\"' >> $CATALOG_FILE" << std::endl; launch_script_stream << "echo ' appliPath=\"" << _resource_definition.AppliPath << "\"' >> $CATALOG_FILE" << std::endl; diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index a0b02bd1f..07e802c45 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -116,6 +116,12 @@ SALOME_Launcher::createJob(const Engines::JobParameters & job_parameters) new_job->setLocalDirectory(local_directory); new_job->setResultDirectory(result_directory); + // Parameters for COORM + std::string launcher_file = job_parameters.launcher_file.in(); + std::string launcher_args = job_parameters.launcher_args.in(); + new_job->setLauncherFile(launcher_file); + new_job->setLauncherArgs(launcher_args); + // Job File std::string job_file = job_parameters.job_file.in(); try @@ -229,6 +235,23 @@ SALOME_Launcher::getJobState(CORBA::Long job_id) return CORBA::string_dup(result.c_str()); } +// Get names or ids of hosts assigned to the job +char * +SALOME_Launcher::getAssignedHostnames(CORBA::Long job_id) +{ + std::string result; + try + { + result = _l.getAssignedHostnames(job_id); + } + catch(const LauncherException &ex) + { + INFOS(ex.msg.c_str()); + THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM); + } + return CORBA::string_dup(result.c_str()); +} + void SALOME_Launcher::getJobResults(CORBA::Long job_id, const char * directory) { @@ -446,6 +469,10 @@ SALOME_Launcher::getJobParameters(CORBA::Long job_id) job_parameters->local_directory = CORBA::string_dup(job->getLocalDirectory().c_str()); job_parameters->result_directory = CORBA::string_dup(job->getResultDirectory().c_str()); + // Parameters for COORM + job_parameters->launcher_file = CORBA::string_dup(job->getLauncherFile().c_str()); + job_parameters->launcher_args = CORBA::string_dup(job->getLauncherArgs().c_str()); + int i = 0; int j = 0; std::list in_files = job->get_in_files(); @@ -579,11 +606,17 @@ SALOME_Launcher::loadJobs(const char* jobs_file) xmlNodePtr work_directory_node = xmlNextElementSibling(env_file_node); xmlNodePtr local_directory_node = xmlNextElementSibling(work_directory_node); xmlNodePtr result_directory_node = xmlNextElementSibling(local_directory_node); + + // Parameters for COORM + xmlNodePtr launcher_file_node = xmlNextElementSibling(result_directory_node); + if (job_file_node == NULL || env_file_node == NULL || work_directory_node == NULL || local_directory_node == NULL || - result_directory_node == NULL + result_directory_node == NULL || + // For COORM + launcher_file_node == NULL ) { INFOS("A bad job is found, some user_part are not found"); @@ -594,7 +627,9 @@ SALOME_Launcher::loadJobs(const char* jobs_file) xmlStrcmp(env_file_node->name, xmlCharStrdup("env_file")) || xmlStrcmp(work_directory_node->name, xmlCharStrdup("work_directory")) || xmlStrcmp(local_directory_node->name, xmlCharStrdup("local_directory")) || - xmlStrcmp(result_directory_node->name, xmlCharStrdup("result_directory")) + xmlStrcmp(result_directory_node->name, xmlCharStrdup("result_directory")) || + // For COORM + xmlStrcmp(launcher_file_node->name, xmlCharStrdup("launcher_file")) ) { INFOS("A bad job is found, some user part node are not in the rigth or does not have a correct name"); @@ -617,18 +652,29 @@ SALOME_Launcher::loadJobs(const char* jobs_file) xmlChar* work_directory = xmlNodeGetContent(work_directory_node); xmlChar* local_directory = xmlNodeGetContent(local_directory_node); xmlChar* result_directory = xmlNodeGetContent(result_directory_node); + + // Parameters for COORM + xmlChar* launcher_file = xmlNodeGetContent(launcher_file_node); + new_job->setEnvFile(std::string((const char *)env_file)); new_job->setWorkDirectory(std::string((const char *)work_directory)); new_job->setLocalDirectory(std::string((const char *)local_directory)); new_job->setResultDirectory(std::string((const char *)result_directory)); + + // Parameters for COORM + new_job->setLauncherFile(std::string((const char *)launcher_file)); + xmlFree(job_file); xmlFree(env_file); xmlFree(work_directory); xmlFree(local_directory); xmlFree(result_directory); + // Parameters for COORM + xmlFree(launcher_file); + // Get in and out files - xmlNodePtr files_node = xmlNextElementSibling(result_directory_node); + xmlNodePtr files_node = xmlNextElementSibling(launcher_file_node); if (files_node == NULL) { INFOS("A bad job is found, user_part files is not found"); @@ -663,9 +709,12 @@ SALOME_Launcher::loadJobs(const char* jobs_file) xmlNodePtr res_node = xmlNextElementSibling(files_node); xmlNodePtr maximum_duration_node = xmlNextElementSibling(res_node); xmlNodePtr queue_node = xmlNextElementSibling(maximum_duration_node); + xmlNodePtr launcher_args_node = xmlNextElementSibling(queue_node); if (res_node == NULL || maximum_duration_node == NULL || - queue_node == NULL + queue_node == NULL || + // For COORM + launcher_args_node == NULL ) { INFOS("A bad job is found, some user_part are not found"); @@ -674,7 +723,9 @@ SALOME_Launcher::loadJobs(const char* jobs_file) } if (xmlStrcmp(res_node->name, xmlCharStrdup("resource_params")) || xmlStrcmp(maximum_duration_node->name, xmlCharStrdup("maximum_duration")) || - xmlStrcmp(queue_node->name, xmlCharStrdup("queue")) + xmlStrcmp(queue_node->name, xmlCharStrdup("queue")) || + // For COORM + xmlStrcmp(launcher_args_node->name, xmlCharStrdup("launcher_args")) ) { INFOS("A bad job is found, some user part node are not in the rigth or does not have a correct name"); @@ -698,7 +749,12 @@ SALOME_Launcher::loadJobs(const char* jobs_file) xmlFree(maximum_duration); xmlFree(queue); - xmlNodePtr specific_node = xmlNextElementSibling(queue_node); + // For COORM + xmlChar* launcher_args = xmlNodeGetContent(launcher_args_node); + new_job->setLauncherArgs(std::string((const char *)launcher_args)); + xmlFree(launcher_args); + + xmlNodePtr specific_node = xmlNextElementSibling(launcher_args_node); if (specific_node == NULL) { INFOS("A bad job is found, specific_parameters part is not found"); diff --git a/src/Launcher/SALOME_Launcher.hxx b/src/Launcher/SALOME_Launcher.hxx index 56dbb95ba..c13cc7189 100644 --- a/src/Launcher/SALOME_Launcher.hxx +++ b/src/Launcher/SALOME_Launcher.hxx @@ -49,6 +49,7 @@ public: CORBA::Long createJob (const Engines::JobParameters & job_parameters); void launchJob (CORBA::Long job_id); char * getJobState (CORBA::Long job_id); + char * getAssignedHostnames (CORBA::Long job_id); // Get names or ids of hosts assigned to the job void getJobResults(CORBA::Long job_id, const char * directory); CORBA::Boolean getJobDumpState(CORBA::Long job_id, const char * directory); void stopJob (CORBA::Long job_id); diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx index 31da2ed45..7a5750f41 100644 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx @@ -274,6 +274,10 @@ ParserResourcesType::getBatchTypeStr() const return "ll"; case vishnu: return "vishnu"; + case oar: + return "oar"; + case coorm: + return "coorm"; case ssh_batch: return "ssh_batch"; default: @@ -350,6 +354,10 @@ void ParserResourcesType::setBatchTypeStr(const string & batchTypeStr) Batch = ll; else if (batchTypeStr == "vishnu") Batch = vishnu; + else if (batchTypeStr == "oar") + Batch = oar; + else if (batchTypeStr == "coorm") + Batch = coorm; else if (batchTypeStr == "") Batch = none; else diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx index d23eccc0e..75969394f 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx @@ -45,7 +45,7 @@ enum AccessProtocolType {sh, rsh, ssh, srun, pbsdsh, blaunch}; enum ResourceType {cluster, single_machine}; -enum BatchType {none, pbs, lsf, sge, ssh_batch, ccc, ll, slurm, vishnu}; +enum BatchType {none, pbs, lsf, sge, ssh_batch, ccc, ll, slurm, vishnu, oar, coorm}; enum MpiImplType {nompi, lam, mpich1, mpich2, openmpi, ompi, slurmmpi, prun}; -- 2.39.2