From: Konstantin Leontev Date: Fri, 11 Aug 2023 10:41:32 +0000 (+0100) Subject: [bos #35138][EDF] (2023-T1) Specialization of resources in KERNEL. JobParameters... X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=7c47a7f559dd337dc09462cba410d23dd27f1817;p=modules%2Fkernel.git [bos #35138][EDF] (2023-T1) Specialization of resources in KERNEL. JobParameters resource_required type changed to ResourceParametersJob. --- diff --git a/idl/SALOME_Launcher.idl b/idl/SALOME_Launcher.idl index 5193d29b7..95dd67e35 100644 --- a/idl/SALOME_Launcher.idl +++ b/idl/SALOME_Launcher.idl @@ -137,7 +137,7 @@ struct JobParameters - mem_mb -> Memory expressed in megabytes. - nb_proc -> Number of Processors. */ - ResourceParametersContainer resource_required; + ResourceParametersJob resource_required; //! Name of the batch queue chosen - optional string queue; @@ -150,7 +150,7 @@ struct JobParameters //! Specifies if the job must run in exclusive mode (without sharing nodes with other jobs) boolean exclusive; - //! Specifies the memory limit per cpu (exclusive with resource_required.mem_mb) + //! Specifies the memory limit per cpu unsigned long mem_per_cpu; //! Workload Characterization Key - mandatory on some clusters diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index 63ea28b1c..a9b016610 100644 --- a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -493,17 +493,9 @@ Launcher_cpp::getJobParameters(int job_id) job_parameters.wckey = job->getWCKey(); job_parameters.extra_params = job->getExtraParams(); - resourceParamsContainer resource_params = job->getResourceRequiredParams(); - job_parameters.resource_required.name = resource_params.name; - job_parameters.resource_required.hostname = resource_params.hostname; - job_parameters.resource_required.OS = resource_params.OS; - job_parameters.resource_required.nb_proc = resource_params.nb_proc; - job_parameters.resource_required.nb_node = resource_params.nb_node; - job_parameters.resource_required.nb_proc_per_node = resource_params.nb_proc_per_node; - job_parameters.resource_required.cpu_clock = resource_params.cpu_clock; - job_parameters.resource_required.mem_mb = resource_params.mem_mb; - - job_parameters.specific_parameters = job->getSpecificParameters(); + job_parameters.resource_required = job->getResourceRequiredParams(); + + job_parameters.specific_parameters = job->getSpecificParameters(); return job_parameters; } @@ -548,15 +540,15 @@ Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile, for(size_t i=0; i < job_params.OutputFile.size();i++) new_job->add_out_file(job_params.OutputFile[i]); - resourceParamsContainer p; + resourceParamsJob p; p.hostname = clusterName; p.name = ""; - p.OS = ""; - p.nb_proc = job_params.NbOfProcesses; - p.nb_node = 0; - p.nb_proc_per_node = 0; - p.cpu_clock = 0; - p.mem_mb = 0; + // p.OS = ""; + // p.nb_proc = job_params.NbOfProcesses; + // p.nb_node = 0; + // p.nb_proc_per_node = 0; + // p.cpu_clock = 0; + // p.mem_mb = 0; new_job->setResourceRequiredParams(p); createJob(new_job.get()); @@ -862,12 +854,12 @@ Launcher_cpp::getBatchManager(Launcher::Job * job) // Select a resource for the job std::vector ResourceList; - resourceParamsContainer params = job->getResourceRequiredParams(); + resourceParamsJob params = job->getResourceRequiredParams(); // Consider only resources that can launch batch jobs try { - ResourceList = _ResManager->GetFittingResourcesContainer(params); + ResourceList = _ResManager->GetFittingResourcesJob(params); } catch(const ResourcesException &ex) { diff --git a/src/Launcher/Launcher.hxx b/src/Launcher/Launcher.hxx index 5f767194f..d71e75508 100644 --- a/src/Launcher/Launcher.hxx +++ b/src/Launcher/Launcher.hxx @@ -51,7 +51,7 @@ struct LAUNCHER_EXPORT JobParameters_cpp std::string local_directory; std::string result_directory; std::string maximum_duration; - resourceParamsContainer resource_required; + resourceParamsJob resource_required; std::string queue; std::string partition; bool exclusive; diff --git a/src/Launcher/Launcher_Job.cxx b/src/Launcher/Launcher_Job.cxx index 20d1b92dd..aa3256d44 100644 --- a/src/Launcher/Launcher_Job.cxx +++ b/src/Launcher/Launcher_Job.cxx @@ -315,7 +315,7 @@ Launcher::Job::setLauncherArgs(const std::string & launcher_args) } void -Launcher::Job::setResourceRequiredParams(const resourceParamsContainer& resource_required_params) +Launcher::Job::setResourceRequiredParams(const resourceParamsJob& resource_required_params) { checkResourceRequiredParams(resource_required_params); _resource_required_params = resource_required_params; @@ -422,7 +422,7 @@ Launcher::Job::getLauncherArgs() const return _launcher_args; } -resourceParamsContainer +resourceParamsJob Launcher::Job::getResourceRequiredParams() const { return _resource_required_params; @@ -527,7 +527,7 @@ Launcher::Job::checkMaximumDuration(const std::string & maximum_duration) } void -Launcher::Job::checkResourceRequiredParams(const resourceParamsContainer& resource_required_params) +Launcher::Job::checkResourceRequiredParams(const resourceParamsJob& resource_required_params) { // TODO: check if we need this check for a job: // nb_proc has be to > 0 diff --git a/src/Launcher/Launcher_Job.hxx b/src/Launcher/Launcher_Job.hxx index 134f18e1c..9a99fd556 100644 --- a/src/Launcher/Launcher_Job.hxx +++ b/src/Launcher/Launcher_Job.hxx @@ -71,7 +71,7 @@ namespace Launcher void add_in_file(const std::string & file); void add_out_file(const std::string & file); void setMaximumDuration(const std::string & maximum_duration); - void setResourceRequiredParams(const resourceParamsContainer & resource_required_params); + void setResourceRequiredParams(const resourceParamsJob& resource_required_params); void setQueue(const std::string & queue); void setPartition(const std::string & partition); void setEnvFile(const std::string & env_file); @@ -94,7 +94,7 @@ namespace Launcher const std::list & get_in_files() const; const std::list & get_out_files() const; std::string getMaximumDuration() const; - resourceParamsContainer getResourceRequiredParams() const; + resourceParamsJob getResourceRequiredParams() const; std::string getQueue() const; std::string getPartition() const; std::string getEnvFile() const; @@ -119,7 +119,7 @@ namespace Launcher // Checks void checkMaximumDuration(const std::string & maximum_duration); - void checkResourceRequiredParams(const resourceParamsContainer & resource_required_params); + void checkResourceRequiredParams(const resourceParamsJob& resource_required_params); // Helps long convertMaximumDuration(const std::string & maximum_duration); @@ -157,7 +157,7 @@ namespace Launcher std::map _specific_parameters; std::string _maximum_duration; long _maximum_duration_in_second; - resourceParamsContainer _resource_required_params; + resourceParamsJob _resource_required_params; std::string _queue; std::string _partition; bool _exclusive; diff --git a/src/Launcher/Launcher_XML_Persistence.cxx b/src/Launcher/Launcher_XML_Persistence.cxx index a76a776af..02d8590f9 100644 --- a/src/Launcher/Launcher_XML_Persistence.cxx +++ b/src/Launcher/Launcher_XML_Persistence.cxx @@ -175,7 +175,7 @@ XML_Persistence::addJobToXmlDocument(xmlNodePtr root_node, const Job & job) } // Resource part - resourceParamsContainer resource_params = job.getResourceRequiredParams(); + const resourceParamsJob resource_params = job.getResourceRequiredParams(); xmlNodePtr res_node = addNode(node, "resource_params", ""); addNode(res_node, "name", resource_params.name); if (!resource_params.hostname.empty()) @@ -394,7 +394,7 @@ XML_Persistence::parseUserNode(Job * new_job, xmlNodePtr user_node) void XML_Persistence::parseResourceNode(Job * new_job, xmlNodePtr res_node) { - resourceParamsContainer p; + resourceParamsJob p; xmlNodePtr current_node = xmlFirstElementChild(res_node); while (current_node != NULL) { @@ -403,20 +403,20 @@ XML_Persistence::parseResourceNode(Job * new_job, xmlNodePtr res_node) p.name = getNodeContent(current_node); else if (node_name == "hostname") p.hostname = getNodeContent(current_node); - else if (node_name == "OS") - p.OS = getNodeContent(current_node); - else if (node_name == "nb_proc") - p.nb_proc = getNumericalNodeContent(current_node); - else if (node_name == "nb_node") - p.nb_node = getNumericalNodeContent(current_node); - else if (node_name == "nb_proc_per_node") - p.nb_proc_per_node = getNumericalNodeContent(current_node); - else if (node_name == "cpu_clock") - p.cpu_clock = getNumericalNodeContent(current_node); - else if (node_name == "mem_mb") - p.mem_mb = getNumericalNodeContent(current_node); - else if (node_name == "mem_per_cpu") - new_job->setMemPerCpu(getNumericalNodeContent(current_node)); + // else if (node_name == "OS") + // p.OS = getNodeContent(current_node); + // else if (node_name == "nb_proc") + // p.nb_proc = getNumericalNodeContent(current_node); + // else if (node_name == "nb_node") + // p.nb_node = getNumericalNodeContent(current_node); + // else if (node_name == "nb_proc_per_node") + // p.nb_proc_per_node = getNumericalNodeContent(current_node); + // else if (node_name == "cpu_clock") + // p.cpu_clock = getNumericalNodeContent(current_node); + // else if (node_name == "mem_mb") + // p.mem_mb = getNumericalNodeContent(current_node); + // else if (node_name == "mem_per_cpu") + // new_job->setMemPerCpu(getNumericalNodeContent(current_node)); else throw LauncherException(string("invalid node \"") + node_name + "\""); current_node = xmlNextElementSibling(current_node); diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index 6b876147f..7b0d1fc1d 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -628,7 +628,7 @@ SALOME_Launcher::JobParameters_CORBA2CPP( result.result_directory = job_parameters.result_directory.in(); result.maximum_duration = job_parameters.maximum_duration.in(); - result.resource_required = resourceParametersContainer_CORBAtoCPP(job_parameters.resource_required); + result.resource_required = resourceParametersJob_CORBAtoCPP(job_parameters.resource_required); result.queue = job_parameters.queue.in(); result.partition = job_parameters.partition.in(); @@ -677,7 +677,7 @@ SALOME_Launcher::JobParameters_CPP2CORBA(const JobParameters_cpp& job_parameters result->result_directory = CORBA::string_dup(job_parameters.result_directory.c_str()); result->maximum_duration = CORBA::string_dup(job_parameters.maximum_duration.c_str()); - result->resource_required = resourceParametersContainer_CPPtoCORBA(job_parameters.resource_required); + result->resource_required = resourceParametersJob_CPPtoCORBA(job_parameters.resource_required); result->queue = CORBA::string_dup(job_parameters.queue.c_str()); result->partition = CORBA::string_dup(job_parameters.partition.c_str()); diff --git a/src/Launcher/Test/launcher_use_case.py b/src/Launcher/Test/launcher_use_case.py index f18316739..73ae29181 100644 --- a/src/Launcher/Test/launcher_use_case.py +++ b/src/Launcher/Test/launcher_use_case.py @@ -31,9 +31,8 @@ if __name__ == '__main__': salome.salome_init() launcher = salome.naming_service.Resolve('/SalomeLauncher') job_params = salome.JobParameters() - job_params.resource_required = salome.ResourceParametersContainer() + job_params.resource_required = salome.ResourceParametersJob() job_params.resource_required.name = "localhost" - job_params.resource_required.nb_proc = 1 # slurm: --ntasks job_params.job_type = "command" #cwd = os.getcwd() diff --git a/src/Launcher/Test/test_launcher.py b/src/Launcher/Test/test_launcher.py index 5ed629dca..0649c495d 100755 --- a/src/Launcher/Test/test_launcher.py +++ b/src/Launcher/Test/test_launcher.py @@ -82,8 +82,7 @@ class TestCompo(unittest.TestCase): def create_JobParameters(self): job_params = salome.JobParameters() job_params.wckey="P11N0:SALOME" #needed by edf clusters - job_params.resource_required = salome.ResourceParametersContainer() - job_params.resource_required.nb_proc = 1 + job_params.resource_required = salome.ResourceParametersJob() return job_params ############################## diff --git a/src/Launcher_SWIG/Launcher.i b/src/Launcher_SWIG/Launcher.i index e1c39af05..ec3c4dc52 100644 --- a/src/Launcher_SWIG/Launcher.i +++ b/src/Launcher_SWIG/Launcher.i @@ -112,7 +112,7 @@ public: std::string local_directory; std::string result_directory; std::string maximum_duration; - resourceParamsContainer resource_required; + resourceParamsJob resource_required; std::string queue; std::string partition; bool exclusive; diff --git a/src/Launcher_SWIG/Test/test_swig_launcher.py b/src/Launcher_SWIG/Test/test_swig_launcher.py index 9f54d5b48..20ba2d9a0 100755 --- a/src/Launcher_SWIG/Test/test_swig_launcher.py +++ b/src/Launcher_SWIG/Test/test_swig_launcher.py @@ -52,7 +52,7 @@ def createJobParameters(): return jp def createResourceParameters(): - return pylauncher.resourceParamsContainer() + return pylauncher.resourceParamsJob() # Test of SalomeLauncher. # This test should be run in the salome environment, using "salome shell". @@ -77,7 +77,7 @@ class TestCompo(unittest.TestCase): # Get the list of possible ressources ressource_param = createResourceParameters() rm = createResourcesManager() - cls.ressources = rm.GetFittingResourcesContainer(ressource_param) + cls.ressources = rm.GetFittingResourcesJob(ressource_param) def verifyFile(self, path, content): try: @@ -91,7 +91,6 @@ class TestCompo(unittest.TestCase): def create_JobParameters(self): job_params = createJobParameters() job_params.wckey="P11U5:CARBONES" #needed by edf clusters - job_params.resource_required.nb_proc = 1 return job_params ############################## diff --git a/src/Launcher_SWIG/test.py b/src/Launcher_SWIG/test.py index 64f5ac8d6..c85fb5a7b 100644 --- a/src/Launcher_SWIG/test.py +++ b/src/Launcher_SWIG/test.py @@ -24,10 +24,9 @@ jp.job_type = "command" jp.job_file = "/home/I35256/salome/scripts/job_sh/script.sh" jp.work_directory = "/tmp/wd" jp.result_directory = "/tmp/rd" -rp = pylauncher.resourceParamsContainer() +rp = pylauncher.resourceParamsJob() rp.name="localhost" rp.hostname="localhost" -rp.nb_proc = 1 jp.resource_required = rp launcher = pylauncher.Launcher_cpp() # no catalog. localhost is defined anyway diff --git a/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py b/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py index 2a59505ae..303f0ab8d 100644 --- a/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py +++ b/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py @@ -60,6 +60,14 @@ class ResourceParametersContainer(Engines.ResourceParametersContainer): Engines.ResourceParametersContainer.__init__(self, name, hostname, policy, resList, OS, componentList, nb_proc, mem_mb, cpu_clock, nb_node, nb_proc_per_node) + +class ResourceParametersJob(Engines.ResourceParametersJob): + def __init__(self, name="", hostname="", + policy="", resList = None): + if resList is None: + resList = [] + Engines.ResourceParametersJob.__init__(self, name, hostname, + policy, resList) class JobParameters (Engines.JobParameters): def __init__(self, job_name="", job_type="", job_file="", pre_command="", env_file="", in_files=None, out_files=None, @@ -74,7 +82,7 @@ class JobParameters (Engines.JobParameters): if specific_parameters is None: specific_parameters = [] if resource_required is None: - resource_required = ResourceParametersContainer() + resource_required = ResourceParametersJob() Engines.JobParameters.__init__(self, job_name, job_type, job_file, pre_command, env_file, in_files, out_files, work_directory, local_directory, result_directory, maximum_duration, resource_required, queue, partition, exclusive, mem_per_cpu, diff --git a/src/ResourcesManager/ResourcesManager.cxx b/src/ResourcesManager/ResourcesManager.cxx index d9b9b7512..e1ae2676e 100644 --- a/src/ResourcesManager/ResourcesManager.cxx +++ b/src/ResourcesManager/ResourcesManager.cxx @@ -139,6 +139,17 @@ namespace // } } + template ResourceList GetAllResources(const T& resources) + { + ResourceList result; + for (const auto& res : resources) + { + result.push_back(res.first); + } + + return result; + } + template ResourceList GetResourcesByHostname(const std::string& hostnameIn, const T& resourceList) { if (hostnameIn.empty()) @@ -338,6 +349,12 @@ ResourceList ResourcesManager_cpp::GetFittingResourcesJob(const resourceParamsJo throw ResourcesException("Resource name was not found in resource list! Requested name: " + params.name); } + if (params.hostname.empty()) + { + // Use all available resources + return GetAllResources(_resourcesListJob); + } + // Step 3 ResourceList result = GetResourcesByHostname(params.hostname, _resourcesListJob);