From afe9bfb78a894850116c6583e5e1794239ce894c Mon Sep 17 00:00:00 2001 From: Nabil Ghodbane Date: Mon, 5 Oct 2020 17:33:49 +0200 Subject: [PATCH] spns #20110 Tests Jobmanager on ORCUS --- products/KERNEL.pyconf | 1 + products/LIBBATCH.pyconf | 1 + products/patches/kernel_launcher_job.patch | 15 ++++++++ products/patches/libbatch_slurm_orcus.patch | 40 +++++++++++++++++++++ 4 files changed, 57 insertions(+) create mode 100644 products/patches/kernel_launcher_job.patch create mode 100644 products/patches/libbatch_slurm_orcus.patch diff --git a/products/KERNEL.pyconf b/products/KERNEL.pyconf index 7f23197..9a49665 100644 --- a/products/KERNEL.pyconf +++ b/products/KERNEL.pyconf @@ -9,6 +9,7 @@ default : repo : $PROJECTS.projects.salome.git_info.default_git_server + "modules/kernel.git" repo_dev : $PROJECTS.projects.salome.git_info.default_git_server_dev + "kernel.git" } + patches : ['kernel_launcher_job.patch'] environ : { } diff --git a/products/LIBBATCH.pyconf b/products/LIBBATCH.pyconf index 100afe5..e223a09 100755 --- a/products/LIBBATCH.pyconf +++ b/products/LIBBATCH.pyconf @@ -9,6 +9,7 @@ default : repo : $PROJECTS.projects.salome.git_info.default_git_server + "tools/libbatch.git" repo_dev : $PROJECTS.projects.salome.git_info.default_git_server_dev + "libbatch.git" } + patches : ['libbatch_slurm_orcus.patch'] environ : { env_script : $name + ".py" diff --git a/products/patches/kernel_launcher_job.patch b/products/patches/kernel_launcher_job.patch new file mode 100644 index 0000000..3d006b7 --- /dev/null +++ b/products/patches/kernel_launcher_job.patch @@ -0,0 +1,15 @@ +diff --git a/src/Launcher/Launcher_Job_SALOME.cxx b/src/Launcher/Launcher_Job_SALOME.cxx +index 47ec6f1..e287538 100644 +--- a/src/Launcher/Launcher_Job_SALOME.cxx ++++ b/src/Launcher/Launcher_Job_SALOME.cxx +@@ -134,8 +134,8 @@ Launcher::Job_SALOME::buildSalomeScript(Batch::Parametre params) + } + // Create file for ns-port-log + if (is_launcher_file) +- // for a salome application file, we write NS_PORT_FILE_PATH in launch_tmp_dir +- launch_script_stream << "NS_PORT_FILE_PATH=$(mktemp " << launch_tmp_dir << "nsport_XXXXXX) &&\n"; ++ // for a salome application file, we write NS_PORT_FILE_PATH in working directory ++ launch_script_stream << "NS_PORT_FILE_PATH=$(mktemp " << work_directory << "/nsport_" << _launch_date << "_XXXXXX) &&\n"; + else + launch_script_stream << "NS_PORT_FILE_PATH=$(mktemp " << _resource_definition.AppliPath << "/USERS/nsport_XXXXXX) &&\n"; + diff --git a/products/patches/libbatch_slurm_orcus.patch b/products/patches/libbatch_slurm_orcus.patch new file mode 100644 index 0000000..f646603 --- /dev/null +++ b/products/patches/libbatch_slurm_orcus.patch @@ -0,0 +1,40 @@ +diff --git a/src/Slurm/BatchManager_Slurm.cxx b/src/Slurm/BatchManager_Slurm.cxx +index 1c7f8d7..a1e28ed 100644 +--- a/src/Slurm/BatchManager_Slurm.cxx ++++ b/src/Slurm/BatchManager_Slurm.cxx +@@ -64,7 +64,7 @@ namespace Batch { + string cmdFile = buildCommandFile(job); + + // define command to submit batch +- string subCommand = string("cd ") + workDir + "; sbatch " + cmdFile; ++ string subCommand = string("bash -l -c \\\"cd ") + workDir + "; sbatch " + cmdFile + "\\\""; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + command += " 2>&1"; + LOG(command); +@@ -203,7 +203,7 @@ namespace Batch { + void BatchManager_Slurm::deleteJob(const JobId & jobid) + { + // define command to delete job +- string subCommand = "scancel " + jobid.getReference(); ++ string subCommand = string("bash -l -c \\\"scancel ") + jobid.getReference() + "\\\""; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + +@@ -217,7 +217,7 @@ namespace Batch { + JobInfo BatchManager_Slurm::queryJob(const JobId & jobid) + { + // First try to query the job with "squeue" command +- string subCommand = "squeue -h -o %T -j " + jobid.getReference() + " 2>/dev/null"; ++ string subCommand = string("bash -l -c \\\"squeue -h -o %T -j ") + jobid.getReference() + " 2>/dev/null" + "\\\""; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + string output; +@@ -237,7 +237,7 @@ namespace Batch { + // If "squeue" failed, the job may be finished. In this case, try to query the job with + // "sacct". + if (! found) { +- string subCommand = "sacct -X -o State%-10 -n -j " + jobid.getReference(); ++ string subCommand = string("bash -l -c \\\"sacct -X -o State%-10 -n -j ") + jobid.getReference() + "\\\""; + string command = _protocol.getExecCommand(subCommand, _hostname, _username); + LOG(command); + string output; -- 2.39.2