From: Ovidiu Mircescu Date: Mon, 3 Sep 2018 09:53:24 +0000 (+0200) Subject: Fix srun protocol for EDF clusters. X-Git-Tag: V9_1_0~6 X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=f8817a847cd151846cc55aab33c35191b756426e;p=modules%2Fkernel.git Fix srun protocol for EDF clusters. This commit fixes the following problem when using more than one node to run a YACS schema: it was not possible to run more than one container on the second and subsequent nodes. The following kind of message was reported: srun: Job 2703790 step creation temporarily disabled, retrying srun: error: Unable to create step for job 2703790: Job/step already completing or completed --- diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 12a34fb7a..9713f9c97 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -1170,7 +1170,7 @@ std::string SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer (const else if (resInfo.Protocol == srun) { - command = "srun -n 1 -N 1 --share --nodelist="; + command = "srun -n 1 -N 1 --share --mem-per-cpu=0 --nodelist="; std::string commandRcp = "rcp "; commandRcp += tmpFileName; commandRcp += " "; @@ -1218,7 +1218,7 @@ std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, c else if (resInfo.Protocol == ssh) command = "ssh "; else if (resInfo.Protocol == srun) - command = "srun -n 1 -N 1 --share --nodelist="; + command = "srun -n 1 -N 1 --share --mem-per-cpu=0 --nodelist="; else throw SALOME_Exception("Unknown protocol"); @@ -1323,7 +1323,7 @@ std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocol case srun: // no need to redefine the user with srun, the job user is taken by default // (note: for srun, user id can be specified with " --uid=") - command << "srun -n 1 -N 1 --share --nodelist=" << hostname << " "; + command << "srun -n 1 -N 1 --share --mem-per-cpu=0 --nodelist=" << hostname << " 
"; break; case pbsdsh: command << "pbsdsh -o -h " << hostname << " ";