From bdbae5d02be1d29fe7413a34d58a304417eb5f60 Mon Sep 17 00:00:00 2001 From: ribes Date: Wed, 13 Feb 2008 15:05:53 +0000 Subject: [PATCH] - Add some features for PBS --- bin/launchConfigureParser.py | 18 ++- bin/runSalome.py | 5 + src/Launcher/BatchLight_BatchManager.cxx | 128 ++++++++++-------- src/Launcher/BatchLight_BatchManager.hxx | 17 +-- src/Launcher/BatchLight_BatchManager_PBS.cxx | 104 +++++++++----- src/Launcher/BatchLight_BatchManager_PBS.hxx | 11 +- .../BatchLight_BatchManager_SLURM.cxx | 71 ++++++---- .../BatchLight_BatchManager_SLURM.hxx | 6 +- src/Launcher/BatchLight_Job.cxx | 18 ++- src/Launcher/BatchLight_Job.hxx | 10 +- 10 files changed, 245 insertions(+), 143 deletions(-) diff --git a/bin/launchConfigureParser.py b/bin/launchConfigureParser.py index 150f15598..399c40ef7 100755 --- a/bin/launchConfigureParser.py +++ b/bin/launchConfigureParser.py @@ -586,6 +586,15 @@ def CreateOptionParser (theAdditionalOptions=[]): dest="pinter", help=help_str) + # Print Naming service port into a user file. Default: False. + help_str = "Print Naming Service Port into a user file." + o_nspl = optparse.Option("--ns-port-log", + metavar="", + type="string", + action="store", + dest="ns_port_log_file", + help=help_str) + # All options opt_list = [o_t,o_g, # GUI/Terminal o_d,o_o, # Desktop @@ -604,13 +613,14 @@ def CreateOptionParser (theAdditionalOptions=[]): o_c, # Catch exceptions o_a, # Print free port and exit o_n, # --nosave-config - o_pi] # Interactive python console + o_pi, # Interactive python console + o_nspl] #std_options = ["gui", "desktop", "log_file", "py_scripts", "resources", # "xterm", "modules", "embedded", "standalone", # "portkill", "killall", "interp", "splash", - # "catch_exceptions", "print_port", "save_config"] + # "catch_exceptions", "print_port", "save_config", "ns_port_log_file"] opt_list += theAdditionalOptions @@ -811,6 +821,10 @@ def get_env(theAdditionalOptions=[], appname="SalomeApp"): else: args[file_nam] = [cmd_opts.log_file] + # Naming Service port log file + if cmd_opts.ns_port_log_file is not None: + args["ns_port_log_file"] = cmd_opts.ns_port_log_file + # Python scripts args[script_nam] = [] if cmd_opts.py_scripts is not None: diff --git a/bin/runSalome.py b/bin/runSalome.py index 6c01d90e4..7645a1519 100755 --- a/bin/runSalome.py +++ b/bin/runSalome.py @@ -785,6 +785,11 @@ def searchFreePort(args, save_config=1): system('ln -s -f %s %s/.omniORB_last.cfg'%(os.environ['OMNIORB_CONFIG'], home)) pass # + if args.has_key('ns_port_log_file'): + file_name= '%s/%s'%(home, args["ns_port_log_file"]) + f = open(file_name, "w") + f.write(os.environ['NSPORT']) + f.close() break print "%s"%(NSPORT), if NSPORT == limit: diff --git a/src/Launcher/BatchLight_BatchManager.cxx b/src/Launcher/BatchLight_BatchManager.cxx index 4e0241f38..449c8962c 100644 --- a/src/Launcher/BatchLight_BatchManager.cxx +++ b/src/Launcher/BatchLight_BatchManager.cxx @@ -70,20 +70,19 @@ namespace BatchLight { int id; // temporary directory on cluster to put input files for job - setDirForTmpFiles(); - SCRUTE(_dirForTmpFiles); + setDirForTmpFiles(job); // export input files on cluster - exportInputFiles(job->getFileToExecute(),job->getFilesToExportList()); + exportInputFiles(job); // build salome coupling script for job - buildSalomeCouplingScript(job->getFileToExecute()); + buildSalomeCouplingScript(job); // build batch script for job - buildSalomeBatchScript(job->getNbProc()); + buildSalomeBatchScript(job); // submit job on cluster - id = submit(); + id = submit(job); // register job on map _jobmap[id] = job; @@ -91,77 +90,93 @@ namespace BatchLight { return id; } - void BatchManager::setDirForTmpFiles() + void BatchManager::setDirForTmpFiles(BatchLight::Job* job) { - int i; + std::string dirForTmpFiles; + std::string thedate; - _dirForTmpFiles = string("Batch/"); - Batch::Date date = Batch::Date(time(0)) ; - std::string thedate = date.str() ; + // Adding date to the directory name + Batch::Date date = Batch::Date(time(0)); + thedate = date.str(); int lend = thedate.size() ; - i = 0 ; + int i = 0 ; while ( i < lend ) { if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { thedate[i] = '_' ; } i++ ; } - _dirForTmpFiles += thedate ; + + dirForTmpFiles += string("Batch/"); + dirForTmpFiles += thedate ; + job->setDirForTmpFiles(dirForTmpFiles); } - void BatchManager::exportInputFiles(const char *fileToExecute, const Engines::FilesList filesToExportList) throw(SALOME_Exception) + void BatchManager::exportInputFiles(BatchLight::Job* job) throw(SALOME_Exception) { BEGIN_OF("BatchManager::exportInFiles"); - string command = _params.protocol; int status; + const char * fileToExecute = job->getFileToExecute(); + const Engines::FilesList filesToExportList = job->getFilesToExportList(); + const std::string dirForTmpFiles = job->getDirForTmpFiles(); + std::string command; + std::string copy_command; + // Test protocol + if( _params.protocol == "rsh" ) + copy_command = "rcp "; + else if( _params.protocol == "ssh" ) + copy_command = "scp "; + else + throw SALOME_Exception("Unknown protocol : only rsh and ssh are known !"); + + // First step : creating batch tmp files directory + command = _params.protocol; command += " "; - if (_params.username != ""){ command += _params.username; command += "@"; } - command += _params.hostname; command += " \"mkdir -p "; - command += _dirForTmpFiles ; + command += dirForTmpFiles; command += "\"" ; SCRUTE(command.c_str()); status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw SALOME_Exception(ex_mess.c_str()); + } + // Second step : copy fileToExecute into + // batch tmp files directory + command = copy_command; command += fileToExecute; command += " "; - if (_params.username != ""){ command += _params.username; command += "@"; } - command += _params.hostname; command += ":"; - command += _dirForTmpFiles ; + command += dirForTmpFiles; SCRUTE(command.c_str()); status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw SALOME_Exception(ex_mess.c_str()); + } - int i ; - for ( i = 0 ; i < filesToExportList.length() ; i++ ) { - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); + // Third step : copy filesToExportList into + // batch tmp files directory + for (int i = 0 ; i < filesToExportList.length() ; i++ ) { + command = copy_command; command += filesToExportList[i] ; command += " "; if (_params.username != ""){ @@ -170,11 +185,16 @@ namespace BatchLight { } command += _params.hostname; command += ":"; - command += _dirForTmpFiles ; + command += dirForTmpFiles ; SCRUTE(command.c_str()); status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); + if(status) { + std::ostringstream oss; + oss << status; + std::string ex_mess("Error of connection on remote host ! status = "); + ex_mess += oss.str(); + throw SALOME_Exception(ex_mess.c_str()); + } } END_OF("BatchManager::exportInFiles"); @@ -221,10 +241,8 @@ namespace BatchLight { strcpy(temp, "/tmp/command"); strcat(temp, "XXXXXX"); #ifndef WNT - mkstemp(temp); #else - char aPID[80]; itoa(getpid(), aPID, 10); strcat(temp, aPID); @@ -236,18 +254,16 @@ namespace BatchLight { return command; } - void BatchManager::RmTmpFile() + void BatchManager::RmTmpFile(std::string & TemporaryFileName) { - if (_TmpFileName != ""){ - string command = "rm "; - command += _TmpFileName; - char *temp = strdup(command.c_str()); - int lgthTemp = strlen(temp); - temp[lgthTemp - 3] = '*'; - temp[lgthTemp - 2] = '\0'; - system(temp); - free(temp); - } + string command = "rm "; + command += TemporaryFileName; + char *temp = strdup(command.c_str()); + int lgthTemp = strlen(temp); + temp[lgthTemp - 3] = '*'; + temp[lgthTemp - 2] = '\0'; + system(temp); + free(temp); } MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(SALOME_Exception) diff --git a/src/Launcher/BatchLight_BatchManager.hxx b/src/Launcher/BatchLight_BatchManager.hxx index a8ea0061a..7183c1d1b 100644 --- a/src/Launcher/BatchLight_BatchManager.hxx +++ b/src/Launcher/BatchLight_BatchManager.hxx @@ -69,22 +69,17 @@ namespace BatchLight { protected: batchParams _params; MpiImpl *_mpiImpl; - std::map _jobmap; - std::string _dirForTmpFiles; // repertoire temporaire sur le serveur - std::string _TmpFileName; - std::string _fileNameToExecute; - virtual int submit() throw(SALOME_Exception) = 0; - void setDirForTmpFiles(); - void exportInputFiles( const char *fileToExecute, const Engines::FilesList filesToExportList ) throw(SALOME_Exception); - virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) = 0; - virtual void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) = 0; + virtual int submit(BatchLight::Job* job) throw(SALOME_Exception) = 0; + void setDirForTmpFiles(BatchLight::Job* job); + void exportInputFiles(BatchLight::Job* job) throw(SALOME_Exception); + virtual void buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception) = 0; + virtual void buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception) = 0; std::string BuildTemporaryFileName() const; - void RmTmpFile(); + void RmTmpFile(std::string & TemporaryFileName); MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(SALOME_Exception); - private: }; diff --git a/src/Launcher/BatchLight_BatchManager_PBS.cxx b/src/Launcher/BatchLight_BatchManager_PBS.cxx index bba030a0c..7085f08b4 100644 --- a/src/Launcher/BatchLight_BatchManager_PBS.cxx +++ b/src/Launcher/BatchLight_BatchManager_PBS.cxx @@ -121,9 +121,10 @@ namespace BatchLight { command += _params.hostname; command += " \"qstat -f " ; - ostringstream oss2; - oss2 << jobid; - command += oss2.str(); + //ostringstream oss2; + //oss2 << jobid; + //command += oss2.str(); + command += _pbs_job_name[jobid]; command += "\" > "; command += logFile; SCRUTE(command.c_str()); @@ -163,23 +164,27 @@ namespace BatchLight { return jstatus; } - void BatchManager_PBS::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) + void BatchManager_PBS::buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception) { BEGIN_OF("BatchManager_PBS::buildSalomeCouplingScript"); int status; + const char *fileToExecute = job->getFileToExecute(); + const std::string dirForTmpFiles = job->getDirForTmpFiles(); + int idx = dirForTmpFiles.find("Batch/"); + std::string filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); string::size_type p1 = string(fileToExecute).find_last_of("/"); string::size_type p2 = string(fileToExecute).find_last_of("."); - _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + std::string TmpFileName = BuildTemporaryFileName(); - _TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); tempOutputFile << "#! /bin/sh -f" << endl ; tempOutputFile << "cd " ; tempOutputFile << _params.applipath << endl ; tempOutputFile << "export PYTHONPATH=~/" ; - tempOutputFile << _dirForTmpFiles ; + tempOutputFile << dirForTmpFiles ; tempOutputFile << ":$PYTHONPATH" << endl ; tempOutputFile << "if test " ; tempOutputFile << _mpiImpl->rank() ; @@ -190,7 +195,10 @@ namespace BatchLight { if ( i != _params.modulesList.size()-1 ) tempOutputFile << "," ; } - tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; + //tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; + tempOutputFile << " --standalone=registry,study,moduleCatalog --ns-port-log=" + << filelogtemp + << " &\n"; tempOutputFile << " for ((ip=1; ip < "; tempOutputFile << _mpiImpl->size(); tempOutputFile << " ; ip++))" << endl; @@ -199,8 +207,16 @@ namespace BatchLight { tempOutputFile << " done" << endl ; tempOutputFile << " sleep 5" << endl ; tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; - tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; - tempOutputFile << " ./runSession killCurrentPort" << endl; + tempOutputFile << " ./runSession python ~/" << dirForTmpFiles << "/" << fileNameToExecute << ".py" << endl; + + //tempOutputFile << " ./runSession killCurrentPort" << endl; + tempOutputFile << " if [ -f \"" << filelogtemp << "\" ]\n" + << " then\n" + << " port=`cat " << filelogtemp << "`\n" + << " rm " << filelogtemp << "\n" + << " fi\n" + << " ./runSession killSalomeWithPort.py $port\n"; + tempOutputFile << "else" << endl ; tempOutputFile << " sleep 5" << endl ; tempOutputFile << " ./runSession waitNS.py" << endl ; @@ -209,8 +225,8 @@ namespace BatchLight { tempOutputFile << "fi" << endl ; tempOutputFile.flush(); tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; + chmod(TmpFileName.c_str(), 0x1ED); + SCRUTE(TmpFileName.c_str()) ; string command; if( _params.protocol == "rsh" ) @@ -220,7 +236,7 @@ namespace BatchLight { else throw SALOME_Exception("Unknown protocol"); - command += _TmpFileName; + command += TmpFileName; command += " "; if (_params.username != ""){ command += _params.username; @@ -228,24 +244,30 @@ namespace BatchLight { } command += _params.hostname; command += ":"; - command += _dirForTmpFiles ; + command += dirForTmpFiles ; command += "/runSalome_" ; - command += _fileNameToExecute ; + command += fileNameToExecute ; command += "_Batch.sh" ; - SCRUTE(_fileNameToExecute) ; + SCRUTE(fileNameToExecute) ; SCRUTE(command.c_str()); status = system(command.c_str()); if(status) throw SALOME_Exception("Error of connection on remote host"); - RmTmpFile(); + RmTmpFile(TmpFileName); END_OF("BatchManager_PBS::buildSalomeCouplingScript"); } - void BatchManager_PBS::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) + void BatchManager_PBS::buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception) { BEGIN_OF("BatchManager_PBS::buildSalomeBatchScript"); int status; + const int nbproc = job->getNbProc(); + const std::string dirForTmpFiles = job->getDirForTmpFiles(); + const char *fileToExecute = job->getFileToExecute(); + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); int nbmaxproc = _params.nbnodes * _params.nbprocpernode; if( nbproc > nbmaxproc ){ @@ -259,23 +281,24 @@ namespace BatchLight { else nbnodes = _params.nbnodes; - _TmpFileName = BuildTemporaryFileName(); + std::string TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); ostringstream filenameToExecute; - filenameToExecute << " ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh"; + filenameToExecute << " ~/" << dirForTmpFiles << "/runSalome_" << fileNameToExecute << "_Batch.sh"; tempOutputFile << "#! /bin/sh -f" << endl ; tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; - tempOutputFile << "#PBS -o ~/" << _dirForTmpFiles << "/runSalome.log${PBS_JOBID}" << endl ; + tempOutputFile << "#PBS -o /$PBS_O_HOME/" << dirForTmpFiles << "/runSalome.output.log" << endl ; + tempOutputFile << "#PBS -e /$PBS_O_HOME/" << dirForTmpFiles << "/runSalome.error.log" << endl ; tempOutputFile << _mpiImpl->boot("${PBS_NODEFILE}",nbnodes); tempOutputFile << _mpiImpl->run("${PBS_NODEFILE}",nbproc,filenameToExecute.str()); tempOutputFile << _mpiImpl->halt(); tempOutputFile.flush(); tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; + chmod(TmpFileName.c_str(), 0x1ED); + SCRUTE(TmpFileName.c_str()) ; string command; if( _params.protocol == "rsh" ) @@ -284,7 +307,7 @@ namespace BatchLight { command = "scp "; else throw SALOME_Exception("Unknown protocol"); - command += _TmpFileName; + command += TmpFileName; command += " "; if (_params.username != ""){ command += _params.username; @@ -292,23 +315,34 @@ namespace BatchLight { } command += _params.hostname; command += ":"; - command += _dirForTmpFiles ; + command += dirForTmpFiles ; command += "/" ; - command += _fileNameToExecute ; + command += fileNameToExecute ; command += "_Batch.sh" ; SCRUTE(command.c_str()); status = system(command.c_str()); if(status) throw SALOME_Exception("Error of connection on remote host"); - - RmTmpFile(); + + // Adding log files into import list files + ostringstream file_name_output; + file_name_output << "~/" << dirForTmpFiles << "/" << "runSalome.output.log"; + ostringstream file_name_error; + file_name_error << "~/" << dirForTmpFiles << "/" << "runSalome.error.log"; + job->addFileToImportList(file_name_output.str()); + job->addFileToImportList(file_name_error.str()); + RmTmpFile(TmpFileName); END_OF("BatchManager_PBS::buildSalomeBatchScript"); - } - int BatchManager_PBS::submit() throw(SALOME_Exception) + int BatchManager_PBS::submit(BatchLight::Job* job) throw(SALOME_Exception) { BEGIN_OF("BatchManager_PBS::submit"); + const std::string dirForTmpFiles = job->getDirForTmpFiles(); + const char *fileToExecute = job->getFileToExecute(); + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); // define name of log file string logFile="/tmp/logs/"; @@ -340,9 +374,9 @@ namespace BatchLight { command += _params.hostname; command += " \"qsub " ; - command += _dirForTmpFiles ; + command += dirForTmpFiles ; command += "/" ; - command += _fileNameToExecute ; + command += fileNameToExecute ; command += "_Batch.sh\" > "; command += logFile; SCRUTE(command.c_str()); @@ -368,6 +402,8 @@ namespace BatchLight { istringstream iss(strjob); iss >> id; + // Ajout dans la map + _pbs_job_name[id] = sline; END_OF("BatchManager_PBS::submit"); return id; } diff --git a/src/Launcher/BatchLight_BatchManager_PBS.hxx b/src/Launcher/BatchLight_BatchManager_PBS.hxx index e7e5789c7..e0c21651b 100644 --- a/src/Launcher/BatchLight_BatchManager_PBS.hxx +++ b/src/Launcher/BatchLight_BatchManager_PBS.hxx @@ -49,9 +49,14 @@ namespace BatchLight { std::string queryJob(const int & jobid); // renvoie l'etat du job private: - void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); - void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); - int submit() throw(SALOME_Exception); + void buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception); + void buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception); + int submit(BatchLight::Job* job) throw(SALOME_Exception); + + // Permet d'avoir la chaîne complête pour demander + // le statut du job + typedef std::map _pbs_job_name_t; + _pbs_job_name_t _pbs_job_name; }; } diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.cxx b/src/Launcher/BatchLight_BatchManager_SLURM.cxx index 9a740e328..2dbf01fff 100644 --- a/src/Launcher/BatchLight_BatchManager_SLURM.cxx +++ b/src/Launcher/BatchLight_BatchManager_SLURM.cxx @@ -143,23 +143,25 @@ namespace BatchLight { return jstatus; } - void BatchManager_SLURM::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) + void BatchManager_SLURM::buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception) { BEGIN_OF("BatchManager_SLURM::buildSalomeCouplingScript"); int status; + const char *fileToExecute = job->getFileToExecute(); + const std::string dirForTmpFiles = job->getDirForTmpFiles(); string::size_type p1 = string(fileToExecute).find_last_of("/"); string::size_type p2 = string(fileToExecute).find_last_of("."); - _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); - _TmpFileName = BuildTemporaryFileName(); + std::string TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); tempOutputFile << "#! /bin/sh -f" << endl ; tempOutputFile << "cd " ; tempOutputFile << _params.applipath << endl ; tempOutputFile << "export PYTHONPATH=~/" ; - tempOutputFile << _dirForTmpFiles ; + tempOutputFile << dirForTmpFiles ; tempOutputFile << ":$PYTHONPATH" << endl ; tempOutputFile << "if test $SLURM_PROCID = 0; then" << endl ; tempOutputFile << " ./runAppli --terminal --modules=" ; @@ -175,7 +177,7 @@ namespace BatchLight { tempOutputFile << " done" << endl ; tempOutputFile << " ./runSession waitNS.sh" << endl ; tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; - tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; + tempOutputFile << " ./runSession python ~/" << dirForTmpFiles << "/" << fileNameToExecute << ".py" << endl; tempOutputFile << " ./runSession killCurrentPort" << endl; tempOutputFile << "else" << endl ; tempOutputFile << " ./runSession waitNS.sh" << endl ; @@ -183,8 +185,8 @@ namespace BatchLight { tempOutputFile << "fi" << endl ; tempOutputFile.flush(); tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; + chmod(TmpFileName.c_str(), 0x1ED); + SCRUTE(TmpFileName.c_str()) ; string command; if( _params.protocol == "rsh" ) @@ -194,7 +196,7 @@ namespace BatchLight { else throw SALOME_Exception("Unknown protocol"); - command += _TmpFileName; + command += TmpFileName; command += " "; if (_params.username != ""){ command += _params.username; @@ -202,35 +204,41 @@ namespace BatchLight { } command += _params.hostname; command += ":"; - command += _dirForTmpFiles ; + command += dirForTmpFiles ; command += "/runSalome_" ; - command += _fileNameToExecute ; + command += fileNameToExecute ; command += "_Batch.sh" ; SCRUTE(command.c_str()); status = system(command.c_str()); if(status) throw SALOME_Exception("Error of connection on remote host"); - RmTmpFile(); + RmTmpFile(TmpFileName); END_OF("BatchManager_SLURM::buildSalomeCouplingScript"); } - void BatchManager_SLURM::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) + void BatchManager_SLURM::buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception) { BEGIN_OF("BatchManager_SLURM::buildSalomeBatchScript"); int status; - _TmpFileName = BuildTemporaryFileName(); + const int nbproc = job->getNbProc(); + const std::string dirForTmpFiles = job->getDirForTmpFiles(); + std::string TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); + const char *fileToExecute = job->getFileToExecute(); + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); tempOutputFile << "#! /bin/sh -f" << endl ; tempOutputFile << "#BSUB -n " << nbproc << endl ; - tempOutputFile << "#BSUB -o " << _dirForTmpFiles << "/runSalome.log%J" << endl ; - tempOutputFile << "mpirun -srun ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh" << endl ; + tempOutputFile << "#BSUB -o " << dirForTmpFiles << "/runSalome.log%J" << endl ; + tempOutputFile << "mpirun -srun ~/" << dirForTmpFiles << "/runSalome_" << fileNameToExecute << "_Batch.sh" << endl ; tempOutputFile.flush(); tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; + chmod(TmpFileName.c_str(), 0x1ED); + SCRUTE(TmpFileName.c_str()) ; string command; if( _params.protocol == "rsh" ) @@ -239,7 +247,7 @@ namespace BatchLight { command = "scp "; else throw SALOME_Exception("Unknown protocol"); - command += _TmpFileName; + command += TmpFileName; command += " "; if (_params.username != ""){ command += _params.username; @@ -247,23 +255,28 @@ namespace BatchLight { } command += _params.hostname; command += ":"; - command += _dirForTmpFiles ; + command += dirForTmpFiles ; command += "/" ; - command += _fileNameToExecute ; + command += fileNameToExecute ; command += "_Batch.sh" ; SCRUTE(command.c_str()); status = system(command.c_str()); if(status) throw SALOME_Exception("Error of connection on remote host"); - RmTmpFile(); + RmTmpFile(TmpFileName); END_OF("BatchManager_SLURM::buildSalomeBatchScript"); } - int BatchManager_SLURM::submit() throw(SALOME_Exception) + int BatchManager_SLURM::submit(BatchLight::Job* job) throw(SALOME_Exception) { BEGIN_OF("BatchManager_SLURM::submit"); + const std::string dirForTmpFiles = job->getDirForTmpFiles(); + const char *fileToExecute = job->getFileToExecute(); + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + std::string fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); // define name of log file string logFile="/tmp/logs/"; @@ -295,9 +308,9 @@ namespace BatchLight { command += _params.hostname; command += " \"bsub < " ; - command += _dirForTmpFiles ; + command += dirForTmpFiles ; command += "/" ; - command += _fileNameToExecute ; + command += fileNameToExecute ; command += "_Batch.sh\" > "; command += logFile; SCRUTE(command.c_str()); @@ -312,9 +325,9 @@ namespace BatchLight { fclose(fp); string sline(line); - int p1 = sline.find("<"); - int p2 = sline.find(">"); - string strjob = sline.substr(p1+1,p2-p1-1); + int p10 = sline.find("<"); + int p20 = sline.find(">"); + string strjob = sline.substr(p10+1,p20-p10-1); int id; istringstream iss(strjob); diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.hxx b/src/Launcher/BatchLight_BatchManager_SLURM.hxx index ed21624dd..6024b28de 100644 --- a/src/Launcher/BatchLight_BatchManager_SLURM.hxx +++ b/src/Launcher/BatchLight_BatchManager_SLURM.hxx @@ -49,9 +49,9 @@ namespace BatchLight { std::string queryJob(const int & jobid); // renvoie l'etat du job protected: - void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); - void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); - int submit() throw(SALOME_Exception); + void buildSalomeCouplingScript(BatchLight::Job* job) throw(SALOME_Exception); + void buildSalomeBatchScript(BatchLight::Job* job) throw(SALOME_Exception); + int submit(BatchLight::Job* job) throw(SALOME_Exception); private: diff --git a/src/Launcher/BatchLight_Job.cxx b/src/Launcher/BatchLight_Job.cxx index 9762a98a6..9e70bbd37 100644 --- a/src/Launcher/BatchLight_Job.cxx +++ b/src/Launcher/BatchLight_Job.cxx @@ -32,9 +32,16 @@ using namespace std; namespace BatchLight { // Constructeur - Job::Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc) : _fileToExecute(fileToExecute), _filesToExport(filesToExport), _filesToImport(filesToImport), _nbproc(nbproc) + Job::Job(const char *fileToExecute, + const Engines::FilesList& filesToExport, + const Engines::FilesList& filesToImport, + const int nbproc) : _fileToExecute(fileToExecute), + _filesToExport(filesToExport), + _filesToImport(filesToImport), + _nbproc(nbproc) { - // Nothing to do + _dirForTmpFiles = "/tmp/default_batch_tmp_directory"; + std::string _fileNameToExecute = ""; } Job::~Job() @@ -42,4 +49,11 @@ namespace BatchLight { MESSAGE("Job destructor"); } + void + Job::addFileToImportList(std::string file_name) + { + CORBA::ULong lgth = _filesToImport.length(); + _filesToImport.length(lgth+1); + _filesToImport[lgth] = CORBA::string_dup(file_name.c_str()); + } } diff --git a/src/Launcher/BatchLight_Job.hxx b/src/Launcher/BatchLight_Job.hxx index 23ac8f3bb..12b5711db 100644 --- a/src/Launcher/BatchLight_Job.hxx +++ b/src/Launcher/BatchLight_Job.hxx @@ -45,14 +45,18 @@ namespace BatchLight { const char *getFileToExecute() const { return _fileToExecute; } const Engines::FilesList getFilesToExportList() const { return _filesToExport; } const Engines::FilesList getFilesToImportList() const { return _filesToImport; } + void addFileToImportList(std::string file_name); const int getNbProc() const { return _nbproc; } - + + const std::string getDirForTmpFiles() const { return _dirForTmpFiles;} + void setDirForTmpFiles(std::string dirForTmpFiles) {_dirForTmpFiles = dirForTmpFiles; + SCRUTE(_dirForTmpFiles);} protected: const char* _fileToExecute; const Engines::FilesList _filesToExport; - const Engines::FilesList _filesToImport; + Engines::FilesList _filesToImport; const int _nbproc; - + std::string _dirForTmpFiles; // Tmp directory on the server private: }; -- 2.39.2