--- /dev/null
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * BatchManager.cxx :
+ *
+ * Auteur : Bernard SECHER - CEA/DEN
+ * Date : Juillet 2007
+ * Projet : SALOME
+ *
+ */
+
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <netdb.h>
+#include <unistd.h>
+#include "BatchLight_Job.hxx"
+#include "BatchLight_BatchManager.hxx"
+#include "Batch_Date.hxx"
+using namespace std;
+
+namespace BatchLight {
+
+ // Constructor: keeps a copy of the connection parameters and checks that the
+ // host name can be resolved with gethostbyname().
+ // Throws SALOME_Exception when the host name is unknown from the network.
+ BatchManager::BatchManager(const batchParams& p) throw(SALOME_Exception) : _params(p)
+ {
+   SCRUTE(_params.hostname);
+   SCRUTE(_params.protocol);
+   SCRUTE(_params.username);
+
+   // Guard clause: a resolvable host name means there is nothing more to do.
+   if (gethostbyname(_params.hostname.c_str()))
+     return;
+
+   // Host name unknown from the network: report it to the caller.
+   const string msg = "hostname \"" + _params.hostname + "\" unknown from the network";
+   throw SALOME_Exception(msg.c_str());
+ }
+
+ // Destructor (empty body: nothing is released here)
+ BatchManager::~BatchManager()
+ {
+ // Nothing to do
+ }
+
+ // Sets _dirForTmpFiles to "Batch/<date>", where <date> is the string form of the
+ // current Batch::Date with every '/', '-' and ':' replaced by '_'.
+ void BatchManager::setDirForTmpFiles()
+ {
+   _dirForTmpFiles = string("Batch/");
+
+   // Date stamp of "now", made usable as a directory name.
+   std::string stamp = Batch::Date(time(0)).str();
+   for (std::string::size_type pos = 0; pos < stamp.size(); ++pos) {
+     switch (stamp[pos]) {
+       case '/':
+       case '-':
+       case ':':
+         stamp[pos] = '_';
+         break;
+       default:
+         break;
+     }
+   }
+
+   _dirForTmpFiles += stamp;
+ }
+
+ // Creates the directory _dirForTmpFiles on the remote host ("mkdir -p" run through
+ // the rsh/ssh command), then copies fileToExecute and every entry of
+ // filesToExportList into it with rcp/scp.
+ //
+ // fileToExecute     : local path of the file to copy first
+ // filesToExportList : local paths of the additional files to copy
+ //
+ // Throws SALOME_Exception("Unknown protocol") when _params.protocol is neither
+ // "rsh" nor "ssh", and SALOME_Exception("Error of connection on remote host")
+ // as soon as one of the commands returns a non-zero status.
+ //
+ // NOTE(review): the arguments are concatenated unquoted into a shell command
+ // line, so paths containing spaces or shell metacharacters are not supported.
+ void BatchManager::exportInFiles(const char *fileToExecute, const Engines::FilesToExportList filesToExportList) throw(SALOME_Exception)
+ {
+   BEGIN_OF("BatchManager::exportInFiles");
+
+   // Validate the protocol before anything is executed: the protocol string is
+   // used below as the name of the remote-shell command itself.
+   string copyTool;
+   if( _params.protocol == "rsh" )
+     copyTool = "rcp ";
+   else if( _params.protocol == "ssh" )
+     copyTool = "scp ";
+   else
+     throw SALOME_Exception("Unknown protocol");
+
+   // "[user@]host", shared by every command below.
+   string remoteHost;
+   if (_params.username != ""){
+     remoteHost += _params.username;
+     remoteHost += "@";
+   }
+   remoteHost += _params.hostname;
+
+   // 1. Create the remote directory.
+   string command = _params.protocol;
+   command += " ";
+   command += remoteHost;
+   command += " \"mkdir -p ";
+   command += _dirForTmpFiles ;
+   command += "\"" ;
+   SCRUTE(command.c_str());
+   if(system(command.c_str()))
+     throw SALOME_Exception("Error of connection on remote host");
+
+   // " [user@]host:<remote dir>", the destination of every copy.
+   string destination = " ";
+   destination += remoteHost;
+   destination += ":";
+   destination += _dirForTmpFiles ;
+
+   // 2. Copy the file to execute.
+   command = copyTool;
+   command += fileToExecute;
+   command += destination;
+   SCRUTE(command.c_str());
+   if(system(command.c_str()))
+     throw SALOME_Exception("Error of connection on remote host");
+
+   // 3. Copy the additional files, one command per file.
+   const int nbFiles = static_cast<int>(filesToExportList.length());
+   for ( int idx = 0 ; idx < nbFiles ; idx++ ) {
+     command = copyTool;
+     command += filesToExportList[idx] ;
+     command += destination;
+     SCRUTE(command.c_str());
+     if(system(command.c_str()))
+       throw SALOME_Exception("Error of connection on remote host");
+   }
+
+   END_OF("BatchManager::exportInFiles");
+ }
+
+ // Runs "<dir>/<name>_bsub.sh" with tcsh on the remote host through rsh or ssh,
+ // where <dir> is _dirForTmpFiles and <name> is _fileNameToExecute.
+ // Throws SALOME_Exception("Unknown protocol") for any other protocol and
+ // SALOME_Exception("Error of connection on remote host") when the command
+ // returns a non-zero status.
+ void BatchManager::submit() throw(SALOME_Exception)
+ {
+   BEGIN_OF("BatchManager::submit");
+
+   // Only the two known remote shells are accepted.
+   const bool useRsh = ( _params.protocol == "rsh" );
+   const bool useSsh = ( _params.protocol == "ssh" );
+   if( !useRsh && !useSsh )
+     throw SALOME_Exception("Unknown protocol");
+
+   // <shell> [user@]host "tcsh <dir>/<name>_bsub.sh"
+   string command = useRsh ? "rsh " : "ssh ";
+   if (_params.username != "")
+     command += _params.username + "@";
+   command += _params.hostname;
+   command += " \"tcsh " + _dirForTmpFiles + "/" + _fileNameToExecute + "_bsub.sh\"";
+   SCRUTE(command.c_str());
+
+   const int status = system(command.c_str());
+   if(status != 0)
+     throw SALOME_Exception("Error of connection on remote host");
+
+   END_OF("BatchManager::submit");
+ }
+
+ // Returns a local temporary file name of the form "/tmp/command<suffix>.sh".
+ // Outside WNT the suffix comes from mkstemp(), which also creates the file
+ // "/tmp/command<suffix>" (without ".sh"); under WNT the literal "XXXXXX"
+ // followed by the process id is used instead.
+ string BatchManager::BuildTemporaryFileName() const
+ {
+   //build more complex file name to support multiple salome session
+   string name;
+#ifndef WNT
+
+   char pattern[] = "/tmp/commandXXXXXX";
+   // mkstemp() returns an open descriptor; only the generated name is needed,
+   // so the descriptor is closed right away instead of being leaked.
+   const int fd = mkstemp(pattern);
+   if (fd != -1)
+     close(fd);
+   name = pattern;
+#else
+
+   // Appending to a std::string avoids overflowing a fixed-size buffer.
+   char aPID[80];
+   itoa(getpid(), aPID, 10);
+   name = "/tmp/commandXXXXXX";
+   name += aPID;
+#endif
+
+   name += ".sh";
+   return name;
+ }
+
+// Removes the local temporary files: runs "rm <prefix>*", where <prefix> is
+// _TmpFileName without its last three characters (the ".sh" suffix when the
+// name was produced by BuildTemporaryFileName). Does nothing when
+// _TmpFileName is empty. The status of the rm command is ignored.
+void BatchManager::RmTmpFile()
+{
+  if (_TmpFileName == "")
+    return;
+
+  string command = "rm " + _TmpFileName;
+  // Swap the last three characters for a shell wildcard.
+  command.erase(command.size() - 3);
+  command += '*';
+  system(command.c_str());
+}
+
+}
--- /dev/null
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * BatchManager.hxx :
+ *
+ * Auteur : Bernard SECHER - CEA/DEN
+ * Date : Juillet 2007
+ * Projet : SALOME
+ *
+ */
+
+#ifndef _BL_BATCHMANAGER_H_
+#define _BL_BATCHMANAGER_H_
+
+#include <vector>
+#include <string>
+#include "Utils_SALOME_Exception.hxx"
+#include <SALOMEconfig.h>
+#include CORBA_CLIENT_HEADER(SALOME_ContainerManager)
+
+namespace BatchLight {
+
+ class Job;
+
+ struct batchParams{ // parameters describing how to reach the batch server
+ std::string hostname; // server on which the batch manager runs
+ std::string protocol; // protocol used to access the server: ssh or rsh
+ std::string username; // user name used to access the server
+ std::string applipath; // path of application directory on server
+ std::vector<std::string> modulesList; // list of Salome modules installed on server
+ };
+
+ class BatchManager // base class of the batch managers; job control is left to derived classes
+ {
+ public:
+ // Constructor and destructor
+ BatchManager(const batchParams& p) throw(SALOME_Exception); // connection to the host machine
+ virtual ~BatchManager();
+
+ // Job-control methods: pure virtual
+ virtual const int submitJob(BatchLight::Job & job) = 0; // submits a job to the batch manager
+ virtual void deleteJob(const int & jobid) = 0; // removes a job from the batch manager
+ virtual int queryJob(const int & jobid) = 0; // returns the state of the job
+
+ protected:
+ batchParams _params; // copy of the parameters given to the constructor
+ std::string _dirForTmpFiles; // temporary directory on the server
+ std::string _TmpFileName; // name of the current local temporary file
+ std::string _fileNameToExecute; // name of the file to execute, used to name the generated scripts
+
+ void setDirForTmpFiles();
+ void exportInFiles( const char *fileToExecute, const Engines::FilesToExportList filesToExportList ) throw(SALOME_Exception);
+ virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) = 0;
+ virtual void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) = 0;
+ virtual void buildSalomeSubmitBatchScript() throw(SALOME_Exception) = 0;
+ void submit() throw(SALOME_Exception);
+
+ std::string BuildTemporaryFileName() const;
+ void RmTmpFile();
+
+ private:
+
+ };
+
+}
+
+#endif
--- /dev/null
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * BatchManager_SLURM.cxx :
+ *
+ * Auteur : Bernard SECHER - CEA/DEN
+ * Date : Juillet 2007
+ * Projet : SALOME
+ *
+ */
+
+#include "BatchLight_BatchManager_SLURM.hxx"
+#include "utilities.h"
+#include "BatchLight_Job.hxx"
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <sys/stat.h>
+
+using namespace std;
+
+namespace BatchLight {
+
+ // Constructor: forwards the parameters to the BatchManager base-class constructor
+ BatchManager_SLURM::BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception) : BatchManager(p)
+ {
+ }
+
+ // Destructor (empty body)
+ BatchManager_SLURM::~BatchManager_SLURM()
+ {
+ // Nothing to do
+ }
+
+ // Job control: submits a job to the batch manager.
+ // Chooses the remote directory, exports the job files, builds and copies the
+ // three generated scripts, then launches the submission script.
+ // Always returns 0: no job identifier is retrieved here.
+ const int BatchManager_SLURM::submitJob(Job & job)
+ {
+   BEGIN_OF("BatchManager_SLURM::submitJob");
+
+   // Remote working directory for this submission.
+   setDirForTmpFiles();
+   SCRUTE(_dirForTmpFiles);
+
+   // Files of the job, then the generated scripts.
+   exportInFiles(job.getFileToExecute(),job.getFilesToExportList());
+   buildSalomeCouplingScript(job.getFileToExecute());
+   buildSalomeBatchScript(job.getNbProc());
+   buildSalomeSubmitBatchScript();
+
+   // Launch on the remote host.
+   submit();
+
+   END_OF("BatchManager_SLURM::submitJob");
+   const int jobId = 0;
+   return jobId;
+ }
+
+ // Job control: removes a job from the batch manager (empty body: jobid is not used)
+ void BatchManager_SLURM::deleteJob(const int & jobid)
+ {
+ }
+
+ // Job control: returns the state of the job.
+ // The jobid argument is not used: the returned value is always 0.
+ int BatchManager_SLURM::queryJob(const int & jobid)
+ {
+   return 0;
+ }
+
+ // Writes the coupling script into a local temporary file and copies it to the
+ // remote host as "<dir>/runSalome_<name>_Batch.sh", where <dir> is
+ // _dirForTmpFiles and <name> is the base name of fileToExecute without its
+ // last three characters (a ".py"-like extension is assumed).
+ //
+ // fileToExecute : path of the script to execute; only its base name is used
+ //
+ // Side effects: sets _fileNameToExecute and _TmpFileName; the local temporary
+ // file is removed once the copy has been attempted.
+ // Throws SALOME_Exception("Unknown protocol") when _params.protocol is neither
+ // "rsh" nor "ssh", and SALOME_Exception("Error of connection on remote host")
+ // when the copy command returns a non-zero status.
+ void BatchManager_SLURM::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception)
+ {
+   BEGIN_OF("BatchManager_SLURM::buildSalomeCouplingScript");
+
+   // Reject an unsupported protocol before any temporary file is created.
+   string command;
+   if( _params.protocol == "rsh" )
+     command = "rcp ";
+   else if( _params.protocol == "ssh" )
+     command = "scp ";
+   else
+     throw SALOME_Exception("Unknown protocol");
+
+   // Base name = everything after the last '/', minus the 3-character extension.
+   // std::string is used so that no manually sized, unterminated buffer is involved.
+   const string path( fileToExecute ? fileToExecute : "" ) ;
+   const string::size_type slash = path.rfind('/') ;
+   const string baseName = ( slash == string::npos ) ? path : path.substr( slash+1 ) ;
+   if ( baseName.size() >= 3 )
+     _fileNameToExecute = baseName.substr( 0 , baseName.size()-3 ) ;
+   else
+     _fileNameToExecute = baseName ; // too short to carry an extension: keep as is
+   SCRUTE(_fileNameToExecute) ;
+
+   _TmpFileName = BuildTemporaryFileName();
+   ofstream tempOutputFile;
+   tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
+   tempOutputFile << "#! /bin/sh -f" << endl ;
+   tempOutputFile << "cd " ;
+   tempOutputFile << _params.applipath << endl ;
+   tempOutputFile << "export PYTHONPATH=~/" ;
+   tempOutputFile << _dirForTmpFiles ;
+   tempOutputFile << ":$PYTHONPATH" << endl ;
+   tempOutputFile << "if test $SLURM_PROCID = 0; then" << endl ;
+   tempOutputFile << " ./runAppli --terminal --batch --modules=" ;
+   // Comma-separated list of the modules.
+   const std::vector<std::string>::size_type nbModules = _params.modulesList.size() ;
+   for ( std::vector<std::string>::size_type m = 0 ; m < nbModules ; m++ ) {
+     if ( m != 0 )
+       tempOutputFile << "," ;
+     tempOutputFile << _params.modulesList[m] ;
+   }
+   tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ;
+   tempOutputFile << " for ((ip=1; ip < ${SLURM_NPROCS} ; ip++))" << endl;
+   tempOutputFile << " do" << endl ;
+   tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ;
+   tempOutputFile << " done" << endl ;
+   tempOutputFile << " sleep 5" << endl ;
+   tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ;
+   tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl;
+   tempOutputFile << " ./runSession killCurrentPort" << endl;
+   tempOutputFile << "else" << endl ;
+   tempOutputFile << " sleep 5" << endl ;
+   tempOutputFile << " ./runSession waitNS.py" << endl ;
+   tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'${SLURM_PROCID}" << endl ;
+   tempOutputFile << "fi" << endl ;
+   tempOutputFile.flush();
+   tempOutputFile.close();
+   chmod(_TmpFileName.c_str(), 0755); // rwxr-xr-x (same value as 0x1ED)
+   SCRUTE(_TmpFileName.c_str()) ;
+
+   // <copy tool> <local tmp file> [user@]host:<dir>/runSalome_<name>_Batch.sh
+   command += _TmpFileName;
+   command += " ";
+   if (_params.username != ""){
+     command += _params.username;
+     command += "@";
+   }
+   command += _params.hostname;
+   command += ":";
+   command += _dirForTmpFiles ;
+   command += "/runSalome_" ;
+   command += _fileNameToExecute ;
+   command += "_Batch.sh" ;
+   SCRUTE(command.c_str());
+   const int status = system(command.c_str());
+   // The local file is no longer needed whether or not the copy succeeded.
+   RmTmpFile();
+   if(status)
+     throw SALOME_Exception("Error of connection on remote host");
+
+   END_OF("BatchManager_SLURM::buildSalomeCouplingScript");
+ }
+
+ // Writes the batch script (its "#BSUB" lines carry nbproc and the log file name;
+ // its last line runs "runSalome_<name>_Batch.sh" through "mpirun -srun") into a
+ // local temporary file, then copies it to the remote host as
+ // "<dir>/<name>_Batch.sh" and removes the local file.
+ //
+ // nbproc : value written after "#BSUB -n"
+ //
+ // Throws SALOME_Exception("Unknown protocol") when _params.protocol is neither
+ // "rsh" nor "ssh", and SALOME_Exception("Error of connection on remote host")
+ // when the copy command returns a non-zero status.
+ void BatchManager_SLURM::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception)
+ {
+   BEGIN_OF("BatchManager_SLURM::buildSalomeBatchScript");
+   _TmpFileName = BuildTemporaryFileName();
+
+   // Script content.
+   ofstream script( _TmpFileName.c_str(), ofstream::out );
+   script << "#! /bin/sh -f" << endl
+          << "#BSUB -n " << nbproc << endl
+          << "#BSUB -o runSalome.log%J" << endl
+          << "mpirun -srun ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh" << endl ;
+   script.flush();
+   script.close();
+   chmod(_TmpFileName.c_str(), 0x1ED);
+   SCRUTE(_TmpFileName.c_str()) ;
+
+   // Copy tool matching the access protocol.
+   const char *copyTool = 0;
+   if( _params.protocol == "rsh" )
+     copyTool = "rcp ";
+   else if( _params.protocol == "ssh" )
+     copyTool = "scp ";
+   if( copyTool == 0 )
+     throw SALOME_Exception("Unknown protocol");
+
+   // <copy tool> <local tmp file> [user@]host:<dir>/<name>_Batch.sh
+   string command = copyTool + _TmpFileName + " ";
+   if (_params.username != "")
+     command += _params.username + "@";
+   command += _params.hostname + ":" + _dirForTmpFiles + "/" + _fileNameToExecute + "_Batch.sh" ;
+   SCRUTE(command.c_str());
+
+   const int status = system(command.c_str());
+   if(status != 0)
+     throw SALOME_Exception("Error of connection on remote host");
+
+   RmTmpFile();
+   END_OF("BatchManager_SLURM::buildSalomeBatchScript");
+ }
+
+ // Writes the submission script (a single "bsub < ~/<dir>/<name>_Batch.sh &"
+ // command) into a local temporary file, then copies it to the remote host as
+ // "<dir>/<name>_bsub.sh" and removes the local file.
+ //
+ // Throws SALOME_Exception("Unknown protocol") when _params.protocol is neither
+ // "rsh" nor "ssh", and SALOME_Exception("Error of connection on remote host")
+ // when the copy command returns a non-zero status.
+ void BatchManager_SLURM::buildSalomeSubmitBatchScript() throw(SALOME_Exception)
+ {
+   BEGIN_OF("BatchManager_SLURM::buildSalomeSubmitBatchScript");
+   _TmpFileName = BuildTemporaryFileName();
+
+   // Script content.
+   ofstream script( _TmpFileName.c_str(), ofstream::out );
+   script << "#! /bin/sh -f" << endl ;
+   script << "bsub < ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << "_Batch.sh &" << endl ;
+   script.flush();
+   script.close();
+   chmod(_TmpFileName.c_str(), 0x1ED);
+   SCRUTE(_TmpFileName.c_str()) ;
+
+   // Copy tool matching the access protocol.
+   const bool viaRsh = ( _params.protocol == "rsh" );
+   const bool viaSsh = ( _params.protocol == "ssh" );
+   if( !viaRsh && !viaSsh )
+     throw SALOME_Exception("Unknown protocol");
+
+   // <copy tool> <local tmp file> [user@]host:<dir>/<name>_bsub.sh
+   string command = viaRsh ? "rcp " : "scp ";
+   command += _TmpFileName + " ";
+   if (_params.username != "")
+     command += _params.username + "@";
+   command += _params.hostname + ":" + _dirForTmpFiles + "/" + _fileNameToExecute + "_bsub.sh" ;
+   SCRUTE(command.c_str());
+
+   const int status = system(command.c_str());
+   if(status != 0)
+     throw SALOME_Exception("Error of connection on remote host");
+
+   RmTmpFile();
+   END_OF("BatchManager_SLURM::buildSalomeSubmitBatchScript");
+ }
+
+}
--- /dev/null
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * BatchManager_SLURM.hxx :
+ *
+ * Auteur : Bernard SECHER - CEA/DEN
+ * Date : Juillet 2007
+ * Projet : SALOME
+ *
+ */
+
+#ifndef _BL_BATCHMANAGER_SLURM_H_
+#define _BL_BATCHMANAGER_SLURM_H_
+
+#include <string>
+#include "Utils_SALOME_Exception.hxx"
+#include "BatchLight_BatchManager.hxx"
+
+namespace BatchLight {
+
+ class Job;
+
+ class BatchManager_SLURM : public BatchManager // BatchManager subclass declaring the job-control and script-building methods
+ {
+ public:
+ // Constructor and destructor
+ BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception); // connection to the host machine
+ virtual ~BatchManager_SLURM();
+
+ // Job-control methods (declared pure virtual in BatchManager)
+ virtual const int submitJob(BatchLight::Job & job); // submits a job to the batch manager
+ virtual void deleteJob(const int & jobid); // removes a job from the batch manager
+ virtual int queryJob(const int & jobid); // returns the state of the job
+
+ protected:
+ virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception);
+ virtual void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception);
+ virtual void buildSalomeSubmitBatchScript() throw(SALOME_Exception);
+
+ private:
+
+ };
+
+}
+
+#endif
--- /dev/null
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Job.cxx :
+ *
+ * Auteur : Bernard SECHER - CEA/DEN
+ * Date : Juillet 2007
+ * Projet : SALOME
+ *
+ */
+
+#include "BatchLight_Job.hxx"
+using namespace std;
+
+namespace BatchLight {
+
+ // Constructor: initializes the three members from the arguments (the fileToExecute pointer itself is stored, the characters are not copied)
+ Job::Job(const char *fileToExecute, const Engines::FilesToExportList& filesToExport, const int nbproc) : _fileToExecute(fileToExecute), _filesToExport(filesToExport), _nbproc(nbproc)
+ {
+ // Nothing to do
+ }
+
+}
--- /dev/null
+// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
+// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License.
+//
+// This library is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+/*
+ * Job.hxx :
+ *
+ * Auteur : Bernard SECHER - CEA/DEN
+ * Date : Juillet 2007
+ * Projet : SALOME
+ *
+ */
+
+#ifndef _BL_JOB_H_
+#define _BL_JOB_H_
+
+#include "utilities.h"
+#include <SALOMEconfig.h>
+#include CORBA_CLIENT_HEADER(SALOME_ContainerManager)
+
+namespace BatchLight {
+
+ class Job // describes one job: the file to execute, the files to export and the number of processors
+ {
+ public:
+ // Constructors and destructor
+ Job(const char *fileToExecute, const Engines::FilesToExportList& filesToExport, const int nbproc);
+ virtual ~Job() {}
+
+ const char *getFileToExecute() { return _fileToExecute; } // returns the pointer given to the constructor
+ const Engines::FilesToExportList getFilesToExportList() { return _filesToExport; } // returns a copy of the list
+ const int getNbProc() { return _nbproc; }
+
+ protected:
+ const char* _fileToExecute; // NOTE(review): only the pointer is kept, not a copy of the string - presumably the caller must keep it alive; confirm
+ const Engines::FilesToExportList _filesToExport;
+ const int _nbproc;
+
+ private:
+
+ };
+
+}
+
+#endif
Batch_PyVersatile.hxx \
Batch_RunTimeException.hxx \
Batch_StringType.hxx \
- Batch_TypeMismatchException.hxx
+ Batch_TypeMismatchException.hxx \
+ BatchLight_BatchManager.hxx \
+ BatchLight_BatchManager_SLURM.hxx \
+ BatchLight_Job.hxx
LIB_SRC = \
Batch_PyVersatile.cxx \
Batch_RunTimeException.cxx \
Batch_StringType.cxx \
- Batch_TypeMismatchException.cxx
+ Batch_TypeMismatchException.cxx \
+ BatchLight_BatchManager.cxx \
+ BatchLight_BatchManager_SLURM.cxx \
+ BatchLight_Job.cxx
LIB_CPPFLAGS = \
@PYTHON_INCLUDES@ \
-I$(srcdir)/../Basics \
-I$(srcdir)/../SALOMELocalTrace \
+ -I$(srcdir)/../Utils \
-I$(top_builddir)/salome_adm/unix \
+ -I$(top_builddir)/idl \
$(LIB_CPPFLAGS)
libSalomeBatch_la_LDFLAGS = -no-undefined -version-info=0:0:0
const CORBA::Long NumberOfProcessors ,
const Engines::MachineParameters& params)
{
- BEGIN_OF("SALOME_ContainerManager::batchSalomeJob");
- // Determination provisoire de l'extension .py
- // Il faudra une classe dans Utils pour gerer les Path FileNames et Extensions
- int lenf = strlen( fileToExecute ) ;
- if ( strcmp( &fileToExecute[lenf-3] ,".py" ) == NULL ) {
- int i = lenf-1 ;
- while ( i >= 0 && fileToExecute[i] != '/' ) {
- i -= 1 ;
- }
- char * FileNameToExecute = new char[lenf-4-i] ;
- strncpy(FileNameToExecute , &fileToExecute[i+1] , lenf-4-i) ;
- string fileNameToExecute =string( FileNameToExecute ) ;
- delete FileNameToExecute ;
- SCRUTE(fileNameToExecute) ;
-// Le /tmp n'est pas le meme d'un noeud a un autre ===>
- //string DirForTmpFiles = string("/tmp/")+string(getenv("USER"))+string("/") ;
- string DirForTmpFiles = string("Batch/") ;
- Batch::Date date = Batch::Date(time(0)) ;
- std::string thedate = date.str() ;
- int lend = thedate.size() ;
- i = 0 ;
- while ( i < lend ) {
- if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) {
- thedate[i] = '_' ;
- }
- i++ ;
- }
- SCRUTE(thedate);
- DirForTmpFiles += thedate ;
- SCRUTE(DirForTmpFiles) ;
- // Problemes avec ResourcesManager ...
- // Solution pour l'instant :
- // 31.05.107 : hostname : tantal
- // Alias : tantale.ccc.cea.fr
- Engines::CompoList aCompoList ;
- Engines::MachineList aMachineList = *GetFittingResources( params , aCompoList ) ;
- SCRUTE(aMachineList[0]) ;
- std::string aCluster = FindFirst( aMachineList) ;
- SCRUTE(aCluster) ;
- //Creation of /tmp/$USER/date_hh_mn_ss/ and copy of FileNameToExecute
- // and of filesToExport in that directory
- _ResManager->CopyFileNamesToExecute(aCluster,DirForTmpFiles,fileToExecute,filesToExport) ;
- //Creation of /tmp/$USER/date_hh_mn_ss/runSalome_'FileNameToExecute'_Batch.sh
- string runSalome_Batch = _ResManager->BuildCmdrunSalomeBatch(aCluster,DirForTmpFiles,fileNameToExecute) ;
- SCRUTE(runSalome_Batch) ;
- //Creation of /tmp/$USER/date_hh_mn_ss/'FileNameToExecute'_Batch.sh
- string FileNameToExecute_Batch = _ResManager->BuildCmdFileNameToExecute_Batch(aCluster,NumberOfProcessors,DirForTmpFiles,fileNameToExecute) ;
- SCRUTE(FileNameToExecute_Batch) ;
- //Creation of /tmp/$USER/date_hh_mn_ss/'FileNameToExecute'_bsub.sh
- string FileNameToExecute_bsub = _ResManager->BuildCmdFileNameToExecute_bsub(aCluster,DirForTmpFiles,fileNameToExecute) ;
- SCRUTE(FileNameToExecute_bsub) ;
- //Launch of /tmp/$USER/date_hh_mn_ss/'FileNameToExecute'_bsub.sh on theCluster
- string sshCommand = _ResManager->CmdToExecute_bsub(aCluster,DirForTmpFiles,fileNameToExecute) ;
- SCRUTE(sshCommand) ;
- }
- else {
- MESSAGE("SALOME_ContainerManager::batchSalomeJob unknown extension " << fileToExecute);
- }
- END_OF("SALOME_ContainerManager::batchSalomeJob");
+ _ResManager->batchSalomeJob(fileToExecute, filesToExport, NumberOfProcessors, params);
}
//=============================================================================
# This local variable defines the list of CPPFLAGS common to all target in this package.
COMMON_CPPFLAGS=\
+ -I$(srcdir)/../Batch \
-I$(srcdir)/../Basics \
-I$(srcdir)/../SALOMELocalTrace \
-I$(srcdir)/../NamingService \
// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
//
#include "SALOME_ResourcesManager.hxx"
-//#include "SALOME_Container_i.hxx"
+#include "BatchLight_Job.hxx"
+#include "BatchLight_BatchManager_SLURM.hxx"
#include "Utils_ExceptHandlers.hxx"
#include "OpUtil.hxx"
}
//=============================================================================
-/*!
- * Copy FileNameToExecute and filesToExport in DirForTmpFiles of machine
- */
-//=============================================================================
-void SALOME_ResourcesManager::CopyFileNamesToExecute(const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& PathFileNameToExecute ,
- const Engines::FilesToExportList& filesToExport) throw(SALOME_Exception)
-{
- BEGIN_OF("SALOME_ResourcesManager::CopyFileNamesToExecute");
- const ParserResourcesType& resInfo = _resourcesList[machine];
- string command;
- int status;
-
- if (resInfo.Protocol == rsh)
- command = "rsh ";
- else if (resInfo.Protocol == ssh)
- command = "ssh ";
- else
- throw SALOME_Exception("Unknown protocol");
-
- if (resInfo.UserName != ""){
- command += resInfo.UserName;
- command += "@";
- }
-
- command += resInfo.Alias;
- command += " \"mkdir -p ";
- command += DirForTmpFiles ;
- command += "\"" ;
- SCRUTE(command.c_str());
- status = system(command.c_str());
- if(status)
- throw SALOME_Exception("Error of connection on remote host");
-
- if (resInfo.Protocol == rsh)
- command = "rcp ";
- else if (resInfo.Protocol == ssh)
- command = "scp ";
- else
- throw SALOME_Exception("Unknown protocol");
-
- command += PathFileNameToExecute ;
- command += " ";
-
- if (resInfo.UserName != ""){
- command += resInfo.UserName;
- command += "@";
- }
-
- command += resInfo.Alias;
- command += ":";
- command += DirForTmpFiles ;
- SCRUTE(command.c_str());
- status = system(command.c_str());
- if(status)
- throw SALOME_Exception("Error of connection on remote host");
-
- int i ;
- for ( i = 0 ; i < filesToExport.length() ; i++ ) {
- if (resInfo.Protocol == rsh)
- command = "rcp ";
- else if (resInfo.Protocol == ssh)
- command = "scp ";
- else
- throw SALOME_Exception("Unknown protocol");
- command += filesToExport[i] ;
- command += " ";
- if (resInfo.UserName != ""){
- command += resInfo.UserName;
- command += "@";
- }
- command += resInfo.Alias;
- command += ":";
- command += DirForTmpFiles ;
- SCRUTE(command.c_str());
- status = system(command.c_str());
- if(status)
- throw SALOME_Exception("Error of connection on remote host");
- }
-
- END_OF("SALOME_ResourcesManager::CopyFileNamesToExecute");
-}
-
-//=============================================================================
-/*!
- * builds the shell to create for runSalome Batch on a Cluster :
- * #! /bin/sh -f
- * source preReqFilePath
- * export PYTHONPATH=/$HOME/Batch/date_hh_mn_ss:$PYTHONPATH
- * if test $SLURM_PROCID = 0; then
- * runSalome --terminal --batch --modules=ListOfModules --standalone=registry,study,moduleCatalog --execute='FileNameToExecute',killall --killall
- * else
- * sleep 10
- * export SALOME_BATCH="1"
- * SALOME_Container "YACS_Server_"${SLURM_PROCID}
- * fi
- *
- * with preReqFilePath of CatalogResource for Salome environnement
- * with ListOfModules of CatalogResource
- * with FileNameToExecute as python script to execute
- */
-//=============================================================================
-std::string SALOME_ResourcesManager::BuildCmdrunSalomeBatch(
- const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception)
-{
- BEGIN_OF("SALOME_ResourcesManager::BuildCmdrunSalomeBatch");
- int status;
- _TmpFileName = BuildTemporaryFileName();
- ofstream tempOutputFile;
- tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
- const ParserResourcesType& resInfo = _resourcesList[machine];
- resInfo.Print() ;
- tempOutputFile << "#! /bin/sh -f" << endl ;
- tempOutputFile << "cd " ;
- tempOutputFile << resInfo.AppliPath << endl ;
- tempOutputFile << "export PYTHONPATH=~/" ;
- tempOutputFile << DirForTmpFiles ;
- tempOutputFile << ":$PYTHONPATH" << endl ;
- tempOutputFile << "if test $SLURM_PROCID = 0; then" << endl ;
- tempOutputFile << " ./runAppli --terminal --batch --modules=" ;
- int i ;
- for ( i = 0 ; i < resInfo.ModulesList.size() ; i++ ) {
- tempOutputFile << resInfo.ModulesList[i] ;
- if ( i != resInfo.ModulesList.size()-1 )
- tempOutputFile << "," ;
- }
- tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ;
- tempOutputFile << " for ((ip=1; ip < ${SLURM_NPROCS} ; ip++))" << endl;
- tempOutputFile << " do" << endl ;
- tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ;
- tempOutputFile << " done" << endl ;
- tempOutputFile << " sleep 5" << endl ;
- tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ;
- tempOutputFile << " ./runSession python ~/" << DirForTmpFiles << "/" << FileNameToExecute << ".py" << endl;
- tempOutputFile << " ./runSession killCurrentPort" << endl;
- tempOutputFile << "else" << endl ;
- tempOutputFile << " sleep 5" << endl ;
- tempOutputFile << " ./runSession waitNS.py" << endl ;
- tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'${SLURM_PROCID}" << endl ;
- tempOutputFile << "fi" << endl ;
- tempOutputFile.flush();
- tempOutputFile.close();
- chmod(_TmpFileName.c_str(), 0x1ED);
- SCRUTE(_TmpFileName.c_str()) ;
-
- string command;
- if (resInfo.Protocol == rsh)
- command = "rcp ";
- else if (resInfo.Protocol == ssh)
- command = "scp ";
- else
- throw SALOME_Exception("Unknown protocol");
-
- command += _TmpFileName;
- command += " ";
- if (resInfo.UserName != ""){
- command += resInfo.UserName;
- command += "@";
- }
- command += resInfo.Alias;
- command += ":";
- command += DirForTmpFiles ;
- command += "/runSalome_" ;
- command += FileNameToExecute ;
- command += "_Batch.sh" ;
- SCRUTE(command.c_str());
- status = system(command.c_str());
- if(status)
- throw SALOME_Exception("Error of connection on remote host");
- RmTmpFile();
-
- END_OF("SALOME_ResourcesManager::BuildCmdrunSalomeBatch");
- return command;
-}
-
-//=============================================================================
-/*!
- * builds the shell to create for runSalome Batch on a Cluster :
- * #! /bin/sh -f
- * #BSUB -n NumberOfProcessors
- * #BSUB -o runSalome.log%J
- * mpirun -srun /$HOME/Batch/date_hh_mn_ss/runSalome_'FileNameToExecute'_Batch.sh
- * with NumberOfProcessors from params
- * with FileNameToExecute as python script to execute
- */
+/*! CORBA Method:
+ * Submits a batch job on a cluster and returns the JobId.
+ * The first machine returned by GetFittingResources() for \a params is used,
+ * and the job is submitted through a BatchLight::BatchManager_SLURM.
+ * \param fileToExecute : .py/.exe/.sh/... to execute on the batch cluster
+ * \param filesToExport : to export on the batch cluster
+ * \param NumberOfProcessors : Number of processors needed on the batch cluster
+ * \param params : Constraints for the choice of the batch cluster
+ * \return the identifier reported by the batch manager (assumes that
+ * submitJob() returns the job identifier - TODO confirm), or -1 if a
+ * SALOME_Exception was raised while building or submitting the job
+ * \throw SALOME_Exception if no resource fits \a params or if the protocol of
+ * the selected resource is neither rsh nor ssh
+ */
//=============================================================================
-std::string SALOME_ResourcesManager::BuildCmdFileNameToExecute_Batch(
- const std::string& machine,
- const long NumberOfProcessors,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception)
+CORBA::Long SALOME_ResourcesManager::batchSalomeJob(
+ const char * fileToExecute ,
+ const Engines::FilesToExportList& filesToExport ,
+ const CORBA::Long NumberOfProcessors ,
+ const Engines::MachineParameters& params)
{
- BEGIN_OF("SALOME_ResourcesManager::BuildCmdFileNameToExecute_Batch");
- int status;
- _TmpFileName = BuildTemporaryFileName();
- ofstream tempOutputFile;
- tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
- const ParserResourcesType& resInfo = _resourcesList[machine];
- resInfo.Print() ;
- tempOutputFile << "#! /bin/sh -f" << endl ;
- tempOutputFile << "#BSUB -n " ;
- tempOutputFile << NumberOfProcessors << endl ;
- tempOutputFile << "#BSUB -o runSalome.log%J" << endl ;
- tempOutputFile << "mpirun -srun ~/" ;
- tempOutputFile << DirForTmpFiles ;
- tempOutputFile << "/runSalome_" ;
- tempOutputFile << FileNameToExecute ;
- tempOutputFile << "_Batch.sh" << endl ;
- tempOutputFile.flush();
- tempOutputFile.close();
- chmod(_TmpFileName.c_str(), 0x1ED);
- SCRUTE(_TmpFileName.c_str()) ;
-
- string command;
- if (resInfo.Protocol == rsh)
- command = "rcp ";
- else if (resInfo.Protocol == ssh)
- command = "scp ";
- else
- throw SALOME_Exception("Unknown protocol");
- command += _TmpFileName;
- command += " ";
- if (resInfo.UserName != ""){
- command += resInfo.UserName;
- command += "@";
- }
- command += resInfo.Alias;
- command += ":";
- command += DirForTmpFiles ;
- command += "/" ;
- command += FileNameToExecute ;
- command += "_Batch.sh" ;
- SCRUTE(command.c_str());
- status = system(command.c_str());
- if(status)
- throw SALOME_Exception("Error of connection on remote host");
-
- RmTmpFile();
- END_OF("SALOME_ResourcesManager::BuildCmdFileNameToExecute_Batch");
-
- return command;
-}
-
-//=============================================================================
-/*!
- * builds the shell to create for runSalome Batch on a Cluster :
- * #! /bin/sh -f
- * bsub < /$HOME/Batch/date_hh_mn_ss/'FileNameToExecute'_Batch.sh &
- * with preReqFilePath of CatalogResource for Salome environnement
- * with ListOfModules of CatalogResource
- * with FileNameToExecute as python script to execute
- */
-//=============================================================================
-std::string SALOME_ResourcesManager::BuildCmdFileNameToExecute_bsub(
- const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception)
-{
- BEGIN_OF("SALOME_ResourcesManager::BuildCmdFileNameToExecute_bsub");
- _TmpFileName = BuildTemporaryFileName();
- int status;
- ofstream tempOutputFile;
- tempOutputFile.open(_TmpFileName.c_str(), ofstream::out );
- const ParserResourcesType& resInfo = _resourcesList[machine];
- resInfo.Print() ;
- tempOutputFile << "#! /bin/sh -f" << endl ;
- tempOutputFile << "bsub < ~/" ;
- tempOutputFile << DirForTmpFiles ;
- tempOutputFile << "/" ;
- tempOutputFile << FileNameToExecute ;
- tempOutputFile << "_Batch.sh &" << endl ;
- tempOutputFile.flush();
- tempOutputFile.close();
- chmod(_TmpFileName.c_str(), 0x1ED);
- SCRUTE(_TmpFileName.c_str()) ;
-
- string command;
- if (resInfo.Protocol == rsh)
- command = "rcp ";
- else if (resInfo.Protocol == ssh)
- command = "scp ";
+ BEGIN_OF("SALOME_ResourcesManager::batchSalomeJob");
+ Engines::CompoList aCompoList ;
+ vector<string> aMachineList = GetFittingResources( params , aCompoList ) ;
+ // Guard the [0] access below: indexing an empty vector is undefined behaviour.
+ if( aMachineList.empty() )
+ throw SALOME_Exception("No resource found matching the requested parameters");
+ const ParserResourcesType& resInfo = _resourcesList[aMachineList[0]];
+
+ // Translate the resource catalog entry into BatchLight connection parameters.
+ BatchLight::batchParams p;
+ p.hostname = resInfo.Alias;
+ if( resInfo.Protocol == rsh )
+ p.protocol = "rsh";
+ else if( resInfo.Protocol == ssh )
+ p.protocol = "ssh";
else
throw SALOME_Exception("Unknown protocol");
- command += _TmpFileName;
- command += " ";
- if (resInfo.UserName != ""){
- command += resInfo.UserName;
- command += "@";
+ p.username = resInfo.UserName;
+ p.applipath = resInfo.AppliPath;
+ p.modulesList = resInfo.ModulesList;
+
+ // -1 is reported to the caller when the submission fails.
+ CORBA::Long jobId = -1;
+ try{
+ BatchLight::Job job = BatchLight::Job( fileToExecute, filesToExport, NumberOfProcessors );
+ BatchLight::BatchManager_SLURM bms = BatchLight::BatchManager_SLURM(p);
+ // assumes submitJob() returns the job identifier - TODO confirm
+ jobId = bms.submitJob(job);
}
- command += resInfo.Alias;
- command += ":";
- command += DirForTmpFiles ;
- command += "/" ;
- command += FileNameToExecute ;
- command += "_bsub.sh" ;
- SCRUTE(command.c_str());
- status = system(command.c_str());
- if(status)
- throw SALOME_Exception("Error of connection on remote host");
-
- RmTmpFile();
- END_OF("SALOME_ResourcesManager::BuildCmdFileNameToExecute_bsub");
-
- return command;
-}
-
-//=============================================================================
-/*!
- * builds the rsh/ssh command for submitting of a batch job :
- * ssh tantale.ccc.cea.fr /$HOME/Batch/date_hh_mn_ss/'FileNameToExecute'_bsub.sh
- * with FileNameToExecute as python script to execute
- */
-//=============================================================================
-std::string SALOME_ResourcesManager::CmdToExecute_bsub(
- const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception)
-{
- BEGIN_OF("SALOME_ResourcesManager::CmdToExecute_bsub");
- const ParserResourcesType& resInfo = _resourcesList[machine];
- string command;
- resInfo.Print();
- int status;
-
- if (resInfo.Protocol == rsh)
- command = "rsh " ;
- else if (resInfo.Protocol == ssh)
- command = "ssh ";
- else
- throw SALOME_Exception("Unknown protocol");
- if (resInfo.UserName != ""){
- command += resInfo.UserName;
- command += "@";
+ catch(const SALOME_Exception &ex){
+ // The failure is only logged; jobId keeps its -1 error value.
+ MESSAGE(ex.what());
}
- command += resInfo.Alias;
- command += " \"tcsh " ;
- command += DirForTmpFiles ;
- command += "/" ;
- command += FileNameToExecute ;
- command += "_bsub.sh\"" ;
- SCRUTE(command.c_str());
- status = system(command.c_str());
- if(status)
- throw SALOME_Exception("Error of connection on remote host");
-
- END_OF("SALOME_ResourcesManager::CmdToExecute_bsub");
-
- return command;
+ END_OF("SALOME_ResourcesManager::batchSalomeJob");
+ return jobId;
}
//=============================================================================
(const std::string& machine,
const Engines::MachineParameters& params, const long id);
- void CopyFileNamesToExecute(const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& PathFileNameToExecute ,
- const Engines::FilesToExportList& filesToExport) throw(SALOME_Exception);
- std::string BuildCmdrunSalomeBatch(const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception);
- std::string BuildCmdFileNameToExecute_Batch(const std::string& machine,
- const long NumberOfProcessors,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception);
- std::string BuildCmdFileNameToExecute_bsub(const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception);
- std::string CmdToExecute_bsub(const std::string& machine,
- const std::string& DirForTmpFiles ,
- const std::string& FileNameToExecute ) throw(SALOME_Exception);
+ CORBA::Long batchSalomeJob(const char * fileToExecute , // .py/.exe/.sh/... to execute on the batch cluster
+ const Engines::FilesToExportList& filesToExport , // files to export on the batch cluster
+ const CORBA::Long NumberOfProcessors , // number of processors needed on the batch cluster
+ const Engines::MachineParameters& params); // constraints for the choice of the batch cluster
std::string BuildCommandToLaunchLocalContainer
(const Engines::MachineParameters& params, const long id);