From: caremoli Date: Mon, 28 Jan 2008 14:24:15 +0000 (+0000) Subject: CCAR: move BatchLight classes from Batch to Launcher (dependency loop) X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=cc359250421a7443ead7b19d8398cbb8489a6858;p=modules%2Fkernel.git CCAR: move BatchLight classes from Batch to Launcher (dependency loop) --- diff --git a/src/Batch/BatchLight_BatchManager.cxx b/src/Batch/BatchLight_BatchManager.cxx deleted file mode 100644 index 4e0241f38..000000000 --- a/src/Batch/BatchLight_BatchManager.cxx +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include -#include -#include -#include -#include "BatchLight_Job.hxx" -#include "BatchLight_BatchManager.hxx" -#include "Batch_Date.hxx" -using namespace std; - -namespace BatchLight { - - // Constructeur - BatchManager::BatchManager(const batchParams& p) throw(SALOME_Exception) : _params(p) - { - SCRUTE(_params.hostname); - SCRUTE(_params.protocol); - SCRUTE(_params.username); - // On verifie que le hostname est correct - if (!gethostbyname(_params.hostname.c_str())) { // hostname unknown from network - string msg = "hostname \""; - msg += _params.hostname; - msg += "\" unknown from the network"; - throw SALOME_Exception(msg.c_str()); - } - _mpiImpl = NULL; - } - - // Destructeur - BatchManager::~BatchManager() - { - MESSAGE("BatchManager destructor "<<_params.hostname); - std::map < int, const BatchLight::Job * >::const_iterator it; - for(it=_jobmap.begin();it!=_jobmap.end();it++) - delete it->second; - if(_mpiImpl) delete _mpiImpl; - } - - // Methode pour le controle des jobs : soumet un job au gestionnaire - const int BatchManager::submitJob(Job* job) - { - BEGIN_OF("BatchManager::submitJob"); - int id; - - // temporary directory on cluster to put input files for job - setDirForTmpFiles(); - SCRUTE(_dirForTmpFiles); - - // export input files on cluster - exportInputFiles(job->getFileToExecute(),job->getFilesToExportList()); - - // build salome coupling script for job - buildSalomeCouplingScript(job->getFileToExecute()); - - // build batch script for job - buildSalomeBatchScript(job->getNbProc()); - - // submit job on cluster - id = submit(); - - // register job on map - _jobmap[id] = job; - END_OF("BatchManager::submitJob"); - return id; - } - - void BatchManager::setDirForTmpFiles() - { - int i; - - _dirForTmpFiles = string("Batch/"); - Batch::Date date = Batch::Date(time(0)) ; - std::string thedate = date.str() ; - int lend = thedate.size() ; - i = 0 ; - while ( i < lend ) { - if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { - thedate[i] = '_' ; - } - i++ ; - } - _dirForTmpFiles += thedate ; - } - - void BatchManager::exportInputFiles(const char *fileToExecute, const Engines::FilesList filesToExportList) throw(SALOME_Exception) - { - BEGIN_OF("BatchManager::exportInFiles"); - string command = _params.protocol; - int status; - - command += " "; - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"mkdir -p "; - command += _dirForTmpFiles ; - command += "\"" ; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); - - command += fileToExecute; - command += " "; - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += ":"; - command += _dirForTmpFiles ; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - int i ; - for ( i = 0 ; i < filesToExportList.length() ; i++ ) { - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); - command += filesToExportList[i] ; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += _dirForTmpFiles ; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - } - - END_OF("BatchManager::exportInFiles"); - } - - void BatchManager::importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception) - { - BEGIN_OF("BatchManager::importOutputFiles"); - string command; - int status; - - const BatchLight::Job* myJob = _jobmap[jobId]; - Engines::FilesList filesToImportList = myJob->getFilesToImportList(); - - for ( int i = 0 ; i < filesToImportList.length() ; i++ ) { - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += filesToImportList[i] ; - command += " "; - command += directory; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - } - - END_OF("BatchManager::importOutputFiles"); - } - - string BatchManager::BuildTemporaryFileName() const - { - //build more complex file name to support multiple salome session - char *temp = new char[19]; - strcpy(temp, "/tmp/command"); - strcat(temp, "XXXXXX"); -#ifndef WNT - - mkstemp(temp); -#else - - char aPID[80]; - itoa(getpid(), aPID, 10); - strcat(temp, aPID); -#endif - - string command(temp); - delete [] temp; - command += ".sh"; - return command; - } - - void BatchManager::RmTmpFile() - { - if (_TmpFileName != ""){ - string command = "rm "; - command += _TmpFileName; - char *temp = strdup(command.c_str()); - int lgthTemp = strlen(temp); - temp[lgthTemp - 3] = '*'; - temp[lgthTemp - 2] = '\0'; - system(temp); - free(temp); - } - } - - MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(SALOME_Exception) - { - if(mpiImpl == "lam") - return new MpiImpl_LAM(); - else if(mpiImpl == "mpich1") - return new MpiImpl_MPICH1(); - else if(mpiImpl == "mpich2") - return new MpiImpl_MPICH2(); - else if(mpiImpl == "openmpi") - return new MpiImpl_OPENMPI(); - else if(mpiImpl == "indif") - throw SALOME_Exception("you must specify a mpi implementation in CatalogResources.xml file"); - else{ - ostringstream oss; - oss << mpiImpl << " : not yet implemented"; - throw SALOME_Exception(oss.str().c_str()); - } - } - -} diff --git a/src/Batch/BatchLight_BatchManager.hxx b/src/Batch/BatchLight_BatchManager.hxx deleted file mode 100644 index a8ea0061a..000000000 --- a/src/Batch/BatchLight_BatchManager.hxx +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_BATCHMANAGER_H_ -#define _BL_BATCHMANAGER_H_ - -#include -#include -#include -#include "Utils_SALOME_Exception.hxx" -#include -#include -#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) -#include "MpiImpl.hxx" - -namespace BatchLight { - - class Job; - - struct batchParams{ - std::string hostname; // serveur ou tourne le BatchManager - std::string protocol; // protocole d'acces au serveur: ssh ou rsh - std::string username; // username d'acces au serveur - std::string applipath; // path of apllication directory on server - std::vector modulesList; // list of Salome modules installed on server - unsigned int nbnodes; // number of nodes on cluster - unsigned int nbprocpernode; // number of processors on each node - std::string mpiImpl; // mpi implementation - }; - - class BatchManager - { - public: - // Constructeur et destructeur - BatchManager(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host - virtual ~BatchManager(); - - // Methodes pour le controle des jobs : virtuelles pures - const int submitJob(BatchLight::Job* job); // soumet un job au gestionnaire - virtual void deleteJob(const int & jobid) = 0; // retire un job du gestionnaire - virtual std::string queryJob(const int & jobid) = 0; // renvoie l'etat du job - void importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception); - - protected: - batchParams _params; - MpiImpl *_mpiImpl; - - std::map _jobmap; - std::string _dirForTmpFiles; // repertoire temporaire sur le serveur - std::string _TmpFileName; - std::string _fileNameToExecute; - - virtual int submit() throw(SALOME_Exception) = 0; - void setDirForTmpFiles(); - void exportInputFiles( const char *fileToExecute, const Engines::FilesList filesToExportList ) throw(SALOME_Exception); - virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) = 0; - virtual void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) = 0; - - std::string BuildTemporaryFileName() const; - void RmTmpFile(); - MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(SALOME_Exception); - - private: - - }; - -} - -#endif diff --git a/src/Batch/BatchLight_BatchManager_PBS.cxx b/src/Batch/BatchLight_BatchManager_PBS.cxx deleted file mode 100644 index bba030a0c..000000000 --- a/src/Batch/BatchLight_BatchManager_PBS.cxx +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include "BatchLight_BatchManager_PBS.hxx" -#include "utilities.h" -#include "BatchLight_Job.hxx" -#include -#include -#include -#include - -using namespace std; - -namespace BatchLight { - - // Constructeur - BatchManager_PBS::BatchManager_PBS(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) - { - // pbs batch system needs to know mpi implementation - _mpiImpl = FactoryMpiImpl(_params.mpiImpl); - } - - // Destructeur - BatchManager_PBS::~BatchManager_PBS() - { - MESSAGE("BatchManager_PBS destructor "<<_params.hostname); - } - - // Methode pour le controle des jobs : retire un job du gestionnaire - void BatchManager_PBS::deleteJob(const int & jobid) - { - BEGIN_OF("BatchManager_PBS::deleteJob"); - string command; - int status; - ostringstream oss; - oss << jobid; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"qdel " ; - command += oss.str(); - command += "\""; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - MESSAGE("jobId = " << jobid << "killed"); - END_OF("BatchManager_PBS::deleteJob"); - } - - // Methode pour le controle des jobs : renvoie l'etat du job - string BatchManager_PBS::queryJob(const int & jobid) - { - BEGIN_OF("BatchManager_PBS::queryJob"); - // define name of log file - string jstatus; - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - srand ( time(NULL) ); - int ir = rand(); - ostringstream oss; - oss << ir; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"qstat -f " ; - ostringstream oss2; - oss2 << jobid; - command += oss2.str(); - command += "\" > "; - command += logFile; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status && status != 153 && status != 256*153){ - MESSAGE("status="<> jstatus; - iss >> jstatus; - iss >> jstatus; - } - else - jstatus = "U"; - } - - MESSAGE("jobId = " << jobid << " " << jstatus); - END_OF("BatchManager_PBS::queryJob"); - return jstatus; - } - - void BatchManager_PBS::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) - { - BEGIN_OF("BatchManager_PBS::buildSalomeCouplingScript"); - int status; - - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); - - _TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); - tempOutputFile << "#! /bin/sh -f" << endl ; - tempOutputFile << "cd " ; - tempOutputFile << _params.applipath << endl ; - tempOutputFile << "export PYTHONPATH=~/" ; - tempOutputFile << _dirForTmpFiles ; - tempOutputFile << ":$PYTHONPATH" << endl ; - tempOutputFile << "if test " ; - tempOutputFile << _mpiImpl->rank() ; - tempOutputFile << " = 0; then" << endl ; - tempOutputFile << " ./runAppli --terminal --modules=" ; - for ( int i = 0 ; i < _params.modulesList.size() ; i++ ) { - tempOutputFile << _params.modulesList[i] ; - if ( i != _params.modulesList.size()-1 ) - tempOutputFile << "," ; - } - tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; - tempOutputFile << " for ((ip=1; ip < "; - tempOutputFile << _mpiImpl->size(); - tempOutputFile << " ; ip++))" << endl; - tempOutputFile << " do" << endl ; - tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; - tempOutputFile << " done" << endl ; - tempOutputFile << " sleep 5" << endl ; - tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; - tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; - tempOutputFile << " ./runSession killCurrentPort" << endl; - tempOutputFile << "else" << endl ; - tempOutputFile << " sleep 5" << endl ; - tempOutputFile << " ./runSession waitNS.py" << endl ; - tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'"; - tempOutputFile << _mpiImpl->rank() << endl ; - tempOutputFile << "fi" << endl ; - tempOutputFile.flush(); - tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; - - string command; - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); - - command += _TmpFileName; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += _dirForTmpFiles ; - command += "/runSalome_" ; - command += _fileNameToExecute ; - command += "_Batch.sh" ; - SCRUTE(_fileNameToExecute) ; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - RmTmpFile(); - - END_OF("BatchManager_PBS::buildSalomeCouplingScript"); - } - - void BatchManager_PBS::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) - { - BEGIN_OF("BatchManager_PBS::buildSalomeBatchScript"); - int status; - - int nbmaxproc = _params.nbnodes * _params.nbprocpernode; - if( nbproc > nbmaxproc ){ - MESSAGE(nbproc << " processors asked on a cluster of " << nbmaxproc << " processors"); - throw SALOME_Exception("Too much processors asked for that cluster"); - } - - int nbnodes; - if( nbproc < _params.nbnodes ) - nbnodes = nbproc; - else - nbnodes = _params.nbnodes; - - _TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); - - ostringstream filenameToExecute; - filenameToExecute << " ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh"; - - tempOutputFile << "#! /bin/sh -f" << endl ; - tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; - tempOutputFile << "#PBS -o ~/" << _dirForTmpFiles << "/runSalome.log${PBS_JOBID}" << endl ; - tempOutputFile << _mpiImpl->boot("${PBS_NODEFILE}",nbnodes); - tempOutputFile << _mpiImpl->run("${PBS_NODEFILE}",nbproc,filenameToExecute.str()); - tempOutputFile << _mpiImpl->halt(); - tempOutputFile.flush(); - tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; - - string command; - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); - command += _TmpFileName; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += _dirForTmpFiles ; - command += "/" ; - command += _fileNameToExecute ; - command += "_Batch.sh" ; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - RmTmpFile(); - END_OF("BatchManager_PBS::buildSalomeBatchScript"); - - } - - int BatchManager_PBS::submit() throw(SALOME_Exception) - { - BEGIN_OF("BatchManager_PBS::submit"); - - // define name of log file - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - srand ( time(NULL) ); - int ir = rand(); - ostringstream oss; - oss << ir; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"qsub " ; - command += _dirForTmpFiles ; - command += "/" ; - command += _fileNameToExecute ; - command += "_Batch.sh\" > "; - command += logFile; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); - - string sline(line); - int pos = sline.find("."); - string strjob; - if(pos == string::npos) - strjob = sline; - else - strjob = sline.substr(0,pos); - - int id; - istringstream iss(strjob); - iss >> id; - - END_OF("BatchManager_PBS::submit"); - return id; - } - -} diff --git a/src/Batch/BatchLight_BatchManager_PBS.hxx b/src/Batch/BatchLight_BatchManager_PBS.hxx deleted file mode 100644 index e7e5789c7..000000000 --- a/src/Batch/BatchLight_BatchManager_PBS.hxx +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_BATCHMANAGER_PBS_H_ -#define _BL_BATCHMANAGER_PBS_H_ - -#include -#include "Utils_SALOME_Exception.hxx" -#include "BatchLight_BatchManager.hxx" - -namespace BatchLight { - - class Job; - - class BatchManager_PBS : public BatchManager - { - public: - // Constructeur et destructeur - BatchManager_PBS(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host - virtual ~BatchManager_PBS(); - - // Methodes pour le controle des jobs : virtuelles pures - void deleteJob(const int & jobid); // retire un job du gestionnaire - std::string queryJob(const int & jobid); // renvoie l'etat du job - - private: - void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); - void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); - int submit() throw(SALOME_Exception); - }; - -} - -#endif diff --git a/src/Batch/BatchLight_BatchManager_SLURM.cxx b/src/Batch/BatchLight_BatchManager_SLURM.cxx deleted file mode 100644 index 9a740e328..000000000 --- a/src/Batch/BatchLight_BatchManager_SLURM.cxx +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include "BatchLight_BatchManager_SLURM.hxx" -#include "utilities.h" -#include "BatchLight_Job.hxx" -#include -#include -#include -#include - -using namespace std; - -namespace BatchLight { - - // Constructeur - BatchManager_SLURM::BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) - { - } - - // Destructeur - BatchManager_SLURM::~BatchManager_SLURM() - { - MESSAGE("BatchManager_SLURM destructor "<<_params.hostname); - } - - // Methode pour le controle des jobs : retire un job du gestionnaire - void BatchManager_SLURM::deleteJob(const int & jobid) - { - BEGIN_OF("BatchManager_SLURM::deleteJob"); - string command; - int status; - ostringstream oss; - oss << jobid; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"bkill " ; - command += oss.str(); - command += "\""; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - MESSAGE("jobId = " << jobid << "killed"); - END_OF("BatchManager_SLURM::deleteJob"); - } - - // Methode pour le controle des jobs : renvoie l'etat du job - string BatchManager_SLURM::queryJob(const int & jobid) - { - BEGIN_OF("BatchManager_SLURM::queryJob"); - // define name of log file - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - srand ( time(NULL) ); - int ir = rand(); - ostringstream oss; - oss << ir; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"bjobs " ; - ostringstream oss2; - oss2 << jobid; - command += oss2.str(); - command += "\" > "; - command += logFile; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - // read staus of job in log file - char line[128]; - ifstream fp(logFile.c_str(),ios::in); - fp.getline(line,80,'\n'); - - string sjobid, username, jstatus; - fp >> sjobid; - fp >> username; - fp >> jstatus; - - MESSAGE("jobId = " << jobid << " " << jstatus); - END_OF("BatchManager_SLURM::queryJob"); - return jstatus; - } - - void BatchManager_SLURM::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) - { - BEGIN_OF("BatchManager_SLURM::buildSalomeCouplingScript"); - int status; - - string::size_type p1 = string(fileToExecute).find_last_of("/"); - string::size_type p2 = string(fileToExecute).find_last_of("."); - _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); - - _TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); - tempOutputFile << "#! /bin/sh -f" << endl ; - tempOutputFile << "cd " ; - tempOutputFile << _params.applipath << endl ; - tempOutputFile << "export PYTHONPATH=~/" ; - tempOutputFile << _dirForTmpFiles ; - tempOutputFile << ":$PYTHONPATH" << endl ; - tempOutputFile << "if test $SLURM_PROCID = 0; then" << endl ; - tempOutputFile << " ./runAppli --terminal --modules=" ; - for ( int i = 0 ; i < _params.modulesList.size() ; i++ ) { - tempOutputFile << _params.modulesList[i] ; - if ( i != _params.modulesList.size()-1 ) - tempOutputFile << "," ; - } - tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; - tempOutputFile << " for ((ip=1; ip < ${SLURM_NPROCS} ; ip++))" << endl; - tempOutputFile << " do" << endl ; - tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; - tempOutputFile << " done" << endl ; - tempOutputFile << " ./runSession waitNS.sh" << endl ; - tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; - tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; - tempOutputFile << " ./runSession killCurrentPort" << endl; - tempOutputFile << "else" << endl ; - tempOutputFile << " ./runSession waitNS.sh" << endl ; - tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'${SLURM_PROCID}" << endl ; - tempOutputFile << "fi" << endl ; - tempOutputFile.flush(); - tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; - - string command; - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); - - command += _TmpFileName; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += _dirForTmpFiles ; - command += "/runSalome_" ; - command += _fileNameToExecute ; - command += "_Batch.sh" ; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - RmTmpFile(); - - END_OF("BatchManager_SLURM::buildSalomeCouplingScript"); - } - - void BatchManager_SLURM::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) - { - BEGIN_OF("BatchManager_SLURM::buildSalomeBatchScript"); - int status; - _TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); - - tempOutputFile << "#! /bin/sh -f" << endl ; - tempOutputFile << "#BSUB -n " << nbproc << endl ; - tempOutputFile << "#BSUB -o " << _dirForTmpFiles << "/runSalome.log%J" << endl ; - tempOutputFile << "mpirun -srun ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh" << endl ; - tempOutputFile.flush(); - tempOutputFile.close(); - chmod(_TmpFileName.c_str(), 0x1ED); - SCRUTE(_TmpFileName.c_str()) ; - - string command; - if( _params.protocol == "rsh" ) - command = "rcp "; - else if( _params.protocol == "ssh" ) - command = "scp "; - else - throw SALOME_Exception("Unknown protocol"); - command += _TmpFileName; - command += " "; - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - command += _params.hostname; - command += ":"; - command += _dirForTmpFiles ; - command += "/" ; - command += _fileNameToExecute ; - command += "_Batch.sh" ; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - RmTmpFile(); - END_OF("BatchManager_SLURM::buildSalomeBatchScript"); - - } - - int BatchManager_SLURM::submit() throw(SALOME_Exception) - { - BEGIN_OF("BatchManager_SLURM::submit"); - - // define name of log file - string logFile="/tmp/logs/"; - logFile += getenv("USER"); - logFile += "/batchSalome_"; - - srand ( time(NULL) ); - int ir = rand(); - ostringstream oss; - oss << ir; - logFile += oss.str(); - logFile += ".log"; - - string command; - int status; - - // define command to submit batch - if( _params.protocol == "rsh" ) - command = "rsh "; - else if( _params.protocol == "ssh" ) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (_params.username != ""){ - command += _params.username; - command += "@"; - } - - command += _params.hostname; - command += " \"bsub < " ; - command += _dirForTmpFiles ; - command += "/" ; - command += _fileNameToExecute ; - command += "_Batch.sh\" > "; - command += logFile; - SCRUTE(command.c_str()); - status = system(command.c_str()); - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - // read id of submitted job in log file - char line[128]; - FILE *fp = fopen(logFile.c_str(),"r"); - fgets( line, 128, fp); - fclose(fp); - - string sline(line); - int p1 = sline.find("<"); - int p2 = sline.find(">"); - string strjob = sline.substr(p1+1,p2-p1-1); - - int id; - istringstream iss(strjob); - iss >> id; - - END_OF("BatchManager_SLURM::submit"); - return id; - } - -} diff --git a/src/Batch/BatchLight_BatchManager_SLURM.hxx b/src/Batch/BatchLight_BatchManager_SLURM.hxx deleted file mode 100644 index ed21624dd..000000000 --- a/src/Batch/BatchLight_BatchManager_SLURM.hxx +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_BATCHMANAGER_SLURM_H_ -#define _BL_BATCHMANAGER_SLURM_H_ - -#include -#include "Utils_SALOME_Exception.hxx" -#include "BatchLight_BatchManager.hxx" - -namespace BatchLight { - - class Job; - - class BatchManager_SLURM : public BatchManager - { - public: - // Constructeur et destructeur - BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host - virtual ~BatchManager_SLURM(); - - // Methodes pour le controle des jobs : virtuelles pures - void deleteJob(const int & jobid); // retire un job du gestionnaire - std::string queryJob(const int & jobid); // renvoie l'etat du job - - protected: - void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); - void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); - int submit() throw(SALOME_Exception); - - private: - - }; - -} - -#endif diff --git a/src/Batch/BatchLight_Job.cxx b/src/Batch/BatchLight_Job.cxx deleted file mode 100644 index 9762a98a6..000000000 --- a/src/Batch/BatchLight_Job.cxx +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * Job.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include "BatchLight_Job.hxx" -using namespace std; - -namespace BatchLight { - - // Constructeur - Job::Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc) : _fileToExecute(fileToExecute), _filesToExport(filesToExport), _filesToImport(filesToImport), _nbproc(nbproc) - { - // Nothing to do - } - - Job::~Job() - { - MESSAGE("Job destructor"); - } - -} diff --git a/src/Batch/BatchLight_Job.hxx b/src/Batch/BatchLight_Job.hxx deleted file mode 100644 index 23ac8f3bb..000000000 --- a/src/Batch/BatchLight_Job.hxx +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * Job.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_JOB_H_ -#define _BL_JOB_H_ - -#include "utilities.h" -#include -#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) - -namespace BatchLight { - - class Job - { - public: - // Constructeurs et destructeur - Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc); - virtual ~Job(); - - const char *getFileToExecute() const { return _fileToExecute; } - const Engines::FilesList getFilesToExportList() const { return _filesToExport; } - const Engines::FilesList getFilesToImportList() const { return _filesToImport; } - const int getNbProc() const { return _nbproc; } - - protected: - const char* _fileToExecute; - const Engines::FilesList _filesToExport; - const Engines::FilesList _filesToImport; - const int _nbproc; - - private: - - }; - -} - -#endif diff --git a/src/Batch/Batch_Date.cxx b/src/Batch/Batch_Date.cxx index 0ec1166ef..a657696ac 100644 --- a/src/Batch/Batch_Date.cxx +++ b/src/Batch/Batch_Date.cxx @@ -60,7 +60,7 @@ namespace Batch { _sec = p_tm->tm_sec; } else { - char c; +// char c; // istringstream ist(s); // ist >> _day >> c // >> _month >> c diff --git a/src/Batch/Batch_PyVersatile.cxx b/src/Batch/Batch_PyVersatile.cxx index 395634bbf..f38132ca2 100644 --- a/src/Batch/Batch_PyVersatile.cxx +++ b/src/Batch/Batch_PyVersatile.cxx @@ -27,12 +27,12 @@ * */ -#include #include #include "Batch_TypeMismatchException.hxx" #include "Batch_ListIsFullException.hxx" #include "Batch_InvalidArgumentException.hxx" #include "Batch_PyVersatile.hxx" +#include namespace Batch { @@ -93,7 +93,7 @@ namespace Batch { if (_maxsize != 1) { // une liste obj = PyList_New(0); for(Versatile::const_iterator it=begin(); it!=end(); it++) { - char ch[2] = {0, 0}; +// char ch[2] = {0, 0}; string st; Couple cp; // PyObject * tuple; @@ -137,7 +137,7 @@ namespace Batch { } } else { // un scalaire - char ch[2] = {0, 0}; +// char ch[2] = {0, 0}; string st; Couple cp; // PyObject * tuple; diff --git a/src/Batch/Makefile.am b/src/Batch/Makefile.am index 44cd7ad59..0ec62e54c 100644 --- a/src/Batch/Makefile.am +++ b/src/Batch/Makefile.am @@ -59,12 +59,7 @@ LIB_INCLUDES = \ Batch_PyVersatile.hxx \ Batch_RunTimeException.hxx \ Batch_StringType.hxx \ - Batch_TypeMismatchException.hxx \ - BatchLight_BatchManager.hxx \ - BatchLight_BatchManager_PBS.hxx \ - BatchLight_BatchManager_SLURM.hxx \ - BatchLight_Job.hxx \ - MpiImpl.hxx + Batch_TypeMismatchException.hxx LIB_SRC = \ @@ -96,18 +91,14 @@ LIB_SRC = \ Batch_PyVersatile.cxx \ Batch_RunTimeException.cxx \ Batch_StringType.cxx \ - Batch_TypeMismatchException.cxx \ - BatchLight_BatchManager.cxx \ - BatchLight_BatchManager_SLURM.cxx \ - BatchLight_BatchManager_PBS.cxx \ - BatchLight_Job.cxx \ - MpiImpl.cxx + Batch_TypeMismatchException.cxx LIB_CPPFLAGS = \ - @PYTHON_INCLUDES@ \ + ${PYTHON_INCLUDES} \ -I$(srcdir)/../Basics \ - -I$(srcdir)/../SALOMELocalTrace + -I$(srcdir)/../SALOMELocalTrace \ + -I$(top_builddir)/salome_adm/unix LIB_LIBADD = \ ../SALOMELocalTrace/libSALOMELocalTrace.la \ @@ -193,8 +184,8 @@ LIB_SRC += \ Batch_JobInfo_LSF.cxx \ Batch_Job_LSF.cxx -LIB_CPPFLAGS += @LSF_INCLUDES@ -LIB_LIBADD += @LSF_LIBDIR@ @LSF_LIBS@ +LIB_CPPFLAGS += ${LSF_INCLUDES} +LIB_LIBADD += ${LSF_LIBDIR} ${LSF_LIBS} endif @@ -209,16 +200,6 @@ salomeinclude_HEADERS = $(LIB_INCLUDES) # lib_LTLIBRARIES = libSalomeBatch.la libSalomeBatch_la_SOURCES = $(LIB_SRC) -libSalomeBatch_la_CPPFLAGS = \ - ${PYTHON_INCLUDES} \ - -I$(srcdir)/../Basics \ - -I$(srcdir)/../SALOMELocalTrace \ - -I$(srcdir)/../Utils \ - -I$(top_builddir)/salome_adm/unix \ - -I$(top_builddir)/idl \ - ${CORBA_CXXFLAGS} ${CORBA_INCLUDES} $(LIB_CPPFLAGS) - +libSalomeBatch_la_CPPFLAGS = ${LIB_CPPFLAGS} libSalomeBatch_la_LDFLAGS = -no-undefined -version-info=0:0:0 -libSalomeBatch_la_LIBADD = \ - ../Utils/libOpUtil.la \ - $(LIB_LIBADD) +libSalomeBatch_la_LIBADD = $(LIB_LIBADD) diff --git a/src/Batch/MpiImpl.cxx b/src/Batch/MpiImpl.cxx deleted file mode 100644 index 036018b1e..000000000 --- a/src/Batch/MpiImpl.cxx +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.cxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#include -#include -#include -#include "utilities.h" -#include "MpiImpl.hxx" - -using namespace std; - -// Constructor -MpiImpl::MpiImpl() -{ - MESSAGE("MpiImpl constructor"); -} - -// Destructor -MpiImpl::~MpiImpl() -{ - MESSAGE("MpiImpl destructor"); -} - -// lam implementation -// Constructor -MpiImpl_LAM::MpiImpl_LAM() : MpiImpl() -{ -} - -// Destructor -MpiImpl_LAM::~MpiImpl_LAM() -{ - MESSAGE("MpiImpl_LAM destructor"); -} - -string MpiImpl_LAM::size() -{ - return "${LAMWORLD}"; -} - -string MpiImpl_LAM::rank() -{ - return "${LAMRANK}"; -} - -string MpiImpl_LAM::boot(const string machinefile, const unsigned int nbnodes) -{ - ostringstream oss; - oss << "lamboot " << machinefile << endl; - return oss.str(); -} - -string MpiImpl_LAM::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_LAM::halt() -{ - ostringstream oss; - oss << "lamhalt" << endl; - return oss.str(); -} - -// mpich1 implementation -// Constructor -MpiImpl_MPICH1::MpiImpl_MPICH1() : MpiImpl() -{ -} - -// Destructor -MpiImpl_MPICH1::~MpiImpl_MPICH1() -{ - MESSAGE("MpiImpl_MPICH1 destructor"); -} - -string MpiImpl_MPICH1::size() -{ - throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); -} - -string MpiImpl_MPICH1::rank() -{ - throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); -} - -string MpiImpl_MPICH1::boot(const string machinefile, const unsigned int nbnodes) -{ - return ""; -} - -string MpiImpl_MPICH1::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -machinefile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_MPICH1::halt() -{ - return ""; -} - -// mpich2 implementation -// Constructor -MpiImpl_MPICH2::MpiImpl_MPICH2() : MpiImpl() -{ -} - -// Destructor -MpiImpl_MPICH2::~MpiImpl_MPICH2() -{ - MESSAGE("MpiImpl_MPICH2 destructor"); -} - -string MpiImpl_MPICH2::size() -{ - return "${PMI_SIZE}"; -} - -string MpiImpl_MPICH2::rank() -{ - return "${PMI_RANK}"; -} - -string MpiImpl_MPICH2::boot(const string machinefile, const unsigned int nbnodes) -{ - ostringstream oss; - oss << "mpdboot -n " << nbnodes << " -f " << machinefile << endl; - return oss.str(); -} - -string MpiImpl_MPICH2::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_MPICH2::halt() -{ - ostringstream oss; - oss << "mpdallexit" << endl; - return oss.str(); -} - -// openmpi implementation -// Constructor -MpiImpl_OPENMPI::MpiImpl_OPENMPI() : MpiImpl() -{ -} - -// Destructor -MpiImpl_OPENMPI::~MpiImpl_OPENMPI() -{ - MESSAGE("MpiImpl_OPENMPI destructor"); -} - -string MpiImpl_OPENMPI::size() -{ - return "${OMPI_MCA_ns_nds_num_procs}"; -} - -string MpiImpl_OPENMPI::rank() -{ - return "${OMPI_MCA_ns_nds_vpid}"; -} - -string MpiImpl_OPENMPI::boot(const string machinefile, const unsigned int nbnodes) -{ - return ""; -} - -string MpiImpl_OPENMPI::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) -{ - ostringstream oss; - oss << "mpirun -hostfile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; - return oss.str(); -} - -string MpiImpl_OPENMPI::halt() -{ - return ""; -} - diff --git a/src/Batch/MpiImpl.hxx b/src/Batch/MpiImpl.hxx deleted file mode 100644 index beeac0301..000000000 --- a/src/Batch/MpiImpl.hxx +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License. -// -// This library is distributed in the hope that it will be useful -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -// -/* - * BatchManager.hxx : - * - * Auteur : Bernard SECHER - CEA/DEN - * Date : Juillet 2007 - * Projet : SALOME - * - */ - -#ifndef _BL_MPIIMPL_H_ -#define _BL_MPIIMPL_H_ - -#include -#include "Utils_SALOME_Exception.hxx" -#include - -class MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl(); // constrcuctor - virtual ~MpiImpl(); //Destructor - - virtual std::string size() = 0; // get number of process of current job - virtual std::string rank() = 0; // get process number of current job - virtual std::string boot(const std::string machinefile, const unsigned int nbnodes) = 0; // get boot command - virtual std::string run(const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute) = 0; // get run command - virtual std::string halt() = 0; // get stop command - -protected: - -private: - -}; - -class MpiImpl_LAM : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_LAM(); // constructor - virtual ~MpiImpl_LAM(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -class MpiImpl_MPICH1 : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_MPICH1(); // constructor - virtual ~MpiImpl_MPICH1(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -class MpiImpl_MPICH2 : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_MPICH2(); // constructor - virtual ~MpiImpl_MPICH2(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -class MpiImpl_OPENMPI : public MpiImpl -{ -public: - // Constructeur et destructeur - MpiImpl_OPENMPI(); // constructor - virtual ~MpiImpl_OPENMPI(); //Destructor - - std::string size(); // get number of process of current job - std::string rank(); // get process number of current job - std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command - std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command - std::string halt(); // get stop command - -protected: - -private: - -}; - -#endif diff --git a/src/Launcher/BatchLight_BatchManager.cxx b/src/Launcher/BatchLight_BatchManager.cxx new file mode 100644 index 000000000..4e0241f38 --- /dev/null +++ b/src/Launcher/BatchLight_BatchManager.cxx @@ -0,0 +1,272 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include +#include +#include +#include +#include "BatchLight_Job.hxx" +#include "BatchLight_BatchManager.hxx" +#include "Batch_Date.hxx" +using namespace std; + +namespace BatchLight { + + // Constructeur + BatchManager::BatchManager(const batchParams& p) throw(SALOME_Exception) : _params(p) + { + SCRUTE(_params.hostname); + SCRUTE(_params.protocol); + SCRUTE(_params.username); + // On verifie que le hostname est correct + if (!gethostbyname(_params.hostname.c_str())) { // hostname unknown from network + string msg = "hostname \""; + msg += _params.hostname; + msg += "\" unknown from the network"; + throw SALOME_Exception(msg.c_str()); + } + _mpiImpl = NULL; + } + + // Destructeur + BatchManager::~BatchManager() + { + MESSAGE("BatchManager destructor "<<_params.hostname); + std::map < int, const BatchLight::Job * >::const_iterator it; + for(it=_jobmap.begin();it!=_jobmap.end();it++) + delete it->second; + if(_mpiImpl) delete _mpiImpl; + } + + // Methode pour le controle des jobs : soumet un job au gestionnaire + const int BatchManager::submitJob(Job* job) + { + BEGIN_OF("BatchManager::submitJob"); + int id; + + // temporary directory on cluster to put input files for job + setDirForTmpFiles(); + SCRUTE(_dirForTmpFiles); + + // export input files on cluster + exportInputFiles(job->getFileToExecute(),job->getFilesToExportList()); + + // build salome coupling script for job + buildSalomeCouplingScript(job->getFileToExecute()); + + // build batch script for job + buildSalomeBatchScript(job->getNbProc()); + + // submit job on cluster + id = submit(); + + // register job on map + _jobmap[id] = job; + END_OF("BatchManager::submitJob"); + return id; + } + + void BatchManager::setDirForTmpFiles() + { + int i; + + _dirForTmpFiles = string("Batch/"); + Batch::Date date = Batch::Date(time(0)) ; + std::string thedate = date.str() ; + int lend = thedate.size() ; + i = 0 ; + while ( i < lend ) { + if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { + thedate[i] = '_' ; + } + i++ ; + } + _dirForTmpFiles += thedate ; + } + + void BatchManager::exportInputFiles(const char *fileToExecute, const Engines::FilesList filesToExportList) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager::exportInFiles"); + string command = _params.protocol; + int status; + + command += " "; + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"mkdir -p "; + command += _dirForTmpFiles ; + command += "\"" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + + command += fileToExecute; + command += " "; + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + int i ; + for ( i = 0 ; i < filesToExportList.length() ; i++ ) { + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + command += filesToExportList[i] ; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + } + + END_OF("BatchManager::exportInFiles"); + } + + void BatchManager::importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager::importOutputFiles"); + string command; + int status; + + const BatchLight::Job* myJob = _jobmap[jobId]; + Engines::FilesList filesToImportList = myJob->getFilesToImportList(); + + for ( int i = 0 ; i < filesToImportList.length() ; i++ ) { + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += filesToImportList[i] ; + command += " "; + command += directory; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + } + + END_OF("BatchManager::importOutputFiles"); + } + + string BatchManager::BuildTemporaryFileName() const + { + //build more complex file name to support multiple salome session + char *temp = new char[19]; + strcpy(temp, "/tmp/command"); + strcat(temp, "XXXXXX"); +#ifndef WNT + + mkstemp(temp); +#else + + char aPID[80]; + itoa(getpid(), aPID, 10); + strcat(temp, aPID); +#endif + + string command(temp); + delete [] temp; + command += ".sh"; + return command; + } + + void BatchManager::RmTmpFile() + { + if (_TmpFileName != ""){ + string command = "rm "; + command += _TmpFileName; + char *temp = strdup(command.c_str()); + int lgthTemp = strlen(temp); + temp[lgthTemp - 3] = '*'; + temp[lgthTemp - 2] = '\0'; + system(temp); + free(temp); + } + } + + MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(SALOME_Exception) + { + if(mpiImpl == "lam") + return new MpiImpl_LAM(); + else if(mpiImpl == "mpich1") + return new MpiImpl_MPICH1(); + else if(mpiImpl == "mpich2") + return new MpiImpl_MPICH2(); + else if(mpiImpl == "openmpi") + return new MpiImpl_OPENMPI(); + else if(mpiImpl == "indif") + throw SALOME_Exception("you must specify a mpi implementation in CatalogResources.xml file"); + else{ + ostringstream oss; + oss << mpiImpl << " : not yet implemented"; + throw SALOME_Exception(oss.str().c_str()); + } + } + +} diff --git a/src/Launcher/BatchLight_BatchManager.hxx b/src/Launcher/BatchLight_BatchManager.hxx new file mode 100644 index 000000000..a8ea0061a --- /dev/null +++ b/src/Launcher/BatchLight_BatchManager.hxx @@ -0,0 +1,94 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_BATCHMANAGER_H_ +#define _BL_BATCHMANAGER_H_ + +#include +#include +#include +#include "Utils_SALOME_Exception.hxx" +#include +#include +#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) +#include "MpiImpl.hxx" + +namespace BatchLight { + + class Job; + + struct batchParams{ + std::string hostname; // serveur ou tourne le BatchManager + std::string protocol; // protocole d'acces au serveur: ssh ou rsh + std::string username; // username d'acces au serveur + std::string applipath; // path of apllication directory on server + std::vector modulesList; // list of Salome modules installed on server + unsigned int nbnodes; // number of nodes on cluster + unsigned int nbprocpernode; // number of processors on each node + std::string mpiImpl; // mpi implementation + }; + + class BatchManager + { + public: + // Constructeur et destructeur + BatchManager(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + virtual ~BatchManager(); + + // Methodes pour le controle des jobs : virtuelles pures + const int submitJob(BatchLight::Job* job); // soumet un job au gestionnaire + virtual void deleteJob(const int & jobid) = 0; // retire un job du gestionnaire + virtual std::string queryJob(const int & jobid) = 0; // renvoie l'etat du job + void importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception); + + protected: + batchParams _params; + MpiImpl *_mpiImpl; + + std::map _jobmap; + std::string _dirForTmpFiles; // repertoire temporaire sur le serveur + std::string _TmpFileName; + std::string _fileNameToExecute; + + virtual int submit() throw(SALOME_Exception) = 0; + void setDirForTmpFiles(); + void exportInputFiles( const char *fileToExecute, const Engines::FilesList filesToExportList ) throw(SALOME_Exception); + virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) = 0; + virtual void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) = 0; + + std::string BuildTemporaryFileName() const; + void RmTmpFile(); + MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(SALOME_Exception); + + private: + + }; + +} + +#endif diff --git a/src/Launcher/BatchLight_BatchManager_PBS.cxx b/src/Launcher/BatchLight_BatchManager_PBS.cxx new file mode 100644 index 000000000..bba030a0c --- /dev/null +++ b/src/Launcher/BatchLight_BatchManager_PBS.cxx @@ -0,0 +1,375 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include "BatchLight_BatchManager_PBS.hxx" +#include "utilities.h" +#include "BatchLight_Job.hxx" +#include +#include +#include +#include + +using namespace std; + +namespace BatchLight { + + // Constructeur + BatchManager_PBS::BatchManager_PBS(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) + { + // pbs batch system needs to know mpi implementation + _mpiImpl = FactoryMpiImpl(_params.mpiImpl); + } + + // Destructeur + BatchManager_PBS::~BatchManager_PBS() + { + MESSAGE("BatchManager_PBS destructor "<<_params.hostname); + } + + // Methode pour le controle des jobs : retire un job du gestionnaire + void BatchManager_PBS::deleteJob(const int & jobid) + { + BEGIN_OF("BatchManager_PBS::deleteJob"); + string command; + int status; + ostringstream oss; + oss << jobid; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qdel " ; + command += oss.str(); + command += "\""; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + MESSAGE("jobId = " << jobid << "killed"); + END_OF("BatchManager_PBS::deleteJob"); + } + + // Methode pour le controle des jobs : renvoie l'etat du job + string BatchManager_PBS::queryJob(const int & jobid) + { + BEGIN_OF("BatchManager_PBS::queryJob"); + // define name of log file + string jstatus; + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qstat -f " ; + ostringstream oss2; + oss2 << jobid; + command += oss2.str(); + command += "\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status && status != 153 && status != 256*153){ + MESSAGE("status="<> jstatus; + iss >> jstatus; + iss >> jstatus; + } + else + jstatus = "U"; + } + + MESSAGE("jobId = " << jobid << " " << jstatus); + END_OF("BatchManager_PBS::queryJob"); + return jstatus; + } + + void BatchManager_PBS::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::buildSalomeCouplingScript"); + int status; + + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "cd " ; + tempOutputFile << _params.applipath << endl ; + tempOutputFile << "export PYTHONPATH=~/" ; + tempOutputFile << _dirForTmpFiles ; + tempOutputFile << ":$PYTHONPATH" << endl ; + tempOutputFile << "if test " ; + tempOutputFile << _mpiImpl->rank() ; + tempOutputFile << " = 0; then" << endl ; + tempOutputFile << " ./runAppli --terminal --modules=" ; + for ( int i = 0 ; i < _params.modulesList.size() ; i++ ) { + tempOutputFile << _params.modulesList[i] ; + if ( i != _params.modulesList.size()-1 ) + tempOutputFile << "," ; + } + tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; + tempOutputFile << " for ((ip=1; ip < "; + tempOutputFile << _mpiImpl->size(); + tempOutputFile << " ; ip++))" << endl; + tempOutputFile << " do" << endl ; + tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; + tempOutputFile << " done" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; + tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; + tempOutputFile << " ./runSession killCurrentPort" << endl; + tempOutputFile << "else" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitNS.py" << endl ; + tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'"; + tempOutputFile << _mpiImpl->rank() << endl ; + tempOutputFile << "fi" << endl ; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/runSalome_" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(_fileNameToExecute) ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + RmTmpFile(); + + END_OF("BatchManager_PBS::buildSalomeCouplingScript"); + } + + void BatchManager_PBS::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::buildSalomeBatchScript"); + int status; + + int nbmaxproc = _params.nbnodes * _params.nbprocpernode; + if( nbproc > nbmaxproc ){ + MESSAGE(nbproc << " processors asked on a cluster of " << nbmaxproc << " processors"); + throw SALOME_Exception("Too much processors asked for that cluster"); + } + + int nbnodes; + if( nbproc < _params.nbnodes ) + nbnodes = nbproc; + else + nbnodes = _params.nbnodes; + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + + ostringstream filenameToExecute; + filenameToExecute << " ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh"; + + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; + tempOutputFile << "#PBS -o ~/" << _dirForTmpFiles << "/runSalome.log${PBS_JOBID}" << endl ; + tempOutputFile << _mpiImpl->boot("${PBS_NODEFILE}",nbnodes); + tempOutputFile << _mpiImpl->run("${PBS_NODEFILE}",nbproc,filenameToExecute.str()); + tempOutputFile << _mpiImpl->halt(); + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + RmTmpFile(); + END_OF("BatchManager_PBS::buildSalomeBatchScript"); + + } + + int BatchManager_PBS::submit() throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::submit"); + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qsub " ; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + // read id of submitted job in log file + char line[128]; + FILE *fp = fopen(logFile.c_str(),"r"); + fgets( line, 128, fp); + fclose(fp); + + string sline(line); + int pos = sline.find("."); + string strjob; + if(pos == string::npos) + strjob = sline; + else + strjob = sline.substr(0,pos); + + int id; + istringstream iss(strjob); + iss >> id; + + END_OF("BatchManager_PBS::submit"); + return id; + } + +} diff --git a/src/Launcher/BatchLight_BatchManager_PBS.hxx b/src/Launcher/BatchLight_BatchManager_PBS.hxx new file mode 100644 index 000000000..e7e5789c7 --- /dev/null +++ b/src/Launcher/BatchLight_BatchManager_PBS.hxx @@ -0,0 +1,59 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_BATCHMANAGER_PBS_H_ +#define _BL_BATCHMANAGER_PBS_H_ + +#include +#include "Utils_SALOME_Exception.hxx" +#include "BatchLight_BatchManager.hxx" + +namespace BatchLight { + + class Job; + + class BatchManager_PBS : public BatchManager + { + public: + // Constructeur et destructeur + BatchManager_PBS(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + virtual ~BatchManager_PBS(); + + // Methodes pour le controle des jobs : virtuelles pures + void deleteJob(const int & jobid); // retire un job du gestionnaire + std::string queryJob(const int & jobid); // renvoie l'etat du job + + private: + void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); + void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); + int submit() throw(SALOME_Exception); + }; + +} + +#endif diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.cxx b/src/Launcher/BatchLight_BatchManager_SLURM.cxx new file mode 100644 index 000000000..9a740e328 --- /dev/null +++ b/src/Launcher/BatchLight_BatchManager_SLURM.cxx @@ -0,0 +1,327 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include "BatchLight_BatchManager_SLURM.hxx" +#include "utilities.h" +#include "BatchLight_Job.hxx" +#include +#include +#include +#include + +using namespace std; + +namespace BatchLight { + + // Constructeur + BatchManager_SLURM::BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) + { + } + + // Destructeur + BatchManager_SLURM::~BatchManager_SLURM() + { + MESSAGE("BatchManager_SLURM destructor "<<_params.hostname); + } + + // Methode pour le controle des jobs : retire un job du gestionnaire + void BatchManager_SLURM::deleteJob(const int & jobid) + { + BEGIN_OF("BatchManager_SLURM::deleteJob"); + string command; + int status; + ostringstream oss; + oss << jobid; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"bkill " ; + command += oss.str(); + command += "\""; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + MESSAGE("jobId = " << jobid << "killed"); + END_OF("BatchManager_SLURM::deleteJob"); + } + + // Methode pour le controle des jobs : renvoie l'etat du job + string BatchManager_SLURM::queryJob(const int & jobid) + { + BEGIN_OF("BatchManager_SLURM::queryJob"); + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"bjobs " ; + ostringstream oss2; + oss2 << jobid; + command += oss2.str(); + command += "\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + // read staus of job in log file + char line[128]; + ifstream fp(logFile.c_str(),ios::in); + fp.getline(line,80,'\n'); + + string sjobid, username, jstatus; + fp >> sjobid; + fp >> username; + fp >> jstatus; + + MESSAGE("jobId = " << jobid << " " << jstatus); + END_OF("BatchManager_SLURM::queryJob"); + return jstatus; + } + + void BatchManager_SLURM::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_SLURM::buildSalomeCouplingScript"); + int status; + + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "cd " ; + tempOutputFile << _params.applipath << endl ; + tempOutputFile << "export PYTHONPATH=~/" ; + tempOutputFile << _dirForTmpFiles ; + tempOutputFile << ":$PYTHONPATH" << endl ; + tempOutputFile << "if test $SLURM_PROCID = 0; then" << endl ; + tempOutputFile << " ./runAppli --terminal --modules=" ; + for ( int i = 0 ; i < _params.modulesList.size() ; i++ ) { + tempOutputFile << _params.modulesList[i] ; + if ( i != _params.modulesList.size()-1 ) + tempOutputFile << "," ; + } + tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; + tempOutputFile << " for ((ip=1; ip < ${SLURM_NPROCS} ; ip++))" << endl; + tempOutputFile << " do" << endl ; + tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; + tempOutputFile << " done" << endl ; + tempOutputFile << " ./runSession waitNS.sh" << endl ; + tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; + tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; + tempOutputFile << " ./runSession killCurrentPort" << endl; + tempOutputFile << "else" << endl ; + tempOutputFile << " ./runSession waitNS.sh" << endl ; + tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'${SLURM_PROCID}" << endl ; + tempOutputFile << "fi" << endl ; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/runSalome_" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + RmTmpFile(); + + END_OF("BatchManager_SLURM::buildSalomeCouplingScript"); + } + + void BatchManager_SLURM::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_SLURM::buildSalomeBatchScript"); + int status; + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "#BSUB -n " << nbproc << endl ; + tempOutputFile << "#BSUB -o " << _dirForTmpFiles << "/runSalome.log%J" << endl ; + tempOutputFile << "mpirun -srun ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh" << endl ; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + RmTmpFile(); + END_OF("BatchManager_SLURM::buildSalomeBatchScript"); + + } + + int BatchManager_SLURM::submit() throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_SLURM::submit"); + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"bsub < " ; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + // read id of submitted job in log file + char line[128]; + FILE *fp = fopen(logFile.c_str(),"r"); + fgets( line, 128, fp); + fclose(fp); + + string sline(line); + int p1 = sline.find("<"); + int p2 = sline.find(">"); + string strjob = sline.substr(p1+1,p2-p1-1); + + int id; + istringstream iss(strjob); + iss >> id; + + END_OF("BatchManager_SLURM::submit"); + return id; + } + +} diff --git a/src/Launcher/BatchLight_BatchManager_SLURM.hxx b/src/Launcher/BatchLight_BatchManager_SLURM.hxx new file mode 100644 index 000000000..ed21624dd --- /dev/null +++ b/src/Launcher/BatchLight_BatchManager_SLURM.hxx @@ -0,0 +1,62 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_BATCHMANAGER_SLURM_H_ +#define _BL_BATCHMANAGER_SLURM_H_ + +#include +#include "Utils_SALOME_Exception.hxx" +#include "BatchLight_BatchManager.hxx" + +namespace BatchLight { + + class Job; + + class BatchManager_SLURM : public BatchManager + { + public: + // Constructeur et destructeur + BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + virtual ~BatchManager_SLURM(); + + // Methodes pour le controle des jobs : virtuelles pures + void deleteJob(const int & jobid); // retire un job du gestionnaire + std::string queryJob(const int & jobid); // renvoie l'etat du job + + protected: + void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); + void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); + int submit() throw(SALOME_Exception); + + private: + + }; + +} + +#endif diff --git a/src/Launcher/BatchLight_Job.cxx b/src/Launcher/BatchLight_Job.cxx new file mode 100644 index 000000000..9762a98a6 --- /dev/null +++ b/src/Launcher/BatchLight_Job.cxx @@ -0,0 +1,45 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * Job.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include "BatchLight_Job.hxx" +using namespace std; + +namespace BatchLight { + + // Constructeur + Job::Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc) : _fileToExecute(fileToExecute), _filesToExport(filesToExport), _filesToImport(filesToImport), _nbproc(nbproc) + { + // Nothing to do + } + + Job::~Job() + { + MESSAGE("Job destructor"); + } + +} diff --git a/src/Launcher/BatchLight_Job.hxx b/src/Launcher/BatchLight_Job.hxx new file mode 100644 index 000000000..23ac8f3bb --- /dev/null +++ b/src/Launcher/BatchLight_Job.hxx @@ -0,0 +1,62 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * Job.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_JOB_H_ +#define _BL_JOB_H_ + +#include "utilities.h" +#include +#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) + +namespace BatchLight { + + class Job + { + public: + // Constructeurs et destructeur + Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc); + virtual ~Job(); + + const char *getFileToExecute() const { return _fileToExecute; } + const Engines::FilesList getFilesToExportList() const { return _filesToExport; } + const Engines::FilesList getFilesToImportList() const { return _filesToImport; } + const int getNbProc() const { return _nbproc; } + + protected: + const char* _fileToExecute; + const Engines::FilesList _filesToExport; + const Engines::FilesList _filesToImport; + const int _nbproc; + + private: + + }; + +} + +#endif diff --git a/src/Launcher/Makefile.am b/src/Launcher/Makefile.am index 9c441ebc1..b0323d754 100644 --- a/src/Launcher/Makefile.am +++ b/src/Launcher/Makefile.am @@ -36,6 +36,11 @@ include $(top_srcdir)/salome_adm/unix/make_common_starter.am # # header files salomeinclude_HEADERS = \ + BatchLight_BatchManager.hxx \ + BatchLight_BatchManager_PBS.hxx \ + BatchLight_BatchManager_SLURM.hxx \ + BatchLight_Job.hxx \ + MpiImpl.hxx \ SALOME_Launcher.hxx # Scripts to be installed @@ -93,7 +98,12 @@ COMMON_LIBS =\ # lib_LTLIBRARIES = libSalomeLauncher.la libSalomeLauncher_la_SOURCES=\ - SALOME_Launcher.cxx + SALOME_Launcher.cxx \ + BatchLight_BatchManager.cxx \ + BatchLight_BatchManager_SLURM.cxx \ + BatchLight_BatchManager_PBS.cxx \ + BatchLight_Job.cxx \ + MpiImpl.cxx libSalomeLauncher_la_CPPFLAGS =\ $(COMMON_CPPFLAGS) diff --git a/src/Launcher/MpiImpl.cxx b/src/Launcher/MpiImpl.cxx new file mode 100644 index 000000000..036018b1e --- /dev/null +++ b/src/Launcher/MpiImpl.cxx @@ -0,0 +1,212 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include +#include +#include +#include "utilities.h" +#include "MpiImpl.hxx" + +using namespace std; + +// Constructor +MpiImpl::MpiImpl() +{ + MESSAGE("MpiImpl constructor"); +} + +// Destructor +MpiImpl::~MpiImpl() +{ + MESSAGE("MpiImpl destructor"); +} + +// lam implementation +// Constructor +MpiImpl_LAM::MpiImpl_LAM() : MpiImpl() +{ +} + +// Destructor +MpiImpl_LAM::~MpiImpl_LAM() +{ + MESSAGE("MpiImpl_LAM destructor"); +} + +string MpiImpl_LAM::size() +{ + return "${LAMWORLD}"; +} + +string MpiImpl_LAM::rank() +{ + return "${LAMRANK}"; +} + +string MpiImpl_LAM::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "lamboot " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_LAM::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_LAM::halt() +{ + ostringstream oss; + oss << "lamhalt" << endl; + return oss.str(); +} + +// mpich1 implementation +// Constructor +MpiImpl_MPICH1::MpiImpl_MPICH1() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH1::~MpiImpl_MPICH1() +{ + MESSAGE("MpiImpl_MPICH1 destructor"); +} + +string MpiImpl_MPICH1::size() +{ + throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::rank() +{ + throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_MPICH1::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -machinefile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH1::halt() +{ + return ""; +} + +// mpich2 implementation +// Constructor +MpiImpl_MPICH2::MpiImpl_MPICH2() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH2::~MpiImpl_MPICH2() +{ + MESSAGE("MpiImpl_MPICH2 destructor"); +} + +string MpiImpl_MPICH2::size() +{ + return "${PMI_SIZE}"; +} + +string MpiImpl_MPICH2::rank() +{ + return "${PMI_RANK}"; +} + +string MpiImpl_MPICH2::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "mpdboot -n " << nbnodes << " -f " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::halt() +{ + ostringstream oss; + oss << "mpdallexit" << endl; + return oss.str(); +} + +// openmpi implementation +// Constructor +MpiImpl_OPENMPI::MpiImpl_OPENMPI() : MpiImpl() +{ +} + +// Destructor +MpiImpl_OPENMPI::~MpiImpl_OPENMPI() +{ + MESSAGE("MpiImpl_OPENMPI destructor"); +} + +string MpiImpl_OPENMPI::size() +{ + return "${OMPI_MCA_ns_nds_num_procs}"; +} + +string MpiImpl_OPENMPI::rank() +{ + return "${OMPI_MCA_ns_nds_vpid}"; +} + +string MpiImpl_OPENMPI::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_OPENMPI::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -hostfile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_OPENMPI::halt() +{ + return ""; +} + diff --git a/src/Launcher/MpiImpl.hxx b/src/Launcher/MpiImpl.hxx new file mode 100644 index 000000000..beeac0301 --- /dev/null +++ b/src/Launcher/MpiImpl.hxx @@ -0,0 +1,131 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_MPIIMPL_H_ +#define _BL_MPIIMPL_H_ + +#include +#include "Utils_SALOME_Exception.hxx" +#include + +class MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl(); // constrcuctor + virtual ~MpiImpl(); //Destructor + + virtual std::string size() = 0; // get number of process of current job + virtual std::string rank() = 0; // get process number of current job + virtual std::string boot(const std::string machinefile, const unsigned int nbnodes) = 0; // get boot command + virtual std::string run(const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute) = 0; // get run command + virtual std::string halt() = 0; // get stop command + +protected: + +private: + +}; + +class MpiImpl_LAM : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_LAM(); // constructor + virtual ~MpiImpl_LAM(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_MPICH1 : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_MPICH1(); // constructor + virtual ~MpiImpl_MPICH1(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_MPICH2 : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_MPICH2(); // constructor + virtual ~MpiImpl_MPICH2(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_OPENMPI : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_OPENMPI(); // constructor + virtual ~MpiImpl_OPENMPI(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +#endif