From: secher Date: Mon, 17 Sep 2007 10:40:17 +0000 (+0000) Subject: add MpiImpl object to manage different mpi implementations X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=af25e2f35205425b0232ef00c251fe25f5d55474;p=modules%2Fkernel.git add MpiImpl object to manage different mpi implementations --- diff --git a/src/Batch/BatchLight_BatchManager.cxx b/src/Batch/BatchLight_BatchManager.cxx index 4047c47a4..4e0241f38 100644 --- a/src/Batch/BatchLight_BatchManager.cxx +++ b/src/Batch/BatchLight_BatchManager.cxx @@ -50,6 +50,7 @@ namespace BatchLight { msg += "\" unknown from the network"; throw SALOME_Exception(msg.c_str()); } + _mpiImpl = NULL; } // Destructeur @@ -59,6 +60,7 @@ namespace BatchLight { std::map < int, const BatchLight::Job * >::const_iterator it; for(it=_jobmap.begin();it!=_jobmap.end();it++) delete it->second; + if(_mpiImpl) delete _mpiImpl; } // Methode pour le controle des jobs : soumet un job au gestionnaire @@ -234,10 +236,9 @@ namespace BatchLight { return command; } -void BatchManager::RmTmpFile() -{ - if (_TmpFileName != "") - { + void BatchManager::RmTmpFile() + { + if (_TmpFileName != ""){ string command = "rm "; command += _TmpFileName; char *temp = strdup(command.c_str()); @@ -247,6 +248,25 @@ void BatchManager::RmTmpFile() system(temp); free(temp); } -} + } + + MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(SALOME_Exception) + { + if(mpiImpl == "lam") + return new MpiImpl_LAM(); + else if(mpiImpl == "mpich1") + return new MpiImpl_MPICH1(); + else if(mpiImpl == "mpich2") + return new MpiImpl_MPICH2(); + else if(mpiImpl == "openmpi") + return new MpiImpl_OPENMPI(); + else if(mpiImpl == "indif") + throw SALOME_Exception("you must specify a mpi implementation in CatalogResources.xml file"); + else{ + ostringstream oss; + oss << mpiImpl << " : not yet implemented"; + throw SALOME_Exception(oss.str().c_str()); + } + } } diff --git a/src/Batch/BatchLight_BatchManager.hxx 
b/src/Batch/BatchLight_BatchManager.hxx index 73bf92571..a8ea0061a 100644 --- a/src/Batch/BatchLight_BatchManager.hxx +++ b/src/Batch/BatchLight_BatchManager.hxx @@ -36,6 +36,7 @@ #include #include #include CORBA_CLIENT_HEADER(SALOME_ContainerManager) +#include "MpiImpl.hxx" namespace BatchLight { @@ -67,6 +68,7 @@ namespace BatchLight { protected: batchParams _params; + MpiImpl *_mpiImpl; std::map _jobmap; std::string _dirForTmpFiles; // repertoire temporaire sur le serveur @@ -81,6 +83,7 @@ namespace BatchLight { std::string BuildTemporaryFileName() const; void RmTmpFile(); + MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(SALOME_Exception); private: diff --git a/src/Batch/BatchLight_BatchManager_PBS.cxx b/src/Batch/BatchLight_BatchManager_PBS.cxx index e008699da..ae539128c 100644 --- a/src/Batch/BatchLight_BatchManager_PBS.cxx +++ b/src/Batch/BatchLight_BatchManager_PBS.cxx @@ -41,6 +41,8 @@ namespace BatchLight { // Constructeur BatchManager_PBS::BatchManager_PBS(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) { + // pbs batch system needs to know mpi implementation + _mpiImpl = FactoryMpiImpl(_params.mpiImpl); } // Destructeur @@ -186,7 +188,7 @@ namespace BatchLight { tempOutputFile << _dirForTmpFiles ; tempOutputFile << ":$PYTHONPATH" << endl ; tempOutputFile << "if test " ; - tempOutputFile << mpiRank() ; + tempOutputFile << _mpiImpl->rank() ; tempOutputFile << " = 0; then" << endl ; tempOutputFile << " ./runAppli --terminal --batch --modules=" ; for ( i = 0 ; i < _params.modulesList.size() ; i++ ) { @@ -196,7 +198,7 @@ namespace BatchLight { } tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; tempOutputFile << " for ((ip=1; ip < "; - tempOutputFile << mpiSize(); + tempOutputFile << _mpiImpl->size(); tempOutputFile << " ; ip++))" << endl; tempOutputFile << " do" << endl ; tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; @@ -209,7 +211,7 @@ namespace BatchLight { tempOutputFile << " 
sleep 5" << endl ; tempOutputFile << " ./runSession waitNS.py" << endl ; tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'"; - tempOutputFile << mpiRank() << endl ; + tempOutputFile << _mpiImpl->rank() << endl ; tempOutputFile << "fi" << endl ; tempOutputFile.flush(); tempOutputFile.close(); @@ -266,16 +268,15 @@ namespace BatchLight { ofstream tempOutputFile; tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + ostringstream filenameToExecute; + filenameToExecute << " ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh"; + tempOutputFile << "#! /bin/sh -f" << endl ; tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; tempOutputFile << "#PBS -o runSalome.log" << endl ; - tempOutputFile << mpiBoot() << endl ; - tempOutputFile << "mpirun -np " << nbproc << " ~/" ; - tempOutputFile << _dirForTmpFiles ; - tempOutputFile << "/runSalome_" ; - tempOutputFile << _fileNameToExecute ; - tempOutputFile << "_Batch.sh" << endl ; - tempOutputFile << mpiHalt() << endl ; + tempOutputFile << _mpiImpl->boot("${PBS_NODEFILE}",nbnodes); + tempOutputFile << _mpiImpl->run("${PBS_NODEFILE}",nbproc,filenameToExecute.str()); + tempOutputFile << _mpiImpl->halt(); tempOutputFile.flush(); tempOutputFile.close(); chmod(_TmpFileName.c_str(), 0x1ED); @@ -376,48 +377,4 @@ namespace BatchLight { return id; } - std::string BatchManager_PBS::mpiRank() throw(SALOME_Exception) - { - if(_params.mpiImpl == "indif") - throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); - else if(_params.mpiImpl == "lam") - return "${LAMRANK}"; - else - throw SALOME_Exception("not yet implemented"); - } - - std::string BatchManager_PBS::mpiSize() throw(SALOME_Exception) - { - if(_params.mpiImpl == "indif") - throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); - else if(_params.mpiImpl == "lam") - return "${LAMWORLD}"; - else - throw SALOME_Exception("not yet implemented"); - } - - 
std::string BatchManager_PBS::mpiBoot() throw(SALOME_Exception) - { - if(_params.mpiImpl == "indif") - throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); - else if(_params.mpiImpl == "lam") - return "lamboot ${PBS_NODEFILE}"; - else if(_params.mpiImpl == "mpich1") - return ""; - else - throw SALOME_Exception("not yet implemented"); - } - - std::string BatchManager_PBS::mpiHalt() throw(SALOME_Exception) - { - if(_params.mpiImpl == "indif") - throw SALOME_Exception("You have to specify MPI implementation in CatalogResources.xml file"); - else if(_params.mpiImpl == "lam") - return "lamhalt"; - else if(_params.mpiImpl == "mpich1") - return ""; - else - throw SALOME_Exception("not yet implemented"); - } - } diff --git a/src/Batch/BatchLight_BatchManager_PBS.hxx b/src/Batch/BatchLight_BatchManager_PBS.hxx index 661f110aa..e7e5789c7 100644 --- a/src/Batch/BatchLight_BatchManager_PBS.hxx +++ b/src/Batch/BatchLight_BatchManager_PBS.hxx @@ -52,10 +52,6 @@ namespace BatchLight { void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception); void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception); int submit() throw(SALOME_Exception); - std::string mpiRank() throw(SALOME_Exception); - std::string mpiSize() throw(SALOME_Exception); - std::string mpiBoot() throw(SALOME_Exception); - std::string mpiHalt() throw(SALOME_Exception); }; } diff --git a/src/Batch/Makefile.am b/src/Batch/Makefile.am index 8e971b740..425e39f5c 100644 --- a/src/Batch/Makefile.am +++ b/src/Batch/Makefile.am @@ -63,7 +63,8 @@ LIB_INCLUDES = \ BatchLight_BatchManager.hxx \ BatchLight_BatchManager_PBS.hxx \ BatchLight_BatchManager_SLURM.hxx \ - BatchLight_Job.hxx + BatchLight_Job.hxx \ + MpiImpl.hxx LIB_SRC = \ @@ -99,7 +100,8 @@ LIB_SRC = \ BatchLight_BatchManager.cxx \ BatchLight_BatchManager_SLURM.cxx \ BatchLight_BatchManager_PBS.cxx \ - BatchLight_Job.cxx + BatchLight_Job.cxx \ + MpiImpl.cxx LIB_CPPFLAGS = \ diff --git 
a/src/Batch/MpiImpl.cxx b/src/Batch/MpiImpl.cxx new file mode 100644 index 000000000..036018b1e --- /dev/null +++ b/src/Batch/MpiImpl.cxx @@ -0,0 +1,212 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * MpiImpl.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include <iostream> +#include <sstream> +#include <string> +#include "utilities.h" +#include "MpiImpl.hxx" + +using namespace std; + +// Constructor +MpiImpl::MpiImpl() +{ + MESSAGE("MpiImpl constructor"); +} + +// Destructor +MpiImpl::~MpiImpl() +{ + MESSAGE("MpiImpl destructor"); +} + +// lam implementation +// Constructor +MpiImpl_LAM::MpiImpl_LAM() : MpiImpl() +{ +} + +// Destructor +MpiImpl_LAM::~MpiImpl_LAM() +{ + MESSAGE("MpiImpl_LAM destructor"); +} + +string MpiImpl_LAM::size() +{ + return "${LAMWORLD}"; +} + +string MpiImpl_LAM::rank() +{ + return "${LAMRANK}"; +} + +string MpiImpl_LAM::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "lamboot " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_LAM::run(const string machinefile, const unsigned int
nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_LAM::halt() +{ + ostringstream oss; + oss << "lamhalt" << endl; + return oss.str(); +} + +// mpich1 implementation +// Constructor +MpiImpl_MPICH1::MpiImpl_MPICH1() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH1::~MpiImpl_MPICH1() +{ + MESSAGE("MpiImpl_MPICH1 destructor"); +} + +string MpiImpl_MPICH1::size() +{ + throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::rank() +{ + throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_MPICH1::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -machinefile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH1::halt() +{ + return ""; +} + +// mpich2 implementation +// Constructor +MpiImpl_MPICH2::MpiImpl_MPICH2() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH2::~MpiImpl_MPICH2() +{ + MESSAGE("MpiImpl_MPICH2 destructor"); +} + +string MpiImpl_MPICH2::size() +{ + return "${PMI_SIZE}"; +} + +string MpiImpl_MPICH2::rank() +{ + return "${PMI_RANK}"; +} + +string MpiImpl_MPICH2::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "mpdboot -n " << nbnodes << " -f " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::halt() +{ + ostringstream oss; + oss << "mpdallexit" << endl; + return 
oss.str(); +} + +// openmpi implementation +// Constructor +MpiImpl_OPENMPI::MpiImpl_OPENMPI() : MpiImpl() +{ +} + +// Destructor +MpiImpl_OPENMPI::~MpiImpl_OPENMPI() +{ + MESSAGE("MpiImpl_OPENMPI destructor"); +} + +string MpiImpl_OPENMPI::size() +{ + return "${OMPI_MCA_ns_nds_num_procs}"; +} + +string MpiImpl_OPENMPI::rank() +{ + return "${OMPI_MCA_ns_nds_vpid}"; +} + +string MpiImpl_OPENMPI::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_OPENMPI::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -hostfile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_OPENMPI::halt() +{ + return ""; +} + diff --git a/src/Batch/MpiImpl.hxx b/src/Batch/MpiImpl.hxx new file mode 100644 index 000000000..beeac0301 --- /dev/null +++ b/src/Batch/MpiImpl.hxx @@ -0,0 +1,131 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * MpiImpl.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_MPIIMPL_H_ +#define _BL_MPIIMPL_H_ + +#include <string> +#include "Utils_SALOME_Exception.hxx" +#include + +class MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl(); // constructor + virtual ~MpiImpl(); //Destructor + + virtual std::string size() = 0; // get number of process of current job + virtual std::string rank() = 0; // get process number of current job + virtual std::string boot(const std::string machinefile, const unsigned int nbnodes) = 0; // get boot command + virtual std::string run(const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute) = 0; // get run command + virtual std::string halt() = 0; // get stop command + +protected: + +private: + +}; + +class MpiImpl_LAM : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_LAM(); // constructor + virtual ~MpiImpl_LAM(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_MPICH1 : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_MPICH1(); // constructor + virtual ~MpiImpl_MPICH1(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); //
get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_MPICH2 : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_MPICH2(); // constructor + virtual ~MpiImpl_MPICH2(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +class MpiImpl_OPENMPI : public MpiImpl +{ +public: + // Constructeur et destructeur + MpiImpl_OPENMPI(); // constructor + virtual ~MpiImpl_OPENMPI(); //Destructor + + std::string size(); // get number of process of current job + std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +#endif diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 6cce6fcd4..c4d49712a 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -339,7 +339,7 @@ CORBA::Long SALOME_ContainerManager::submitSalomeJob( const char * fileToExecute jobId = _ResManager->submitSalomeJob(fileToExecute, filesToExport, filesToImport, NumberOfProcessors, params); } catch(const 
SALOME_Exception &ex){ - INFOS("Caught exception."); + MESSAGE(ex.what()); THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::INTERNAL_ERROR); } return jobId; diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx index 93c114de0..d1073107d 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx @@ -200,6 +200,8 @@ startElement( const QString&, _resource.mpi = mpich1; else if( content.compare("mpich2") == 0 ) _resource.mpi = mpich2; + else if( content.compare("openmpi") == 0 ) + _resource.mpi = openmpi; else _resource.mpi = indif; } @@ -413,18 +415,22 @@ void SALOME_ResourcesCatalog_Handler::PrepareDocToXmlFile(QDomDocument& doc) switch ((*iter).second.mpi) { - case pbs: + case lam: eltRoot.setAttribute((char *)test_mpi, "lam"); break; - case lsf: + case mpich1: eltRoot.setAttribute((char *)test_mpi, "mpich1"); break; - case slurm: + case mpich2: eltRoot.setAttribute((char *)test_mpi, "mpich2"); break; + case openmpi: + eltRoot.setAttribute((char *)test_mpi, "openmpi"); + break; + default: eltRoot.setAttribute((char *)test_mpi, ""); } diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx index 7bc9ec686..ad7a73be4 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx @@ -40,7 +40,7 @@ enum AccessModeType {interactive, batch}; enum BatchType {none, pbs, lsf, slurm}; -enum mpiImpl {indif, lam, mpich1, mpich2}; +enum mpiImpl {indif, lam, mpich1, mpich2, openmpi}; class ResourceDataToSort { diff --git a/src/ResourcesManager/SALOME_ResourcesManager.cxx b/src/ResourcesManager/SALOME_ResourcesManager.cxx index a98144a68..d871d2f31 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.cxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.cxx @@ -582,6 +582,8 @@ 
BatchLight::BatchManager *SALOME_ResourcesManager::FactoryBatchManager( const Pa p.mpiImpl = "mpich1"; else if( resInfo.mpi == mpich2 ) p.mpiImpl = "mpich2"; + else if( resInfo.mpi == openmpi ) + p.mpiImpl = "openmpi"; else throw SALOME_Exception("Unknown mpi implementation");