From 954e65a018fc5f2c0d2ffb4f4814e199ecdda5a5 Mon Sep 17 00:00:00 2001 From: prascle Date: Wed, 24 Oct 2007 19:33:58 +0000 Subject: [PATCH] merge from BR_V4dev_resman 24oct07 --- bin/Makefile.am | 6 +- bin/appliskel/killCurrentPort | 2 +- bin/appliskel/runRemote.sh | 4 +- bin/launchConfigureParser.py | 14 + bin/orbmodule.py | 2 +- bin/runSalome.py | 13 +- bin/setenv.py | 4 + bin/shutdownSalome.py | 28 ++ bin/waitContainers.py | 53 +++ bin/waitNS.py | 4 + configure.ac | 1 + idl/SALOMEDS.idl | 2 + idl/SALOME_ContainerManager.idl | 62 ++- idl/SALOME_Registry.idl | 1 + resources/CatalogResources.xml.in | 67 ++-- salome_adm/unix/config_files/check_mpi.m4 | 10 +- src/Batch/BatchLight_BatchManager.cxx | 272 +++++++++++++ src/Batch/BatchLight_BatchManager.hxx | 94 +++++ src/Batch/BatchLight_BatchManager_PBS.cxx | 375 ++++++++++++++++++ src/Batch/BatchLight_BatchManager_PBS.hxx | 59 +++ src/Batch/BatchLight_BatchManager_SLURM.cxx | 328 +++++++++++++++ src/Batch/BatchLight_BatchManager_SLURM.hxx | 62 +++ src/Batch/BatchLight_Job.cxx | 45 +++ src/Batch/BatchLight_Job.hxx | 62 +++ src/Batch/Makefile.am | 18 +- src/Batch/MpiImpl.cxx | 212 ++++++++++ src/Batch/MpiImpl.hxx | 131 ++++++ src/Container/Container_i.cxx | 3 +- src/Container/Makefile.am | 18 +- src/Container/SALOME_ContainerManager.cxx | 149 ++++--- src/Container/SALOME_ContainerManager.hxx | 20 +- src/Launcher/Makefile.am | 130 ++++++ src/Launcher/SALOME_Launcher.cxx | 265 +++++++++++++ src/Launcher/SALOME_Launcher.hxx | 80 ++++ .../SALOME_LauncherServer.cxx} | 51 ++- src/LifeCycleCORBA/Makefile.am | 22 +- .../SALOME_FileTransferCORBA.cxx | 4 +- src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx | 58 ++- src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx | 2 + .../Test/LifeCycleCORBATest.cxx | 13 +- src/LifeCycleCORBA/Test/TestLifeCycleCORBA.py | 10 +- .../TestContainerManager.cxx | 34 +- .../Test/TestLifeCycleCORBA_SWIG.py | 8 +- src/Makefile.am | 9 +- .../SALOME_ModuleCatalog_Server.cxx | 5 +- .../SALOME_ModuleCatalog_impl.hxx 
| 2 +- src/NamingService/SALOME_NamingService.cxx | 286 ++++++------- src/Registry/RegistryService.cxx | 2 +- src/Registry/RegistryService.hxx | 4 + src/Registry/SALOME_Registry_Server.cxx | 1 + src/ResourcesManager/Makefile.am | 1 + .../SALOME_LoadRateManager.cxx | 20 +- .../SALOME_LoadRateManager.hxx | 3 +- .../SALOME_ResourcesCatalog_Handler.cxx | 85 +++- .../SALOME_ResourcesCatalog_Handler.hxx | 5 +- .../SALOME_ResourcesCatalog_Parser.cxx | 62 ++- .../SALOME_ResourcesCatalog_Parser.hxx | 14 +- .../SALOME_ResourcesManager.cxx | 356 ++++++++++------- .../SALOME_ResourcesManager.hxx | 46 ++- src/SALOMEDS/SALOMEDS_StudyManager_i.hxx | 2 + .../SALOMEDSImpl_StudyManager.cxx | 2 +- src/SALOMELocalTrace/LocalTraceBufferPool.hxx | 2 +- src/UnitTests/UnitTests.py | 8 +- 63 files changed, 3133 insertions(+), 580 deletions(-) create mode 100755 bin/shutdownSalome.py create mode 100755 bin/waitContainers.py create mode 100755 bin/waitNS.py create mode 100644 src/Batch/BatchLight_BatchManager.cxx create mode 100644 src/Batch/BatchLight_BatchManager.hxx create mode 100644 src/Batch/BatchLight_BatchManager_PBS.cxx create mode 100644 src/Batch/BatchLight_BatchManager_PBS.hxx create mode 100644 src/Batch/BatchLight_BatchManager_SLURM.cxx create mode 100644 src/Batch/BatchLight_BatchManager_SLURM.hxx create mode 100644 src/Batch/BatchLight_Job.cxx create mode 100644 src/Batch/BatchLight_Job.hxx create mode 100644 src/Batch/MpiImpl.cxx create mode 100644 src/Batch/MpiImpl.hxx create mode 100644 src/Launcher/Makefile.am create mode 100644 src/Launcher/SALOME_Launcher.cxx create mode 100644 src/Launcher/SALOME_Launcher.hxx rename src/{Container/SALOME_ContainerManagerServer.cxx => Launcher/SALOME_LauncherServer.cxx} (65%) rename src/{Container => LifeCycleCORBA}/TestContainerManager.cxx (78%) diff --git a/bin/Makefile.am b/bin/Makefile.am index 827b9ec38..89cd91071 100644 --- a/bin/Makefile.am +++ b/bin/Makefile.am @@ -61,8 +61,10 @@ dist_salomescript_SCRIPTS=\ setenv.py \ 
launchSalome.py \ nameserver.py \ - server.py - + server.py \ + waitNS.py \ + waitContainers.py \ + shutdownSalome.py EXTRA_DIST = appliskel diff --git a/bin/appliskel/killCurrentPort b/bin/appliskel/killCurrentPort index e1eb19268..41b14866a 100755 --- a/bin/appliskel/killCurrentPort +++ b/bin/appliskel/killCurrentPort @@ -21,7 +21,7 @@ currentPort=`${KERNEL_ROOT_DIR}/bin/salome/NSparam.py port` echo $currentPort # --- kill current salome session - +${KERNEL_ROOT_DIR}/bin/salome/shutdownSalome.py ${KERNEL_ROOT_DIR}/bin/salome/killSalomeWithPort.py $currentPort # --- delete config files diff --git a/bin/appliskel/runRemote.sh b/bin/appliskel/runRemote.sh index 3cbfbf55c..21bc0b0c1 100755 --- a/bin/appliskel/runRemote.sh +++ b/bin/appliskel/runRemote.sh @@ -63,4 +63,6 @@ echo "ORBInitRef $initref" > $OMNIORB_CONFIG shift 2 -${KERNEL_ROOT_DIR}/bin/salome/envSalome.py /bin/sh --rcfile $HOME/$APPLI/.bashrc -c "$*" +# suppress --rcfile option because of problem on Mandriva2006 - B Secher mai 2007 +#${KERNEL_ROOT_DIR}/bin/salome/envSalome.py /bin/sh --rcfile $HOME/$APPLI/.bashrc -c "$*" +${KERNEL_ROOT_DIR}/bin/salome/envSalome.py /bin/sh -c "$*" diff --git a/bin/launchConfigureParser.py b/bin/launchConfigureParser.py index bfe414300..51d16de15 100755 --- a/bin/launchConfigureParser.py +++ b/bin/launchConfigureParser.py @@ -50,6 +50,7 @@ interp_nam = "interp" except_nam = "noexcepthandler" terminal_nam = "terminal" pinter_nam = "pinter" +batch_nam = "batch" # values in XML configuration file giving specific module parameters ( section) # which are stored in opts with key _ (eg SMESH_plugins) @@ -390,6 +391,13 @@ def CreateOptionParser (theAdditionalOptions=[]): dest="gui", help=help_str) + help_str = "Launch in Batch Mode. (Without GUI on batch machine)" + o_b = optparse.Option("-b", + "--batch", + action="store_true", + dest="batch", + help=help_str) + help_str = "Launch in GUI mode [default]." 
o_g = optparse.Option("-g", "--gui", @@ -581,6 +589,7 @@ def CreateOptionParser (theAdditionalOptions=[]): # All options opt_list = [o_t,o_g, # GUI/Terminal o_d,o_o, # Desktop + o_b, # Batch o_l,o_f, # Use logger or log-file o_u, # Execute python scripts o_r, # Configuration XML file @@ -775,9 +784,14 @@ def get_env(theAdditionalOptions=[], appname="SalomeApp"): # GUI/Terminal, Desktop, Splash, STUDY_HDF args["session_gui"] = False + args[batch_nam] = False args["study_hdf"] = None + print 'launchConfigureParser cmd_opts',cmd_opts if cmd_opts.gui is not None: args[gui_nam] = cmd_opts.gui + if cmd_opts.batch is not None: + args[batch_nam] = True + print 'launchConfigureParser args[',batch_nam,']',args[batch_nam] if args[gui_nam]: args["session_gui"] = True if cmd_opts.desktop is not None: diff --git a/bin/orbmodule.py b/bin/orbmodule.py index f629aa847..7de5715c1 100755 --- a/bin/orbmodule.py +++ b/bin/orbmodule.py @@ -15,7 +15,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -# +# import sys,os,time import string from nameserver import * diff --git a/bin/runSalome.py b/bin/runSalome.py index 4cfcf1502..86a251075 100755 --- a/bin/runSalome.py +++ b/bin/runSalome.py @@ -300,11 +300,11 @@ class SessionServer(Server): # --- -class ContainerManagerServer(Server): +class LauncherServer(Server): def __init__(self,args): self.args=args self.initArgs() - self.SCMD1=['SALOME_ContainerManagerServer'] + self.SCMD1=['SALOME_LauncherServer'] self.SCMD2=[] if args["gui"] : if 'registry' in self.args['embedded']: @@ -467,10 +467,10 @@ def startSalome(args, modules_list, modules_root_dir): clt.waitNSPID("/myStudyManager",myServer.PID) # - # Lancement ContainerManagerServer + # Lancement LauncherServer # - myCmServer = ContainerManagerServer(args) + myCmServer = LauncherServer(args) myCmServer.setpath(modules_list,modules_root_dir) myCmServer.run() @@ -497,7 +497,7 @@ 
def startSalome(args, modules_list, modules_root_dir): # attente de la disponibilite du Container C++ local dans le Naming Service # - if ('cppContainer' in args['standalone']) | (args["gui"] == 0): + if ('cppContainer' in args['standalone']) | (args["gui"] == 0) : myServer=ContainerCPPServer(args) myServer.run() if sys.platform == "win32": @@ -660,6 +660,7 @@ def useSalome(args, modules_list, modules_root_dir): i = 0 while i < len( toimport ) : if toimport[ i ] == 'killall': + clt.showNS() killAllPorts() import sys sys.exit(0) @@ -809,6 +810,8 @@ def no_main(): def main(): """Salome launch as a main application""" import sys + print "runSalome running on ",os.getenv('HOSTNAME') + print os.environ.itervalues args, modules_list, modules_root_dir = setenv.get_config() kill_salome(args) save_config = True diff --git a/bin/setenv.py b/bin/setenv.py index 0b1fc0e20..f6e81c437 100755 --- a/bin/setenv.py +++ b/bin/setenv.py @@ -158,6 +158,10 @@ def set_env(args, modules_list, modules_root_dir): python_version="python%d.%d" % sys.version_info[0:2] modules_root_dir_list = [] + os.putenv('SALOME_BATCH','0') + if args["batch"] : + os.putenv('SALOME_BATCH','1') + print 'SALOME_BATCH :',os.getenv('SALOME_BATCH') if args["gui"] : modules_list = modules_list[:] + ["GUI"] modules_list = modules_list[:] + ["KERNEL"] diff --git a/bin/shutdownSalome.py b/bin/shutdownSalome.py new file mode 100755 index 000000000..0c96154bb --- /dev/null +++ b/bin/shutdownSalome.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +import orbmodule +import Engines +import Registry +import SALOME +import SALOMEDS +import SALOME_ModuleCatalog +clt=orbmodule.client() +obj = clt.Resolve('Kernel/Session') +if obj != None: + ses = obj._narrow(SALOME.Session) + ses.StopSession() +obj = clt.Resolve('SalomeLauncher') +if obj != None: + cm = obj._narrow(Engines.SalomeLauncher) + cm.Shutdown() +obj = clt.Resolve('Kernel/ModulCatalog') +if obj != None: + mc = obj._narrow(SALOME_ModuleCatalog.ModuleCatalog) + mc.shutdown() 
+obj = clt.Resolve('Registry') +if obj != None: + reg = obj._narrow(Registry.Components) + reg.Shutdown() +obj = clt.Resolve('myStudyManager') +if obj != None: + sm = obj._narrow(SALOMEDS.StudyManager) + sm.Shutdown() diff --git a/bin/waitContainers.py b/bin/waitContainers.py new file mode 100755 index 000000000..65198a3e3 --- /dev/null +++ b/bin/waitContainers.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +import sys +import time +import orbmodule +import CosNaming +clt = orbmodule.client() +clt.waitNS("/ContainerManager") +obj = clt.orb.resolve_initial_references("NameService") +rootContext = obj._narrow(CosNaming.NamingContext) +cname = [] +cname.append(CosNaming.NameComponent('Containers', 'dir')) + +while(1): + try: + ccontext = rootContext.resolve(cname) + break + except CosNaming.NamingContext.NotFound, ex: + time.sleep(1) + except CosNaming.NamingContext.InvalidName, ex: + time.sleep(1) + except CosNaming.NamingContext.CannotProceed, ex: + time.sleep(1) + except (CORBA.TRANSIENT,CORBA.OBJECT_NOT_EXIST,CORBA.COMM_FAILURE): + time.sleep(1) + +def waitContainer(mycont): + while(1): + bl,bi=ccontext.list(0) + if bi is not None: + ok,b=bi.next_one() + while(ok): + for s in b.binding_name : + if s.kind == "dir": + obj=ccontext.resolve([s]) + scontext = obj._narrow(CosNaming.NamingContext) + bll,bii=scontext.list(0) + if bii is not None: + ok,bb=bii.next_one() + while(ok): + for s in bb.binding_name : + if s.id == mycont: + print s.id + return + ok,bb=bii.next_one() + ok,b=bi.next_one() + sys.stdout.write('+') + sys.stdout.flush() + time.sleep(1) + +for cont in sys.argv: + if cont != sys.argv[0]: + waitContainer(cont) + diff --git a/bin/waitNS.py b/bin/waitNS.py new file mode 100755 index 000000000..e439f5fef --- /dev/null +++ b/bin/waitNS.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python +import orbmodule +clt=orbmodule.client() +clt.waitNS("/Kernel/ModulCatalog") diff --git a/configure.ac b/configure.ac index 92a8266bc..af68c9a39 100644 --- a/configure.ac +++ 
b/configure.ac @@ -570,6 +570,7 @@ AC_OUTPUT([ \ ./src/GenericObj/Makefile \ ./src/HDFPersist/Makefile \ ./src/KERNEL_PY/Makefile \ + ./src/Launcher/Makefile \ ./src/LifeCycleCORBA/Makefile \ ./src/LifeCycleCORBA/Test/Makefile \ ./src/LifeCycleCORBA_SWIG/Makefile \ diff --git a/idl/SALOMEDS.idl b/idl/SALOMEDS.idl index 541740fd1..f4aea4bab 100644 --- a/idl/SALOMEDS.idl +++ b/idl/SALOMEDS.idl @@ -714,6 +714,8 @@ Searches for a definite %SObject with a definite GUID and returns True if it fin */ void ping(); + void Shutdown(); + /*! \brief Creation of a new study Creates a new study with a definite name. diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index 84ec3c794..fe7700712 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -27,29 +27,39 @@ module Engines { /*! - Type to describe properties of wanted resource. + Type to transmit list of machines. +*/ + typedef sequence MachineList; + typedef sequence CompoList; + typedef sequence FilesList; + typedef sequence ModulesList; + +/*! + Type to describe properties of resource. */ struct MachineParameters { string container_name; string hostname; + string alias; + string protocol; + string username; + string applipath; + ModulesList modList; string OS; long mem_mb; long cpu_clock; long nb_proc_per_node; long nb_node; boolean isMPI; + string mpiImpl; + string batch; // PaCO specific informations string parallelLib; long nb_component_nodes; }; -/*! - Type to transmit list of machines. -*/ - typedef sequence MachineList; - /*! exception thrown if a computer is not found in the catalog */ @@ -58,6 +68,25 @@ struct MachineParameters enum policy {P_FIRST,P_CYCL,P_BEST}; typedef policy ResPolicy; +/*! 
\brief Interface of the %salomelauncher + This interface is used for interaction with the unique instance + of SalomeLauncher +*/ + interface SalomeLauncher + { + long submitSalomeJob( in string fileToExecute, + in FilesList filesToExport, + in FilesList filesToImport, + in long NumberOfProcessors, + in MachineParameters params ) raises (SALOME::SALOME_Exception); + string querySalomeJob( in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); + void deleteSalomeJob( in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); + void getResultSalomeJob( in string directory, in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); + + void Shutdown(); + + } ; + /*! \brief Interface of the %containerManager This interface is used for interaction with the unique instance of ContainerManager @@ -79,17 +108,30 @@ struct MachineParameters in MachineList possibleComputers); Container StartContainer( in MachineParameters params, - in ResPolicy policy); + in ResPolicy policy, + in CompoList componentList ); + Container GiveContainer( in MachineParameters params, + in ResPolicy policy, + in CompoList componentList ); + + void ShutdownContainers(); + } ; + +/*! 
\brief Interface of the %resourcesManager + This interface is used for interaction with the unique instance + of ResourcesManager +*/ + interface ResourcesManager + { string FindFirst(in MachineList possibleComputers); MachineList GetFittingResources( in MachineParameters params, - in string componentName ) + in CompoList componentList ) raises (SALOME::SALOME_Exception); - void Shutdown(); + MachineParameters GetMachineParameters( in string hostname ); - void ShutdownContainers(); } ; }; diff --git a/idl/SALOME_Registry.idl b/idl/SALOME_Registry.idl index e33e2ccd1..8e4433215 100644 --- a/idl/SALOME_Registry.idl +++ b/idl/SALOME_Registry.idl @@ -55,6 +55,7 @@ module Registry AllInfos history () ; oneway void end() ; oneway void hello( in unsigned long id ) ; + void Shutdown(); } ; } ; diff --git a/resources/CatalogResources.xml.in b/resources/CatalogResources.xml.in index c83cafac5..874d32492 100644 --- a/resources/CatalogResources.xml.in +++ b/resources/CatalogResources.xml.in @@ -1,48 +1,43 @@ - - - - - - - - - + + + + + + + - - - - - - - - - + + + + + + + - - - - - - - - - + + + + + + + - - - - - - - - - + + + + + + + + + + diff --git a/salome_adm/unix/config_files/check_mpi.m4 b/salome_adm/unix/config_files/check_mpi.m4 index 69c0ed854..6422c8b1d 100644 --- a/salome_adm/unix/config_files/check_mpi.m4 +++ b/salome_adm/unix/config_files/check_mpi.m4 @@ -24,6 +24,10 @@ AC_DEFUN([CHECK_MPI],[ AC_REQUIRE([AC_PROG_CC])dnl +AC_ARG_WITH(mpi_lib, + [AC_HELP_STRING([--with-mpi_lib=DIR],[directory path of MPICH lib installation])], + MPILIBREQUESTED="$withval") + AC_ARG_WITH(mpi, [AC_HELP_STRING([--with-mpi=DIR],[root directory path of MPICH installation])], MPIREQUESTED="yes",MPIREQUESTED="no") @@ -51,6 +55,10 @@ if test x"$MPIREQUESTED" = xyes; then fi fi + if test x"$MPILIBREQUESTED" != x; then + MPI_LIBS="-L$MPILIBREQUESTED" + fi + CPPFLAGS_old="$CPPFLAGS" CPPFLAGS="$MPI_INCLUDES $CPPFLAGS" AC_CHECK_HEADER(mpi.h,WITHMPI="yes",WITHMPI="no") @@ -69,7 +77,7 @@ if test 
x"$MPIREQUESTED" = xyes; then if test "$WITHMPI" = "yes";then mpi_ok=yes - MPI_LIBS="$MPI_LIBS -lmpi" + MPI_LIBS="$MPI_LIBS -lmpi -lmpio -lmpiCC" else mpi_ok=no fi diff --git a/src/Batch/BatchLight_BatchManager.cxx b/src/Batch/BatchLight_BatchManager.cxx new file mode 100644 index 000000000..4e0241f38 --- /dev/null +++ b/src/Batch/BatchLight_BatchManager.cxx @@ -0,0 +1,272 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include +#include +#include +#include +#include "BatchLight_Job.hxx" +#include "BatchLight_BatchManager.hxx" +#include "Batch_Date.hxx" +using namespace std; + +namespace BatchLight { + + // Constructeur + BatchManager::BatchManager(const batchParams& p) throw(SALOME_Exception) : _params(p) + { + SCRUTE(_params.hostname); + SCRUTE(_params.protocol); + SCRUTE(_params.username); + // On verifie que le hostname est correct + if (!gethostbyname(_params.hostname.c_str())) { // hostname unknown from network + string msg = "hostname \""; + msg += _params.hostname; + msg += 
"\" unknown from the network"; + throw SALOME_Exception(msg.c_str()); + } + _mpiImpl = NULL; + } + + // Destructeur + BatchManager::~BatchManager() + { + MESSAGE("BatchManager destructor "<<_params.hostname); + std::map < int, const BatchLight::Job * >::const_iterator it; + for(it=_jobmap.begin();it!=_jobmap.end();it++) + delete it->second; + if(_mpiImpl) delete _mpiImpl; + } + + // Methode pour le controle des jobs : soumet un job au gestionnaire + const int BatchManager::submitJob(Job* job) + { + BEGIN_OF("BatchManager::submitJob"); + int id; + + // temporary directory on cluster to put input files for job + setDirForTmpFiles(); + SCRUTE(_dirForTmpFiles); + + // export input files on cluster + exportInputFiles(job->getFileToExecute(),job->getFilesToExportList()); + + // build salome coupling script for job + buildSalomeCouplingScript(job->getFileToExecute()); + + // build batch script for job + buildSalomeBatchScript(job->getNbProc()); + + // submit job on cluster + id = submit(); + + // register job on map + _jobmap[id] = job; + END_OF("BatchManager::submitJob"); + return id; + } + + void BatchManager::setDirForTmpFiles() + { + int i; + + _dirForTmpFiles = string("Batch/"); + Batch::Date date = Batch::Date(time(0)) ; + std::string thedate = date.str() ; + int lend = thedate.size() ; + i = 0 ; + while ( i < lend ) { + if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { + thedate[i] = '_' ; + } + i++ ; + } + _dirForTmpFiles += thedate ; + } + + void BatchManager::exportInputFiles(const char *fileToExecute, const Engines::FilesList filesToExportList) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager::exportInFiles"); + string command = _params.protocol; + int status; + + command += " "; + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"mkdir -p "; + command += _dirForTmpFiles ; + command += "\"" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + 
if(status) + throw SALOME_Exception("Error of connection on remote host"); + + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + + command += fileToExecute; + command += " "; + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + int i ; + for ( i = 0 ; i < filesToExportList.length() ; i++ ) { + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + command += filesToExportList[i] ; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + } + + END_OF("BatchManager::exportInFiles"); + } + + void BatchManager::importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager::importOutputFiles"); + string command; + int status; + + const BatchLight::Job* myJob = _jobmap[jobId]; + Engines::FilesList filesToImportList = myJob->getFilesToImportList(); + + for ( int i = 0 ; i < filesToImportList.length() ; i++ ) { + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += filesToImportList[i] ; + command += " "; + command += 
directory; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + } + + END_OF("BatchManager::importOutputFiles"); + } + + string BatchManager::BuildTemporaryFileName() const + { + //build more complex file name to support multiple salome session + char *temp = new char[19]; + strcpy(temp, "/tmp/command"); + strcat(temp, "XXXXXX"); +#ifndef WNT + + mkstemp(temp); +#else + + char aPID[80]; + itoa(getpid(), aPID, 10); + strcat(temp, aPID); +#endif + + string command(temp); + delete [] temp; + command += ".sh"; + return command; + } + + void BatchManager::RmTmpFile() + { + if (_TmpFileName != ""){ + string command = "rm "; + command += _TmpFileName; + char *temp = strdup(command.c_str()); + int lgthTemp = strlen(temp); + temp[lgthTemp - 3] = '*'; + temp[lgthTemp - 2] = '\0'; + system(temp); + free(temp); + } + } + + MpiImpl *BatchManager::FactoryMpiImpl(string mpiImpl) throw(SALOME_Exception) + { + if(mpiImpl == "lam") + return new MpiImpl_LAM(); + else if(mpiImpl == "mpich1") + return new MpiImpl_MPICH1(); + else if(mpiImpl == "mpich2") + return new MpiImpl_MPICH2(); + else if(mpiImpl == "openmpi") + return new MpiImpl_OPENMPI(); + else if(mpiImpl == "indif") + throw SALOME_Exception("you must specify a mpi implementation in CatalogResources.xml file"); + else{ + ostringstream oss; + oss << mpiImpl << " : not yet implemented"; + throw SALOME_Exception(oss.str().c_str()); + } + } + +} diff --git a/src/Batch/BatchLight_BatchManager.hxx b/src/Batch/BatchLight_BatchManager.hxx new file mode 100644 index 000000000..a8ea0061a --- /dev/null +++ b/src/Batch/BatchLight_BatchManager.hxx @@ -0,0 +1,94 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free 
Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.hxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#ifndef _BL_BATCHMANAGER_H_ +#define _BL_BATCHMANAGER_H_ + +#include +#include +#include +#include "Utils_SALOME_Exception.hxx" +#include +#include +#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) +#include "MpiImpl.hxx" + +namespace BatchLight { + + class Job; + + struct batchParams{ + std::string hostname; // serveur ou tourne le BatchManager + std::string protocol; // protocole d'acces au serveur: ssh ou rsh + std::string username; // username d'acces au serveur + std::string applipath; // path of apllication directory on server + std::vector modulesList; // list of Salome modules installed on server + unsigned int nbnodes; // number of nodes on cluster + unsigned int nbprocpernode; // number of processors on each node + std::string mpiImpl; // mpi implementation + }; + + class BatchManager + { + public: + // Constructeur et destructeur + BatchManager(const batchParams& p) throw(SALOME_Exception); // connexion a la machine host + virtual ~BatchManager(); + + // Methodes pour le controle des jobs : virtuelles pures + const int submitJob(BatchLight::Job* job); // soumet un job au gestionnaire + virtual void deleteJob(const int & jobid) = 0; // retire un job du gestionnaire + virtual std::string queryJob(const int & 
jobid) = 0; // renvoie l'etat du job + void importOutputFiles( const char *directory, const CORBA::Long jobId ) throw(SALOME_Exception); + + protected: + batchParams _params; + MpiImpl *_mpiImpl; + + std::map _jobmap; + std::string _dirForTmpFiles; // repertoire temporaire sur le serveur + std::string _TmpFileName; + std::string _fileNameToExecute; + + virtual int submit() throw(SALOME_Exception) = 0; + void setDirForTmpFiles(); + void exportInputFiles( const char *fileToExecute, const Engines::FilesList filesToExportList ) throw(SALOME_Exception); + virtual void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) = 0; + virtual void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) = 0; + + std::string BuildTemporaryFileName() const; + void RmTmpFile(); + MpiImpl *FactoryMpiImpl(std::string mpiImpl) throw(SALOME_Exception); + + private: + + }; + +} + +#endif diff --git a/src/Batch/BatchLight_BatchManager_PBS.cxx b/src/Batch/BatchLight_BatchManager_PBS.cxx new file mode 100644 index 000000000..d2856d828 --- /dev/null +++ b/src/Batch/BatchLight_BatchManager_PBS.cxx @@ -0,0 +1,375 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include "BatchLight_BatchManager_PBS.hxx" +#include "utilities.h" +#include "BatchLight_Job.hxx" +#include +#include +#include +#include + +using namespace std; + +namespace BatchLight { + + // Constructeur + BatchManager_PBS::BatchManager_PBS(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) + { + // pbs batch system needs to know mpi implementation + _mpiImpl = FactoryMpiImpl(_params.mpiImpl); + } + + // Destructeur + BatchManager_PBS::~BatchManager_PBS() + { + MESSAGE("BatchManager_PBS destructor "<<_params.hostname); + } + + // Methode pour le controle des jobs : retire un job du gestionnaire + void BatchManager_PBS::deleteJob(const int & jobid) + { + BEGIN_OF("BatchManager_PBS::deleteJob"); + string command; + int status; + ostringstream oss; + oss << jobid; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qdel " ; + command += oss.str(); + command += "\""; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + MESSAGE("jobId = " << jobid << "killed"); + END_OF("BatchManager_PBS::deleteJob"); + } + + // Methode pour le controle des jobs : renvoie l'etat du job + string BatchManager_PBS::queryJob(const int & jobid) + { + 
BEGIN_OF("BatchManager_PBS::queryJob"); + // define name of log file + string jstatus; + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qstat -f " ; + ostringstream oss2; + oss2 << jobid; + command += oss2.str(); + command += "\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status && status != 153 && status != 256*153){ + MESSAGE("status="<> jstatus; + iss >> jstatus; + iss >> jstatus; + } + else + jstatus = "U"; + } + + MESSAGE("jobId = " << jobid << " " << jstatus); + END_OF("BatchManager_PBS::queryJob"); + return jstatus; + } + + void BatchManager_PBS::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::buildSalomeCouplingScript"); + int status; + + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile << "#! 
/bin/sh -f" << endl ; + tempOutputFile << "cd " ; + tempOutputFile << _params.applipath << endl ; + tempOutputFile << "export PYTHONPATH=~/" ; + tempOutputFile << _dirForTmpFiles ; + tempOutputFile << ":$PYTHONPATH" << endl ; + tempOutputFile << "if test " ; + tempOutputFile << _mpiImpl->rank() ; + tempOutputFile << " = 0; then" << endl ; + tempOutputFile << " ./runAppli --terminal --batch --modules=" ; + for ( int i = 0 ; i < _params.modulesList.size() ; i++ ) { + tempOutputFile << _params.modulesList[i] ; + if ( i != _params.modulesList.size()-1 ) + tempOutputFile << "," ; + } + tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; + tempOutputFile << " for ((ip=1; ip < "; + tempOutputFile << _mpiImpl->size(); + tempOutputFile << " ; ip++))" << endl; + tempOutputFile << " do" << endl ; + tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; + tempOutputFile << " done" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; + tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; + tempOutputFile << " ./runSession killCurrentPort" << endl; + tempOutputFile << "else" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitNS.py" << endl ; + tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'"; + tempOutputFile << _mpiImpl->rank() << endl ; + tempOutputFile << "fi" << endl ; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; 
+ command += _dirForTmpFiles ; + command += "/runSalome_" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(_fileNameToExecute) ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + RmTmpFile(); + + END_OF("BatchManager_PBS::buildSalomeCouplingScript"); + } + + void BatchManager_PBS::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::buildSalomeBatchScript"); + int status; + + int nbmaxproc = _params.nbnodes * _params.nbprocpernode; + if( nbproc > nbmaxproc ){ + MESSAGE(nbproc << " processors asked on a cluster of " << nbmaxproc << " processors"); + throw SALOME_Exception("Too much processors asked for that cluster"); + } + + int nbnodes; + if( nbproc < _params.nbnodes ) + nbnodes = nbproc; + else + nbnodes = _params.nbnodes; + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + + ostringstream filenameToExecute; + filenameToExecute << " ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh"; + + tempOutputFile << "#! 
/bin/sh -f" << endl ; + tempOutputFile << "#PBS -l nodes=" << nbnodes << endl ; + tempOutputFile << "#PBS -o ~/" << _dirForTmpFiles << "/runSalome.log${PBS_JOBID}" << endl ; + tempOutputFile << _mpiImpl->boot("${PBS_NODEFILE}",nbnodes); + tempOutputFile << _mpiImpl->run("${PBS_NODEFILE}",nbproc,filenameToExecute.str()); + tempOutputFile << _mpiImpl->halt(); + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + RmTmpFile(); + END_OF("BatchManager_PBS::buildSalomeBatchScript"); + + } + + int BatchManager_PBS::submit() throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_PBS::submit"); + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"qsub " ; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command 
+= "_Batch.sh\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + // read id of submitted job in log file + char line[128]; + FILE *fp = fopen(logFile.c_str(),"r"); + fgets( line, 128, fp); + fclose(fp); + + string sline(line); + int pos = sline.find("."); + string strjob; + if(pos == string::npos) + strjob = sline; + else + strjob = sline.substr(0,pos); + + int id; + istringstream iss(strjob); + iss >> id; + + END_OF("BatchManager_PBS::submit"); + return id; + } + +} diff --git a/src/Batch/BatchLight_BatchManager_PBS.hxx b/src/Batch/BatchLight_BatchManager_PBS.hxx new file mode 100644 index 000000000..e7e5789c7 --- /dev/null +++ b/src/Batch/BatchLight_BatchManager_PBS.hxx @@ -0,0 +1,59 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
  // Batch manager driving a PBS batch system (qsub / qstat / qdel) on a
  // remote host reached through rsh or ssh.
  class BatchManager_PBS : public BatchManager
  {
  public:
    // Constructor and destructor
    BatchManager_PBS(const batchParams& p) throw(SALOME_Exception); // connection to the host machine
    virtual ~BatchManager_PBS();

    // Job control methods
    // (presumably pure virtual in BatchManager - confirm against the base class)
    void deleteJob(const int & jobid); // remove a job from the manager
    std::string queryJob(const int & jobid); // return the state of the job

  private:
    // Generate the per-process launch script for fileToExecute and copy it to the remote host
    void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception);
    // Generate the batch script requesting nbproc processes and copy it to the remote host
    void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception);
    // Submit the batch script on the remote host and return the job id
    int submit() throw(SALOME_Exception);
  };
+// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include "BatchLight_BatchManager_SLURM.hxx" +#include "utilities.h" +#include "BatchLight_Job.hxx" +#include +#include +#include +#include + +using namespace std; + +namespace BatchLight { + + // Constructeur + BatchManager_SLURM::BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception) : BatchManager(p) + { + } + + // Destructeur + BatchManager_SLURM::~BatchManager_SLURM() + { + MESSAGE("BatchManager_SLURM destructor "<<_params.hostname); + } + + // Methode pour le controle des jobs : retire un job du gestionnaire + void BatchManager_SLURM::deleteJob(const int & jobid) + { + BEGIN_OF("BatchManager_SLURM::deleteJob"); + string command; + int status; + ostringstream oss; + oss << jobid; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"bkill " ; + command += oss.str(); + command += "\""; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + MESSAGE("jobId = " << jobid << "killed"); + 
END_OF("BatchManager_SLURM::deleteJob"); + } + + // Methode pour le controle des jobs : renvoie l'etat du job + string BatchManager_SLURM::queryJob(const int & jobid) + { + BEGIN_OF("BatchManager_SLURM::queryJob"); + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + + string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"bjobs " ; + ostringstream oss2; + oss2 << jobid; + command += oss2.str(); + command += "\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + // read staus of job in log file + char line[128]; + ifstream fp(logFile.c_str(),ios::in); + fp.getline(line,80,'\n'); + + string sjobid, username, jstatus; + fp >> sjobid; + fp >> username; + fp >> jstatus; + + MESSAGE("jobId = " << jobid << " " << jstatus); + END_OF("BatchManager_SLURM::queryJob"); + return jstatus; + } + + void BatchManager_SLURM::buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_SLURM::buildSalomeCouplingScript"); + int status; + + string::size_type p1 = string(fileToExecute).find_last_of("/"); + string::size_type p2 = string(fileToExecute).find_last_of("."); + _fileNameToExecute = string(fileToExecute).substr(p1+1,p2-p1-1); + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + tempOutputFile << "#! 
/bin/sh -f" << endl ; + tempOutputFile << "cd " ; + tempOutputFile << _params.applipath << endl ; + tempOutputFile << "export PYTHONPATH=~/" ; + tempOutputFile << _dirForTmpFiles ; + tempOutputFile << ":$PYTHONPATH" << endl ; + tempOutputFile << "if test $SLURM_PROCID = 0; then" << endl ; + tempOutputFile << " ./runAppli --terminal --batch --modules=" ; + for ( int i = 0 ; i < _params.modulesList.size() ; i++ ) { + tempOutputFile << _params.modulesList[i] ; + if ( i != _params.modulesList.size()-1 ) + tempOutputFile << "," ; + } + tempOutputFile << " --standalone=registry,study,moduleCatalog --killall &" << endl ; + tempOutputFile << " for ((ip=1; ip < ${SLURM_NPROCS} ; ip++))" << endl; + tempOutputFile << " do" << endl ; + tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; + tempOutputFile << " done" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitContainers.py $arglist" << endl ; + tempOutputFile << " ./runSession python ~/" << _dirForTmpFiles << "/" << _fileNameToExecute << ".py" << endl; + tempOutputFile << " ./runSession killCurrentPort" << endl; + tempOutputFile << "else" << endl ; + tempOutputFile << " sleep 5" << endl ; + tempOutputFile << " ./runSession waitNS.py" << endl ; + tempOutputFile << " ./runSession SALOME_Container 'YACS_Server_'${SLURM_PROCID}" << endl ; + tempOutputFile << "fi" << endl ; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/runSalome_" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; 
+ SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + RmTmpFile(); + + END_OF("BatchManager_SLURM::buildSalomeCouplingScript"); + } + + void BatchManager_SLURM::buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_SLURM::buildSalomeBatchScript"); + int status; + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + + tempOutputFile << "#! /bin/sh -f" << endl ; + tempOutputFile << "#BSUB -n " << nbproc << endl ; + tempOutputFile << "#BSUB -o ~/" << _dirForTmpFiles << "/runSalome.log%J" << endl ; + tempOutputFile << "mpirun -srun ~/" << _dirForTmpFiles << "/runSalome_" << _fileNameToExecute << "_Batch.sh" << endl ; + tempOutputFile.flush(); + tempOutputFile.close(); + chmod(_TmpFileName.c_str(), 0x1ED); + SCRUTE(_TmpFileName.c_str()) ; + + string command; + if( _params.protocol == "rsh" ) + command = "rcp "; + else if( _params.protocol == "ssh" ) + command = "scp "; + else + throw SALOME_Exception("Unknown protocol"); + command += _TmpFileName; + command += " "; + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + command += _params.hostname; + command += ":"; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh" ; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + RmTmpFile(); + END_OF("BatchManager_SLURM::buildSalomeBatchScript"); + + } + + int BatchManager_SLURM::submit() throw(SALOME_Exception) + { + BEGIN_OF("BatchManager_SLURM::submit"); + + // define name of log file + string logFile="/tmp/logs/"; + logFile += getenv("USER"); + logFile += "/batchSalome_"; + + srand ( time(NULL) ); + int ir = rand(); + ostringstream oss; + oss << ir; + logFile += oss.str(); + logFile += ".log"; + 
+ string command; + int status; + + // define command to submit batch + if( _params.protocol == "rsh" ) + command = "rsh "; + else if( _params.protocol == "ssh" ) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (_params.username != ""){ + command += _params.username; + command += "@"; + } + + command += _params.hostname; + command += " \"bsub < " ; + command += _dirForTmpFiles ; + command += "/" ; + command += _fileNameToExecute ; + command += "_Batch.sh\" > "; + command += logFile; + SCRUTE(command.c_str()); + status = system(command.c_str()); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + // read id of submitted job in log file + char line[128]; + FILE *fp = fopen(logFile.c_str(),"r"); + fgets( line, 128, fp); + fclose(fp); + + string sline(line); + int p1 = sline.find("<"); + int p2 = sline.find(">"); + string strjob = sline.substr(p1+1,p2-p1-1); + + int id; + istringstream iss(strjob); + iss >> id; + + END_OF("BatchManager_SLURM::submit"); + return id; + } + +} diff --git a/src/Batch/BatchLight_BatchManager_SLURM.hxx b/src/Batch/BatchLight_BatchManager_SLURM.hxx new file mode 100644 index 000000000..ed21624dd --- /dev/null +++ b/src/Batch/BatchLight_BatchManager_SLURM.hxx @@ -0,0 +1,62 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
  // Batch manager for clusters where jobs are launched with "mpirun -srun".
  // NOTE(review): despite its name, the implementation in
  // BatchLight_BatchManager_SLURM.cxx issues bsub / bjobs / bkill commands
  // and "#BSUB" directives - confirm which batch front-end is intended.
  class BatchManager_SLURM : public BatchManager
  {
  public:
    // Constructor and destructor
    BatchManager_SLURM(const batchParams& p) throw(SALOME_Exception); // connection to the host machine
    virtual ~BatchManager_SLURM();

    // Job control methods
    // (presumably pure virtual in BatchManager - confirm against the base class)
    void deleteJob(const int & jobid); // remove a job from the manager
    std::string queryJob(const int & jobid); // return the state of the job

  protected:
    // Generate the per-process launch script for fileToExecute and copy it to the remote host
    void buildSalomeCouplingScript( const char *fileToExecute ) throw(SALOME_Exception);
    // Generate the batch script requesting nbproc processes and copy it to the remote host
    void buildSalomeBatchScript( const int nbproc ) throw(SALOME_Exception);
    // Submit the batch script on the remote host and return the job id
    int submit() throw(SALOME_Exception);

  private:

  };
  // Constructor: stores the description of a batch job.
  //   fileToExecute : file to run; only the pointer is stored, the string is not copied
  //   filesToExport : copied into _filesToExport
  //   filesToImport : copied into _filesToImport
  //   nbproc        : copied into _nbproc
  Job::Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc) : _fileToExecute(fileToExecute), _filesToExport(filesToExport), _filesToImport(filesToImport), _nbproc(nbproc)
  {
    // Nothing to do
  }

  // Destructor: only emits a trace message.
  Job::~Job()
  {
    MESSAGE("Job destructor");
  }
  // Immutable description of a batch job: the file to execute, the files to
  // export and import, and the number of processes.
  class Job
  {
  public:
    // Constructor and destructor
    Job(const char *fileToExecute, const Engines::FilesList& filesToExport, const Engines::FilesList& filesToImport, const int nbproc);
    virtual ~Job();

    // Accessors; the file lists are returned by value (a copy per call)
    const char *getFileToExecute() const { return _fileToExecute; }
    const Engines::FilesList getFilesToExportList() const { return _filesToExport; }
    const Engines::FilesList getFilesToImportList() const { return _filesToImport; }
    const int getNbProc() const { return _nbproc; }

  protected:
    // NOTE(review): raw pointer handed to the constructor, not a copy - the
    // caller's string presumably has to outlive the Job; confirm at call sites.
    const char* _fileToExecute;
    const Engines::FilesList _filesToExport;
    const Engines::FilesList _filesToImport;
    const int _nbproc;

  private:

  };
BatchLight_BatchManager_SLURM.cxx \ + BatchLight_BatchManager_PBS.cxx \ + BatchLight_Job.cxx \ + MpiImpl.cxx LIB_CPPFLAGS = \ @@ -203,8 +213,10 @@ libSalomeBatch_la_CPPFLAGS = \ @PYTHON_INCLUDES@ \ -I$(srcdir)/../Basics \ -I$(srcdir)/../SALOMELocalTrace \ + -I$(srcdir)/../Utils \ -I$(top_builddir)/salome_adm/unix \ - $(LIB_CPPFLAGS) + -I$(top_builddir)/idl \ + @CORBA_CXXFLAGS@ @CORBA_INCLUDES@ $(LIB_CPPFLAGS) libSalomeBatch_la_LDFLAGS = -no-undefined -version-info=0:0:0 libSalomeBatch_la_LIBADD = \ diff --git a/src/Batch/MpiImpl.cxx b/src/Batch/MpiImpl.cxx new file mode 100644 index 000000000..036018b1e --- /dev/null +++ b/src/Batch/MpiImpl.cxx @@ -0,0 +1,212 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +/* + * BatchManager.cxx : + * + * Auteur : Bernard SECHER - CEA/DEN + * Date : Juillet 2007 + * Projet : SALOME + * + */ + +#include +#include +#include +#include "utilities.h" +#include "MpiImpl.hxx" + +using namespace std; + +// Constructor +MpiImpl::MpiImpl() +{ + MESSAGE("MpiImpl constructor"); +} + +// Destructor +MpiImpl::~MpiImpl() +{ + MESSAGE("MpiImpl destructor"); +} + +// lam implementation +// Constructor +MpiImpl_LAM::MpiImpl_LAM() : MpiImpl() +{ +} + +// Destructor +MpiImpl_LAM::~MpiImpl_LAM() +{ + MESSAGE("MpiImpl_LAM destructor"); +} + +string MpiImpl_LAM::size() +{ + return "${LAMWORLD}"; +} + +string MpiImpl_LAM::rank() +{ + return "${LAMRANK}"; +} + +string MpiImpl_LAM::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "lamboot " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_LAM::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_LAM::halt() +{ + ostringstream oss; + oss << "lamhalt" << endl; + return oss.str(); +} + +// mpich1 implementation +// Constructor +MpiImpl_MPICH1::MpiImpl_MPICH1() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH1::~MpiImpl_MPICH1() +{ + MESSAGE("MpiImpl_MPICH1 destructor"); +} + +string MpiImpl_MPICH1::size() +{ + throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::rank() +{ + throw SALOME_Exception("mpich1 doesn't work with this batch system to submit salome session"); +} + +string MpiImpl_MPICH1::boot(const 
string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_MPICH1::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -machinefile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH1::halt() +{ + return ""; +} + +// mpich2 implementation +// Constructor +MpiImpl_MPICH2::MpiImpl_MPICH2() : MpiImpl() +{ +} + +// Destructor +MpiImpl_MPICH2::~MpiImpl_MPICH2() +{ + MESSAGE("MpiImpl_MPICH2 destructor"); +} + +string MpiImpl_MPICH2::size() +{ + return "${PMI_SIZE}"; +} + +string MpiImpl_MPICH2::rank() +{ + return "${PMI_RANK}"; +} + +string MpiImpl_MPICH2::boot(const string machinefile, const unsigned int nbnodes) +{ + ostringstream oss; + oss << "mpdboot -n " << nbnodes << " -f " << machinefile << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string MpiImpl_MPICH2::halt() +{ + ostringstream oss; + oss << "mpdallexit" << endl; + return oss.str(); +} + +// openmpi implementation +// Constructor +MpiImpl_OPENMPI::MpiImpl_OPENMPI() : MpiImpl() +{ +} + +// Destructor +MpiImpl_OPENMPI::~MpiImpl_OPENMPI() +{ + MESSAGE("MpiImpl_OPENMPI destructor"); +} + +string MpiImpl_OPENMPI::size() +{ + return "${OMPI_MCA_ns_nds_num_procs}"; +} + +string MpiImpl_OPENMPI::rank() +{ + return "${OMPI_MCA_ns_nds_vpid}"; +} + +string MpiImpl_OPENMPI::boot(const string machinefile, const unsigned int nbnodes) +{ + return ""; +} + +string MpiImpl_OPENMPI::run(const string machinefile, const unsigned int nbproc, const string fileNameToExecute) +{ + ostringstream oss; + oss << "mpirun -hostfile " << machinefile << " -np " << nbproc << " " << fileNameToExecute << endl; + return oss.str(); +} + +string 
// Abstract description of an MPI implementation: each method returns a piece
// of shell text (an expression or a command line) to be written into a
// generated batch script.
class MpiImpl
{
public:
  // Constructor and destructor
  MpiImpl(); // constructor
  virtual ~MpiImpl(); //Destructor

  virtual std::string size() = 0; // get number of process of current job
  virtual std::string rank() = 0; // get process number of current job
  virtual std::string boot(const std::string machinefile, const unsigned int nbnodes) = 0; // get boot command
  virtual std::string run(const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute) = 0; // get run command
  virtual std::string halt() = 0; // get stop command

protected:

private:

};
// MPICH1 flavour of MpiImpl.
// In MpiImpl.cxx, size() and rank() throw a SALOME_Exception for this
// implementation, and boot() and halt() return an empty string.
class MpiImpl_MPICH1 : public MpiImpl
{
public:
  // Constructor and destructor
  MpiImpl_MPICH1(); // constructor
  virtual ~MpiImpl_MPICH1(); //Destructor

  std::string size(); // get number of process of current job
  std::string rank(); // get process number of current job
  std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command
  std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command
  std::string halt(); // get stop command

protected:

private:

};
std::string rank(); // get process number of current job + std::string boot( const std::string machinefile, const unsigned int nbnodes); // get boot command + std::string run( const std::string machinefile, const unsigned int nbproc, const std::string fileNameToExecute); // get run command + std::string halt(); // get stop command + +protected: + +private: + +}; + +#endif diff --git a/src/Container/Container_i.cxx b/src/Container/Container_i.cxx index 0c4928a9b..61d3e2a61 100644 --- a/src/Container/Container_i.cxx +++ b/src/Container/Container_i.cxx @@ -300,7 +300,8 @@ void Engines_Container_i::Shutdown() MESSAGE("Effective Shutdown of container Begins..."); LocalTraceBufferPool* bp1 = LocalTraceBufferPool::instance(); bp1->deleteInstance(bp1); - _orb->shutdown(0); + if(!CORBA::is_nil(_orb)) + _orb->shutdown(0); } } diff --git a/src/Container/Makefile.am b/src/Container/Makefile.am index e9e6f065b..409e9e0bb 100644 --- a/src/Container/Makefile.am +++ b/src/Container/Makefile.am @@ -65,6 +65,7 @@ COMMON_CPPFLAGS=\ @PYTHON_INCLUDES@ \ @MPI_INCLUDES@ \ @HDF5_INCLUDES@ \ + -I$(srcdir)/../Batch \ -I$(srcdir)/../Basics \ -I$(srcdir)/../SALOMELocalTrace \ -I$(srcdir)/../NamingService \ @@ -87,6 +88,7 @@ COMMON_LIBS =\ ../SALOMELocalTrace/libSALOMELocalTrace.la \ ../Basics/libSALOMEBasics.la \ ../HDFPersist/libSalomeHDFPersist.la \ + ../Batch/libSalomeBatch.la \ $(top_builddir)/idl/libSalomeIDLKernel.la\ @MPI_LIBS@ \ @CORBA_LIBS@ @@ -129,7 +131,7 @@ endif # Executables targets # =============================================================== # -bin_PROGRAMS = SALOME_Container SALOME_ContainerManagerServer +bin_PROGRAMS = SALOME_Container noinst_PROGRAMS = TestSalome_file SALOME_Container_SOURCES =\ @@ -142,22 +144,12 @@ SALOME_Container_CPPFLAGS =\ SALOME_Container_LDADD =\ libSalomeContainer.la \ $(COMMON_LIBS) \ - ../Basics/libSALOMEBasics.la + ../Basics/libSALOMEBasics.la \ + ../Batch/libSalomeBatch.la SALOME_Container_LDFLAGS =\ -Xlinker -export-dynamic 
-SALOME_ContainerManagerServer_SOURCES =\ - SALOME_ContainerManagerServer.cxx - -SALOME_ContainerManagerServer_CPPFLAGS=\ - $(COMMON_CPPFLAGS) - -SALOME_ContainerManagerServer_LDADD =\ - libSalomeContainer.la \ - $(COMMON_LIBS) \ - ../Basics/libSALOMEBasics.la - TestSalome_file_SOURCES =\ TestSalome_file.cxx diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 296e3e0d2..0fc39bd91 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -26,6 +26,7 @@ #endif #include #include "Utils_CorbaException.hxx" +#include "Batch_Date.hxx" #ifdef WITH_PACO_PARALLEL #include "PaCO++.h" @@ -35,6 +36,10 @@ using namespace std; +vector SALOME_ContainerManager::_batchLaunchedContainers; + +vector::iterator SALOME_ContainerManager::_batchLaunchedContainersIter; + const char *SALOME_ContainerManager::_ContainerManagerNameInNS = "/ContainerManager"; @@ -47,27 +52,25 @@ const char *SALOME_ContainerManager::_ContainerManagerNameInNS = */ //============================================================================= -SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb) +SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns) { MESSAGE("constructor"); - _NS = new SALOME_NamingService(orb); - _ResManager = new SALOME_ResourcesManager(orb); + _NS = ns; + _ResManager = rm; _id=0; - PortableServer::POA_var root_poa = PortableServer::POA::_the_root_poa(); - PortableServer::POAManager_var pman = root_poa->the_POAManager(); - PortableServer::POA_var my_poa; + PortableServer::POAManager_var pman = poa->the_POAManager(); + _orb = CORBA::ORB::_duplicate(orb) ; CORBA::PolicyList policies; policies.length(1); PortableServer::ThreadPolicy_var threadPol = - root_poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL); + poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL); 
policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol); - my_poa = - root_poa->create_POA("SThreadPOA",pman,policies); + _poa = poa->create_POA("SThreadPOA",pman,policies); threadPol->destroy(); - PortableServer::ObjectId_var id = my_poa->activate_object(this); - CORBA::Object_var obj = my_poa->id_to_reference(id); + PortableServer::ObjectId_var id = _poa->activate_object(this); + CORBA::Object_var obj = _poa->id_to_reference(id); Engines::ContainerManager_var refContMan = Engines::ContainerManager::_narrow(obj); @@ -84,8 +87,6 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb) SALOME_ContainerManager::~SALOME_ContainerManager() { MESSAGE("destructor"); - delete _NS; - delete _ResManager; } //============================================================================= @@ -98,10 +99,10 @@ void SALOME_ContainerManager::Shutdown() { MESSAGE("Shutdown"); ShutdownContainers(); - PortableServer::ObjectId_var oid = _default_POA()->servant_to_id(this); - _default_POA()->deactivate_object(oid); + _NS->Destroy_Name(_ContainerManagerNameInNS); + PortableServer::ObjectId_var oid = _poa->servant_to_id(this); + _poa->deactivate_object(oid); _remove_ref(); - } //============================================================================= @@ -113,36 +114,34 @@ void SALOME_ContainerManager::Shutdown() void SALOME_ContainerManager::ShutdownContainers() { MESSAGE("ShutdownContainers"); - _NS->Change_Directory("/Containers"); - vector vec = _NS->list_directory_recurs(); - list lstCont; - for(vector::iterator iter = vec.begin();iter!=vec.end();iter++) - { + bool isOK; + isOK = _NS->Change_Directory("/Containers"); + if( isOK ){ + vector vec = _NS->list_directory_recurs(); + list lstCont; + for(vector::iterator iter = vec.begin();iter!=vec.end();iter++){ SCRUTE((*iter)); CORBA::Object_var obj=_NS->Resolve((*iter).c_str()); Engines::Container_var cont=Engines::Container::_narrow(obj); - if(!CORBA::is_nil(cont)) - { - lstCont.push_back((*iter)); - } + 
if(!CORBA::is_nil(cont)){ + lstCont.push_back((*iter)); + } } - MESSAGE("Container list: "); - for(list::iterator iter=lstCont.begin();iter!=lstCont.end();iter++) - { + MESSAGE("Container list: "); + for(list::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){ SCRUTE((*iter)); } - for(list::iterator iter=lstCont.begin();iter!=lstCont.end();iter++) - { + for(list::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){ SCRUTE((*iter)); CORBA::Object_var obj=_NS->Resolve((*iter).c_str()); Engines::Container_var cont=Engines::Container::_narrow(obj); - if(!CORBA::is_nil(cont)) - { - MESSAGE("ShutdownContainers: " << (*iter)); - cont->Shutdown(); - } + if(!CORBA::is_nil(cont)){ + MESSAGE("ShutdownContainers: " << (*iter)); + cont->Shutdown(); + } else MESSAGE("ShutdownContainers: no container ref for " << (*iter)); } + } } //============================================================================= @@ -158,10 +157,6 @@ SALOME_ContainerManager:: FindOrStartContainer(const Engines::MachineParameters& params, const Engines::MachineList& possibleComputers) { - long id; - string containerNameInNS; - char idc[3*sizeof(long)]; - Engines::Container_ptr ret = FindContainer(params,possibleComputers); if(!CORBA::is_nil(ret)) return ret; @@ -295,9 +290,10 @@ StartContainer(const Engines::MachineParameters& params, Engines::Container_ptr SALOME_ContainerManager:: StartContainer(const Engines::MachineParameters& params, - Engines::ResPolicy policy) + Engines::ResPolicy policy, + const Engines::CompoList& componentList) { - Engines::MachineList_var possibleComputers = GetFittingResources(params,""); + Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params,componentList); return StartContainer(params,possibleComputers,policy); } @@ -440,51 +436,28 @@ FindOrStartParallelContainer(const Engines::MachineParameters& params, #endif //============================================================================= -/*! 
- * - */ -//============================================================================= - -Engines::MachineList * -SALOME_ContainerManager:: -GetFittingResources(const Engines::MachineParameters& params, - const char *componentName) -{ - MESSAGE("SALOME_ContainerManager::GetFittingResources"); - Engines::MachineList *ret=new Engines::MachineList; - vector vec; - try - { - vec = _ResManager->GetFittingResources(params,componentName); - } - catch(const SALOME_Exception &ex) - { - INFOS("Caught exception."); - THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM); - //return ret; - } - - // MESSAGE("Machine list length "<length(vec.size()); - for(unsigned int i=0;iFindFirst(possibleComputers); - return CORBA::string_dup(theMachine.c_str()); + char *valenv=getenv("SALOME_BATCH"); + if(valenv) + if (strcmp(valenv,"1")==0) + { + if(_batchLaunchedContainers.empty()) + fillBatchLaunchedContainers(); + return *(_batchLaunchedContainersIter++); + } + return StartContainer(params,policy,componentList); } //============================================================================= @@ -637,3 +610,17 @@ long SALOME_ContainerManager::GetIdForContainer(void) return _id; } +void SALOME_ContainerManager::fillBatchLaunchedContainers() +{ + _batchLaunchedContainers.clear(); + _NS->Change_Directory("/Containers"); + vector vec = _NS->list_directory_recurs(); + for(vector::iterator iter = vec.begin();iter!=vec.end();iter++){ + CORBA::Object_var obj=_NS->Resolve((*iter).c_str()); + Engines::Container_ptr cont=Engines::Container::_narrow(obj); + if(!CORBA::is_nil(cont)){ + _batchLaunchedContainers.push_back(cont); + } + } + _batchLaunchedContainersIter=_batchLaunchedContainers.begin(); +} diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index aa7ab18c2..34888f98c 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -37,7 +37,7 @@ class CONTAINER_EXPORT SALOME_ContainerManager: { 
public: - SALOME_ContainerManager(CORBA::ORB_ptr orb); + SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns); ~SALOME_ContainerManager(); Engines::Container_ptr @@ -51,13 +51,13 @@ public: Engines::Container_ptr StartContainer(const Engines::MachineParameters& params, - Engines::ResPolicy policy); - - Engines::MachineList * - GetFittingResources(const Engines::MachineParameters& params, - const char *componentName); + Engines::ResPolicy policy, + const Engines::CompoList& componentList); - char* FindFirst(const Engines::MachineList& possibleComputers); + Engines::Container_ptr + GiveContainer(const Engines::MachineParameters& params, + Engines::ResPolicy policy, + const Engines::CompoList& componentList); void Shutdown(); void ShutdownContainers(); @@ -83,11 +83,17 @@ protected: const Engines::MachineParameters& params, const std::string& name); + void fillBatchLaunchedContainers(); + long GetIdForContainer(void); long _id; + CORBA::ORB_var _orb; + PortableServer::POA_var _poa; SALOME_ResourcesManager *_ResManager; SALOME_NamingService *_NS; + static std::vector _batchLaunchedContainers; + static std::vector::iterator _batchLaunchedContainersIter; }; #endif diff --git a/src/Launcher/Makefile.am b/src/Launcher/Makefile.am new file mode 100644 index 000000000..4104db49d --- /dev/null +++ b/src/Launcher/Makefile.am @@ -0,0 +1,130 @@ +# SALOME Container : implementation of container and engine for Kernel +# +# Copyright (C) 2003 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +# CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# +# +# +# File : Makefile.am +# Author : Guillaume Boulant (CSSI) +# Module : KERNEL +# $Header$ + + +include $(top_srcdir)/salome_adm/unix/make_common_starter.am + +# +# =============================================================== +# Header to be installed +# =============================================================== +# +# header files +salomeinclude_HEADERS = \ + SALOME_Launcher.hxx + +# Scripts to be installed +dist_salomescript_DATA = + +# These files are executable scripts +dist_salomescript_SCRIPTS= + +# +# =============================================================== +# Local definitions +# =============================================================== +# + +# This local variable defines the list of CPPFLAGS common to all target in this package. +COMMON_CPPFLAGS=\ + @PYTHON_INCLUDES@ \ + @MPI_INCLUDES@ \ + @CAS_CPPFLAGS@ @CAS_CXXFLAGS@ \ + @LIBXML_INCLUDES@ \ + -I$(srcdir)/../Batch \ + -I$(srcdir)/../Basics \ + -I$(srcdir)/../SALOMELocalTrace \ + -I$(srcdir)/../NamingService \ + -I$(srcdir)/../Utils \ + -I$(srcdir)/../Registry \ + -I$(srcdir)/../Notification \ + -I$(srcdir)/../ResourcesManager \ + -I$(srcdir)/../Container \ + -I$(top_builddir)/salome_adm/unix \ + -I$(top_builddir)/idl \ + @CORBA_CXXFLAGS@ @CORBA_INCLUDES@ + +# This local variable defines the list of dependant libraries common to all target in this package. 
+COMMON_LIBS =\ + ../Registry/libRegistry.la \ + ../Notification/libSalomeNotification.la \ + ../Container/libSalomeContainer.la \ + ../ResourcesManager/libSalomeResourcesManager.la \ + ../NamingService/libSalomeNS.la \ + ../Utils/libOpUtil.la \ + ../SALOMELocalTrace/libSALOMELocalTrace.la \ + ../Basics/libSALOMEBasics.la \ + ../Batch/libSalomeBatch.la \ + $(top_builddir)/idl/libSalomeIDLKernel.la\ + @MPI_LIBS@ \ + @CORBA_LIBS@ \ + @LIBXML_LIBS@ + +# @PYTHON_LIBS@ + +# +# =============================================================== +# Libraries targets +# =============================================================== +# +lib_LTLIBRARIES = libSalomeLauncher.la +libSalomeLauncher_la_SOURCES=\ + SALOME_Launcher.cxx + +libSalomeLauncher_la_CPPFLAGS =\ + $(COMMON_CPPFLAGS) + +libSalomeLauncher_la_LDFLAGS =\ + -no-undefined -version-info=0:0:0 \ + @LDEXPDYNFLAGS@ + +libSalomeLauncher_la_LIBADD =\ + $(PYTHON_LIBS) \ + $(COMMON_LIBS) + + +# +# =============================================================== +# Executables targets +# =============================================================== +# +bin_PROGRAMS = SALOME_LauncherServer + +SALOME_LauncherServer_SOURCES =\ + SALOME_LauncherServer.cxx + +SALOME_LauncherServer_CPPFLAGS=\ + $(COMMON_CPPFLAGS) + +SALOME_LauncherServer_LDADD =\ + libSalomeLauncher.la \ + $(COMMON_LIBS) \ + ../Basics/libSALOMEBasics.la \ + ../Batch/libSalomeBatch.la + + diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx new file mode 100644 index 000000000..564e361c7 --- /dev/null +++ b/src/Launcher/SALOME_Launcher.cxx @@ -0,0 +1,265 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. 
+// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +#include "BatchLight_BatchManager_PBS.hxx" +#include "BatchLight_BatchManager_SLURM.hxx" +#include "BatchLight_Job.hxx" +#include "SALOME_Launcher.hxx" +#include "OpUtil.hxx" +#include +#ifndef WNT +#include +#endif +#include +#include "Utils_CorbaException.hxx" +#include "Batch_Date.hxx" + +#define TIME_OUT_TO_LAUNCH_CONT 21 + +using namespace std; + +const char *SALOME_Launcher::_LauncherNameInNS = "/SalomeLauncher"; + +//============================================================================= +/*! 
+ * Constructor + * \param orb + * Define a CORBA single thread policy for the server, which avoid to deal + * with non thread-safe usage like Change_Directory in SALOME naming service + */ +//============================================================================= + +SALOME_Launcher::SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa) +{ + MESSAGE("constructor"); + _NS = new SALOME_NamingService(orb); + _ResManager = new SALOME_ResourcesManager(orb,poa,_NS); + _ContManager = new SALOME_ContainerManager(orb,poa,_ResManager,_NS); + + _orb = CORBA::ORB::_duplicate(orb) ; + _poa = PortableServer::POA::_duplicate(poa) ; + PortableServer::ObjectId_var id = _poa->activate_object(this); + CORBA::Object_var obj = _poa->id_to_reference(id); + Engines::SalomeLauncher_var refContMan = Engines::SalomeLauncher::_narrow(obj); + + _NS->Register(refContMan,_LauncherNameInNS); + MESSAGE("constructor end"); +} + +//============================================================================= +/*! + * destructor + */ +//============================================================================= + +SALOME_Launcher::~SALOME_Launcher() +{ + MESSAGE("destructor"); + delete _NS; + delete _ResManager; + delete _ContManager; + std::map < string, BatchLight::BatchManager * >::const_iterator it; + for(it=_batchmap.begin();it!=_batchmap.end();it++) + delete it->second; +} + +//============================================================================= +/*! 
CORBA method: + * shutdown all the containers, then the ContainerManager servant + */ +//============================================================================= + +void SALOME_Launcher::Shutdown() +{ + MESSAGE("Shutdown"); + _NS->Destroy_Name(_LauncherNameInNS); + _ContManager->Shutdown(); + _ResManager->Shutdown(); + PortableServer::ObjectId_var oid = _poa->servant_to_id(this); + _poa->deactivate_object(oid); + _remove_ref(); + if(!CORBA::is_nil(_orb)) + _orb->shutdown(0); +} + +//============================================================================= +/*! CORBA Method: + * Submit a batch job on a cluster and returns the JobId + * \param fileToExecute : .py/.exe/.sh/... to execute on the batch cluster + * \param filesToExport : to export on the batch cluster + * \param NumberOfProcessors : Number of processors needed on the batch cluster + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +CORBA::Long SALOME_Launcher::submitSalomeJob( const char * fileToExecute , + const Engines::FilesList& filesToExport , + const Engines::FilesList& filesToImport , + const CORBA::Long NumberOfProcessors , + const Engines::MachineParameters& params) +{ + MESSAGE("BEGIN OF SALOME_Launcher::submitSalomeJob"); + CORBA::Long jobId; + try{ + // find a cluster matching the structure params + Engines::CompoList aCompoList ; + Engines::MachineList_var aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + const Engines::MachineParameters* p = _ResManager->GetMachineParameters(aMachineList[0]); + string clustername(p->alias); + + // search batch manager for that cluster in map or instantiate one + std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + SCRUTE(clustername); + if(it == _batchmap.end()) + _batchmap[clustername] = FactoryBatchManager( p ); + + // submit job on cluster + BatchLight::Job* job = new 
BatchLight::Job( fileToExecute, filesToExport, filesToImport, NumberOfProcessors ); + jobId = _batchmap[clustername]->submitJob(job); + } + catch(const SALOME_Exception &ex){ + MESSAGE(ex.what()); + THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::INTERNAL_ERROR); + } + return jobId; +} + +//============================================================================= +/*! CORBA Method: + * Query a batch job on a cluster and returns the status of job + * \param jobId : identification of Salome job + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +char* SALOME_Launcher::querySalomeJob( const CORBA::Long jobId, + const Engines::MachineParameters& params) +{ + string status; + try{ + // find a cluster matching params structure + Engines::CompoList aCompoList ; + Engines::MachineList_var aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + const Engines::MachineParameters* p = _ResManager->GetMachineParameters(aMachineList[0]); + string clustername(p->alias); + + // search batch manager for that cluster in map + std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + if(it == _batchmap.end()) + throw SALOME_Exception("no batchmanager for that cluster"); + + status = _batchmap[clustername]->queryJob(jobId); + } + catch(const SALOME_Exception &ex){ + INFOS("Caught exception."); + THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM); + } + return CORBA::string_dup(status.c_str()); +} + +//============================================================================= +/*! 
CORBA Method: + * Delete a batch job on a cluster + * \param jobId : identification of Salome job + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +void SALOME_Launcher::deleteSalomeJob( const CORBA::Long jobId, + const Engines::MachineParameters& params) +{ + try{ + // find a cluster matching params structure + Engines::CompoList aCompoList ; + Engines::MachineList_var aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + const Engines::MachineParameters* p = _ResManager->GetMachineParameters(aMachineList[0]); + string clustername(p->alias); + + // search batch manager for that cluster in map + std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + if(it == _batchmap.end()) + throw SALOME_Exception("no batchmanager for that cluster"); + + _batchmap[clustername]->deleteJob(jobId); + } + catch(const SALOME_Exception &ex){ + INFOS("Caught exception."); + THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM); + } +} + +//============================================================================= +/*! 
CORBA Method: + * Get result files of job on a cluster + * \param jobId : identification of Salome job + * \param params : Constraints for the choice of the batch cluster + */ +//============================================================================= +void SALOME_Launcher::getResultSalomeJob( const char *directory, + const CORBA::Long jobId, + const Engines::MachineParameters& params) +{ + try{ + // find a cluster matching params structure + Engines::CompoList aCompoList ; + Engines::MachineList_var aMachineList = _ResManager->GetFittingResources( params , aCompoList ) ; + const Engines::MachineParameters* p = _ResManager->GetMachineParameters(aMachineList[0]); + string clustername(p->alias); + + // search batch manager for that cluster in map + std::map < string, BatchLight::BatchManager * >::const_iterator it = _batchmap.find(clustername); + if(it == _batchmap.end()) + throw SALOME_Exception("no batchmanager for that cluster"); + + _batchmap[clustername]->importOutputFiles( directory, jobId ); + } + catch(const SALOME_Exception &ex){ + INFOS("Caught exception."); + THROW_SALOME_CORBA_EXCEPTION(ex.what(),SALOME::BAD_PARAM); + } +} + +//============================================================================= +/*! + * Factory to instantiate the right batch manager for the chosen cluster. 
+ */ +//============================================================================= + +BatchLight::BatchManager *SALOME_Launcher::FactoryBatchManager( const Engines::MachineParameters* params ) throw(SALOME_Exception) +{ + // Fill structure for batch manager + BatchLight::batchParams p; + p.hostname = params->alias; + p.protocol = params->protocol; + p.username = params->username; + p.applipath = params->applipath; + for(int i=0;i<params->modList.length();i++) + p.modulesList.push_back((const char*)params->modList[i]); + p.nbnodes = params->nb_node; + p.nbprocpernode = params->nb_proc_per_node; + p.mpiImpl = params->mpiImpl; + + string sb = (const char*)params->batch; + if(sb == "pbs") + return new BatchLight::BatchManager_PBS(p); + else if(sb == "slurm") + return new BatchLight::BatchManager_SLURM(p); + else{ + MESSAGE("BATCH = " << params->batch); + throw SALOME_Exception("no batchmanager for that cluster"); + } +} + diff --git a/src/Launcher/SALOME_Launcher.hxx b/src/Launcher/SALOME_Launcher.hxx new file mode 100644 index 000000000..5c83afbbf --- /dev/null +++ b/src/Launcher/SALOME_Launcher.hxx @@ -0,0 +1,80 @@ +// Copyright (C) 2005 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, +// CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +#ifndef __SALOME_LAUNCHER_HXX__ +#define __SALOME_LAUNCHER_HXX__ + +#include +#include CORBA_CLIENT_HEADER(SALOME_ContainerManager) +#include "SALOME_ContainerManager.hxx" +#include "BatchLight_BatchManager.hxx" + +#include + +class SALOME_NamingService; + +#if defined LAUNCHER_EXPORTS +#if defined WIN32 +#define LAUNCHER_EXPORT __declspec( dllexport ) +#else +#define LAUNCHER_EXPORT +#endif +#else +#if defined WNT +#define LAUNCHER_EXPORT __declspec( dllimport ) +#else +#define LAUNCHER_EXPORT +#endif +#endif + +class LAUNCHER_EXPORT SALOME_Launcher: + public POA_Engines::SalomeLauncher, + public PortableServer::RefCountServantBase +{ + +public: + SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa); + ~SALOME_Launcher(); + + CORBA::Long submitSalomeJob(const char * fileToExecute , + const Engines::FilesList& filesToExport , + const Engines::FilesList& filesToImport , + const CORBA::Long NumberOfProcessors , + const Engines::MachineParameters& params); + + char* querySalomeJob( const CORBA::Long jobId, const Engines::MachineParameters& params); + void deleteSalomeJob( const CORBA::Long jobId, const Engines::MachineParameters& params); + void getResultSalomeJob( const char * directory, const CORBA::Long jobId, const Engines::MachineParameters& params ); + + void Shutdown(); + + static const char *_LauncherNameInNS; + +protected: + BatchLight::BatchManager *FactoryBatchManager( const Engines::MachineParameters* params ) throw(SALOME_Exception); + + std::map _batchmap; + CORBA::ORB_var _orb; + PortableServer::POA_var _poa; + SALOME_ContainerManager *_ContManager; + SALOME_ResourcesManager *_ResManager; + SALOME_NamingService *_NS; +}; + +#endif 
diff --git a/src/Container/SALOME_ContainerManagerServer.cxx b/src/Launcher/SALOME_LauncherServer.cxx similarity index 65% rename from src/Container/SALOME_ContainerManagerServer.cxx rename to src/Launcher/SALOME_LauncherServer.cxx index d4a4399aa..a08feccb6 100644 --- a/src/Container/SALOME_ContainerManagerServer.cxx +++ b/src/Launcher/SALOME_LauncherServer.cxx @@ -17,11 +17,46 @@ // // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // -#include "SALOME_ContainerManager.hxx" +#include "SALOME_Launcher.hxx" #include "utilities.h" +#include +#include +#include +using namespace std; + +void AttachDebugger() +{ + if(getenv ("DEBUGGER")) + { + std::stringstream exec; + exec << "$DEBUGGER SALOME_LauncherServer " << getpid() << "&"; + std::cerr << exec.str() << std::endl; + system(exec.str().c_str()); + while(1); + } +} + +void terminateHandler(void) +{ + std::cerr << "Terminate: not managed exception !" << std::endl; + AttachDebugger(); +} + +void unexpectedHandler(void) +{ + std::cerr << "Unexpected: unexpected exception !" 
<< std::endl; + AttachDebugger(); +} + int main(int argc, char* argv[]) { + if(getenv ("DEBUGGER")) + { +// setsig(SIGSEGV,&Handler); + set_terminate(&terminateHandler); + set_unexpected(&unexpectedHandler); + } PortableServer::POA_var root_poa; PortableServer::POAManager_var pman; CORBA::Object_var obj; @@ -31,17 +66,17 @@ int main(int argc, char* argv[]) INFOS_COMPILATION; BEGIN_OF(argv[0]); try{ - obj = orb->resolve_initial_references("RootPOA"); - if(!CORBA::is_nil(obj)) - root_poa = PortableServer::POA::_narrow(obj); - if(!CORBA::is_nil(root_poa)) - pman = root_poa->the_POAManager(); - } + obj = orb->resolve_initial_references("RootPOA"); + if(!CORBA::is_nil(obj)) + root_poa = PortableServer::POA::_narrow(obj); + if(!CORBA::is_nil(root_poa)) + pman = root_poa->the_POAManager(); + } catch(CORBA::COMM_FAILURE&){ MESSAGE( "Container: CORBA::COMM_FAILURE: Unable to contact the Naming Service" ); } try{ - SALOME_ContainerManager *cmServ=new SALOME_ContainerManager(orb); + SALOME_Launcher *lServ=new SALOME_Launcher(orb,root_poa); pman->activate(); orb->run(); }catch(CORBA::SystemException&){ diff --git a/src/LifeCycleCORBA/Makefile.am b/src/LifeCycleCORBA/Makefile.am index 208e60a7b..df3eabdc7 100644 --- a/src/LifeCycleCORBA/Makefile.am +++ b/src/LifeCycleCORBA/Makefile.am @@ -55,6 +55,7 @@ COMMON_CPPFLAGS=\ -I$(srcdir)/../SALOMETraceCollector \ -I$(srcdir)/../NamingService \ -I$(srcdir)/../Utils \ + -I$(srcdir)/../Container \ -I$(srcdir)/../ResourcesManager \ -I$(top_builddir)/salome_adm/unix \ -I$(top_builddir)/idl \ @@ -82,6 +83,7 @@ libSalomeLifeCycleCORBA_la_SOURCES = \ libSalomeLifeCycleCORBA_la_CPPFLAGS = \ $(COMMON_CPPFLAGS) \ @PYTHON_INCLUDES@ \ + -I$(srcdir)/../Batch \ -I$(srcdir)/../Container \ -I$(srcdir)/../Notification @@ -96,7 +98,7 @@ libSalomeLifeCycleCORBA_la_LIBADD = $(COMMON_LIBS) # Executables targets # =============================================================== # -bin_PROGRAMS = Test_LifeCycleCORBA +bin_PROGRAMS = Test_LifeCycleCORBA 
TestContainerManager Test_LifeCycleCORBA_SOURCES = Test_LifeCycleCORBA.cxx Test_LifeCycleCORBA_CPPFLAGS =\ -I$(srcdir)/../Registry \ @@ -111,8 +113,18 @@ Test_LifeCycleCORBA_LDADD = \ $(COMMON_LIBS) \ @CORBA_LIBS@ -#CPPFLAGS += $(PYTHON_INCLUDES) $(QT_MT_INCLUDES) +TestContainerManager_SOURCES = TestContainerManager.cxx +TestContainerManager_CPPFLAGS =\ + -I$(srcdir)/../Batch \ + -I$(srcdir)/../Registry \ + -I$(srcdir)/../Notification \ + $(COMMON_CPPFLAGS) + +TestContainerManager_LDADD = \ + libSalomeLifeCycleCORBA.la \ + ../Registry/libRegistry.la \ + ../Notification/libSalomeNotification.la \ + ../Container/libSalomeContainer.la \ + $(COMMON_LIBS) \ + @CORBA_LIBS@ -#LDFLAGS += -lSalomeNS -lOpUtil -lSALOMELocalTrace -lSalomeContainer -lSalomeResourcesManager -#LDFLAGSFORBIN= $(LDFLAGS) -lRegistry -lSalomeNotification -lSALOMEBasics -#LIBS += $(PYTHON_LIBS) diff --git a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx index ef4ff83dc..bb72d3f20 100644 --- a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx +++ b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx @@ -116,14 +116,16 @@ string SALOME_FileTransferCORBA::getLocalFile(string localFile) SALOME_LifeCycleCORBA LCC; Engines::ContainerManager_var contManager = LCC.getContainerManager(); + Engines::ResourcesManager_var resManager = LCC.getResourcesManager(); Engines::MachineParameters params; LCC.preSet(params); params.container_name = _containerName.c_str(); params.hostname = _refMachine.c_str(); + Engines::CompoList clist; Engines::MachineList_var listOfMachines = - contManager->GetFittingResources(params, ""); + resManager->GetFittingResources(params, clist); container = contManager->FindOrStartContainer(params, listOfMachines); diff --git a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx index d4541abac..9877b1bda 100644 --- a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx +++ 
b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx @@ -94,6 +94,10 @@ SALOME_LifeCycleCORBA::SALOME_LifeCycleCORBA(SALOME_NamingService *ns) _NS->Resolve(SALOME_ContainerManager::_ContainerManagerNameInNS); ASSERT( !CORBA::is_nil(obj)); _ContManager=Engines::ContainerManager::_narrow(obj); + + obj = _NS->Resolve(SALOME_ResourcesManager::_ResourcesManagerNameInNS); + ASSERT( !CORBA::is_nil(obj)); + _ResManager=Engines::ResourcesManager::_narrow(obj); } //============================================================================= @@ -124,8 +128,11 @@ SALOME_LifeCycleCORBA::FindComponent(const Engines::MachineParameters& params, if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); + Engines::CompoList clist; + clist.length(1); + clist[0] = componentName; Engines::MachineList_var listOfMachines = - _ContManager->GetFittingResources(params, componentName); + _ResManager->GetFittingResources(params, clist); Engines::Component_var compo = _FindComponent(params, componentName, @@ -155,8 +162,11 @@ SALOME_LifeCycleCORBA::LoadComponent(const Engines::MachineParameters& params, if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); + Engines::CompoList clist; + clist.length(1); + clist[0] = componentName; Engines::MachineList_var listOfMachines = - _ContManager->GetFittingResources(params, componentName); + _ResManager->GetFittingResources(params, clist); Engines::Component_var compo = _LoadComponent(params, componentName, @@ -188,8 +198,11 @@ FindOrLoad_Component(const Engines::MachineParameters& params, if (! 
isKnownComponentClass(componentName)) return Engines::Component::_nil(); + Engines::CompoList clist; + clist.length(1); + clist[0] = componentName; Engines::MachineList_var listOfMachines = - _ContManager->GetFittingResources(params,componentName); + _ResManager->GetFittingResources(params,clist); Engines::Component_var compo = _FindComponent(params, componentName, @@ -221,6 +234,23 @@ Engines::Component_ptr SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName, const char *componentName) { + char *valenv=getenv("SALOME_BATCH"); + if(valenv) + if (strcmp(valenv,"1")==0) + { + MESSAGE("SALOME_LifeCycleCORBA::FindOrLoad_Component BATCH " << containerName << " " << componentName ) ; + _NS->Change_Directory("/Containers"); + CORBA::Object_ptr obj=_NS->Resolve(containerName); + Engines::Container_var cont=Engines::Container::_narrow(obj); + bool isLoadable = cont->load_component_Library(componentName); + if (!isLoadable) return Engines::Component::_nil(); + + Engines::Component_ptr myInstance = + cont->create_component_instance(componentName, 0); + return myInstance; + } + MESSAGE("SALOME_LifeCycleCORBA::FindOrLoad_Component INTERACTIF " << containerName << " " << componentName ) ; + //#if 0 // --- Check if Component Name is known in ModuleCatalog if (! isKnownComponentClass(componentName)) @@ -257,7 +287,7 @@ SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName, // SCRUTE(params->isMPI); free(stContainer); return FindOrLoad_Component(params,componentName); - + //#endif } //============================================================================= @@ -378,6 +408,19 @@ Engines::ContainerManager_ptr SALOME_LifeCycleCORBA::getContainerManager() return contManager._retn(); } +//============================================================================= +/*! 
Public - + * \return the container Manager + */ +//============================================================================= + +Engines::ResourcesManager_ptr SALOME_LifeCycleCORBA::getResourcesManager() +{ + Engines::ResourcesManager_var resManager = + Engines::ResourcesManager::_duplicate(_ResManager); + return resManager._retn(); +} + //============================================================================= /*! Protected - @@ -428,7 +471,7 @@ _FindComponent(const Engines::MachineParameters& params, if(lghtOfmachinesOK != 0) { machinesOK->length(lghtOfmachinesOK); - CORBA::String_var bestMachine = _ContManager->FindFirst(machinesOK); + CORBA::String_var bestMachine = _ResManager->FindFirst(machinesOK); CORBA::Object_var obj = _NS->ResolveComponent(bestMachine, containerName, componentName, @@ -491,8 +534,11 @@ SALOME_LifeCycleCORBA::Load_ParallelComponent(const Engines::MachineParameters& MESSAGE("Number of component nodes : " << params.nb_component_nodes); MESSAGE("Component Name : " << componentName);*/ + Engines::CompoList clist; + clist.length(1); + clist[0] = componentName; MESSAGE("Building a list of machines"); - Engines::MachineList_var listOfMachines = _ContManager->GetFittingResources(params, componentName); + Engines::MachineList_var listOfMachines = _ResManager->GetFittingResources(params, clist); if (listOfMachines->length() == 0) { INFOS("No matching machines founded !"); diff --git a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx index a8e0d5a88..ced93eb1e 100644 --- a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx +++ b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx @@ -102,6 +102,7 @@ public: void preSet(Engines::MachineParameters& params); Engines::ContainerManager_ptr getContainerManager(); + Engines::ResourcesManager_ptr getResourcesManager(); protected: @@ -124,6 +125,7 @@ protected: SALOME_NamingService *_NS; Engines::ContainerManager_var _ContManager; + Engines::ResourcesManager_var 
_ResManager; } ; diff --git a/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx b/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx index a1a582770..2c724e369 100644 --- a/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx +++ b/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx @@ -672,17 +672,20 @@ string LifeCycleCORBATest::GetRemoteHost() { SALOME_LifeCycleCORBA _LCC(&_NS); - CORBA::Object_var obj = _NS.Resolve("/ContainerManager"); + CORBA::Object_var obj = _NS.Resolve("/ResourcesManager"); CPPUNIT_ASSERT(!CORBA::is_nil(obj)); - Engines::ContainerManager_var containerManager = - Engines::ContainerManager::_narrow(obj); - CPPUNIT_ASSERT(!CORBA::is_nil(containerManager)); + Engines::ResourcesManager_var resourcesManager = + Engines::ResourcesManager::_narrow(obj); + CPPUNIT_ASSERT(!CORBA::is_nil(resourcesManager)); Engines::MachineParameters params; _LCC.preSet(params); // empty params to get all the machines + Engines::CompoList clist; + clist.length(1); + clist[0] = "SalomeTestComponent"; Engines::MachineList_var hostList = - containerManager->GetFittingResources(params,"SalomeTestComponent"); + resourcesManager->GetFittingResources(params,clist); CPPUNIT_ASSERT(hostList->length() > 1); string localHost = GetHostname(); diff --git a/src/LifeCycleCORBA/Test/TestLifeCycleCORBA.py b/src/LifeCycleCORBA/Test/TestLifeCycleCORBA.py index 9bfb82f71..d6d857b62 100644 --- a/src/LifeCycleCORBA/Test/TestLifeCycleCORBA.py +++ b/src/LifeCycleCORBA/Test/TestLifeCycleCORBA.py @@ -64,12 +64,12 @@ cataServer.setpath(modules_list,modules_root_dir) cataServer.run() clt.waitNS("/Kernel/ModulCatalog") -# launch container manager server +# launch launcher server -myCmServer = runSalome.ContainerManagerServer(args) +myCmServer = runSalome.LauncherServer(args) myCmServer.setpath(modules_list,modules_root_dir) myCmServer.run() -clt.waitNS("/ContainerManager") +clt.waitNS("/SalomeLauncher") # execute Unit Test @@ -79,8 +79,8 @@ ret = os.spawnvp(os.P_WAIT, command[0], command) # kill containers 
created by the Container Manager import Engines -containerManager = clt.waitNS("/ContainerManager",Engines.ContainerManager) -containerManager.Shutdown() +launcher = clt.waitNS("/SalomeLauncher",Engines.SalomeLauncher) +launcher.Shutdown() # kill Test process diff --git a/src/Container/TestContainerManager.cxx b/src/LifeCycleCORBA/TestContainerManager.cxx similarity index 78% rename from src/Container/TestContainerManager.cxx rename to src/LifeCycleCORBA/TestContainerManager.cxx index 3d991bbdd..9313aace7 100644 --- a/src/Container/TestContainerManager.cxx +++ b/src/LifeCycleCORBA/TestContainerManager.cxx @@ -59,8 +59,15 @@ int main (int argc, char * argv[]) CORBA::Object_var obj = _NS->Resolve(SALOME_ContainerManager::_ContainerManagerNameInNS); ASSERT( !CORBA::is_nil(obj)); Engines::ContainerManager_var _ContManager=Engines::ContainerManager::_narrow(obj); + obj = _NS->Resolve(SALOME_ResourcesManager::_ResourcesManagerNameInNS); + ASSERT( !CORBA::is_nil(obj)); + Engines::ResourcesManager_var _ResManager=Engines::ResourcesManager::_narrow(obj); Engines::MachineParameters p; + Engines::CompoList clist; + clist.length(2); + clist[0] = "MED"; + clist[1] = "GEOM"; p.hostname = ""; p.OS = "LINUX"; @@ -74,26 +81,27 @@ int main (int argc, char * argv[]) for(int i=0;i<10;i++){ sprintf(st,"cycl_%d",i); p.container_name = CORBA::string_dup(st); - cont = _ContManager->StartContainer(p,Engines::P_CYCL); + cont = _ContManager->GiveContainer(p,Engines::P_CYCL,clist); if(CORBA::is_nil(cont)) error = true; } for(int i=0;i<10;i++){ sprintf(st,"first_%d",i); p.container_name = CORBA::string_dup(st); - cont = _ContManager->StartContainer(p,Engines::P_FIRST); + cont = _ContManager->GiveContainer(p,Engines::P_FIRST,clist); if(CORBA::is_nil(cont)) error = true; } p.container_name = CORBA::string_dup("best"); - cont = _ContManager->StartContainer(p,Engines::P_BEST); + cont = _ContManager->GiveContainer(p,Engines::P_BEST,clist); if(CORBA::is_nil(cont)) bestImplemented = false; else 
bestImplemented = true; SALOME_LifeCycleCORBA LCC(_NS); - compo = LCC.FindOrLoad_Component("FactoryServer","GEOM"); + p.container_name = CORBA::string_dup("FactoryServer"); + compo = LCC.FindOrLoad_Component(p,"SMESH"); if(CORBA::is_nil(compo)) error = true; - compo = LCC.FindOrLoad_Component("FactoryServer","GEOM"); + compo = LCC.FindOrLoad_Component(p,"SMESH"); if(CORBA::is_nil(compo)) error = true; _NS->Change_Directory("/Containers"); @@ -125,15 +133,21 @@ int main (int argc, char * argv[]) int cmax=0; int fmin=10; int fmax=0; + int nbpmax; for(map::iterator iter=cycle.begin();iter!=cycle.end();iter++){ if(strcmp((*iter).first.c_str(),"localhost")!=0){ - if(cycle[(*iter).first]cmax) cmax=cycle[(*iter).first]; - if(first[(*iter).first]fmax) fmax=first[(*iter).first]; + Engines::MachineParameters *p = _ResManager->GetMachineParameters((*iter).first.c_str()); + int nbproc = p->nb_node * p->nb_proc_per_node; + if(cycle[(*iter).first]/nbproccmax) cmax=cycle[(*iter).first]/nbproc; + if(first[(*iter).first]/nbprocfmax){ + fmax=first[(*iter).first]/nbproc; + nbpmax = nbproc; + } } } - if( ((cmax-cmin) <= 1) && (fmax == 10) && !error ){ + if( ((cmax-cmin) <= 1) && (fmax == 10/nbpmax) && !error ){ string msg; if(bestImplemented) msg = "TEST OK"; diff --git a/src/LifeCycleCORBA_SWIG/Test/TestLifeCycleCORBA_SWIG.py b/src/LifeCycleCORBA_SWIG/Test/TestLifeCycleCORBA_SWIG.py index 8a4d7856e..8ffe63ee6 100644 --- a/src/LifeCycleCORBA_SWIG/Test/TestLifeCycleCORBA_SWIG.py +++ b/src/LifeCycleCORBA_SWIG/Test/TestLifeCycleCORBA_SWIG.py @@ -75,10 +75,10 @@ clt.waitNS("/Kernel/ModulCatalog") # launch container manager server -myCmServer = runSalome.ContainerManagerServer(args) +myCmServer = runSalome.LauncherServer(args) myCmServer.setpath(modules_list,modules_root_dir) myCmServer.run() -clt.waitNS("/ContainerManager") +clt.waitNS("/SalomeLauncher") # execute Unit Test @@ -92,8 +92,8 @@ unittest.TextTestRunner(verbosity=2).run(LifeCycleCORBA_SWIGTest.suite()) # kill containers 
created by the Container Manager import Engines -containerManager = clt.waitNS("/ContainerManager",Engines.ContainerManager) -containerManager.Shutdown() +launcher = clt.waitNS("/SalomeLauncher",Engines.SalomeLauncher) +launcher.Shutdown() # kill Test process diff --git a/src/Makefile.am b/src/Makefile.am index 55eff87da..dc3f9b7aa 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -43,6 +43,7 @@ SUBDIR_CORBA = \ NOTIFICATION_SWIG \ Container \ TestContainer \ + Launcher \ LifeCycleCORBA \ LifeCycleCORBA_SWIG \ SALOMEDSClient \ @@ -101,6 +102,10 @@ DIST_SUBDIRS = \ SUBDIRS = $(SUBDIR_BASE) +if WITH_BATCH + SUBDIRS += $(SUBDIR_BATCH) +endif + if CORBA_GEN SUBDIRS += $(SUBDIR_CORBA) endif @@ -109,10 +114,6 @@ endif # SUBDIRS += $(SUBDIR_MPI) #endif -if WITH_BATCH - SUBDIRS += $(SUBDIR_BATCH) -endif - if CPPUNIT_IS_OK if CORBA_GEN SUBDIRS += \ diff --git a/src/ModuleCatalog/SALOME_ModuleCatalog_Server.cxx b/src/ModuleCatalog/SALOME_ModuleCatalog_Server.cxx index de5110a03..735ff5b13 100644 --- a/src/ModuleCatalog/SALOME_ModuleCatalog_Server.cxx +++ b/src/ModuleCatalog/SALOME_ModuleCatalog_Server.cxx @@ -162,8 +162,8 @@ int main(int argc,char **argv) #endif orb->run(); - mgr->deactivate(true,true); - poa->destroy(1,1); +// mgr->deactivate(true,true); +// poa->destroy(1,1); } catch(CORBA::SystemException&) { @@ -173,6 +173,7 @@ int main(int argc,char **argv) INFOS("Caught CORBA::Exception.") } + END_OF( argv[0] ); // delete myThreadTrace; return 0; } diff --git a/src/ModuleCatalog/SALOME_ModuleCatalog_impl.hxx b/src/ModuleCatalog/SALOME_ModuleCatalog_impl.hxx index 5dad3bacf..0768c5de7 100644 --- a/src/ModuleCatalog/SALOME_ModuleCatalog_impl.hxx +++ b/src/ModuleCatalog/SALOME_ModuleCatalog_impl.hxx @@ -105,7 +105,7 @@ public: void ping(){}; - void shutdown() { if (_orb) _orb->shutdown(1); }; + void shutdown() { if(!CORBA::is_nil(_orb)) _orb->shutdown(0); }; private: //! 
method to parse one module catalog diff --git a/src/NamingService/SALOME_NamingService.cxx b/src/NamingService/SALOME_NamingService.cxx index 22fbc3c7d..13aa8a802 100644 --- a/src/NamingService/SALOME_NamingService.cxx +++ b/src/NamingService/SALOME_NamingService.cxx @@ -132,10 +132,9 @@ void SALOME_NamingService::Register(CORBA::Object_ptr ObjRef, // --- _current_context is replaced to the _root_context // if the Path begins whith '/' - if (Path[0] == '/') - { - _current_context = _root_context; - } + if (Path[0] == '/'){ + _current_context = _root_context; + } // --- the resolution of the directory path has to be done // to place the current_context to the correct node @@ -149,179 +148,158 @@ void SALOME_NamingService::Register(CORBA::Object_ptr ObjRef, CORBA::Boolean not_exist = false; - if (dimension_resultat > 0) - { - // A directory is treated (not only an object name) - // test if the directory where ObjRef should be recorded already exists - // If not, create the new context - - try - { - CORBA::Object_var obj = _current_context->resolve(context_name); - _current_context = CosNaming::NamingContext::_narrow(obj); - } - - catch (CosNaming::NamingContext::NotFound &) - { - // --- failed to resolve, therefore assume cold start - not_exist = true; - } - - catch (CosNaming::NamingContext::InvalidName &) - { - INFOS("Register() : CosNaming::NamingContext::InvalidName"); - } - - catch (CosNaming::NamingContext::CannotProceed &) - { - INFOS("Register() : CosNaming::NamingContext::CannotProceed"); - } - - catch (CORBA::SystemException&) - { - INFOS("Register() : CORBA::SystemException: " - << "unable to contact the naming service"); - throw ServiceUnreachable(); - } - - if (not_exist) - { - try - { - context_name.length(1); - for (int i = 0 ; i < dimension_resultat ;i++) - { - context_name[0].id = - CORBA::string_dup(splitPath[i].c_str()); - context_name[0].kind = CORBA::string_dup("dir"); - // SCRUTE(_context_name[0].id); - // --- check if the path is created - try 
- { - // --- if the context is already created, nothing to do - CORBA::Object_var obj = - _current_context->resolve(context_name); - _current_context = - CosNaming::NamingContext::_narrow(obj); - } - - catch (CosNaming::NamingContext::NotFound &) - { - // --- the context must be created - CosNaming::NamingContext_var temp_context = - _current_context->bind_new_context(context_name); - _current_context = temp_context; - } - } - } - - catch (CosNaming::NamingContext::AlreadyBound&) - { - INFOS("Register() : CosNaming::NamingContext::AlreadyBound"); - } - - catch (CosNaming::NamingContext::NotFound& ex) - { - CosNaming::Name n = ex.rest_of_name; - - if (ex.why == CosNaming::NamingContext::missing_node) - INFOS("Register() : " << (char *) n[0].id - << " (" << (char *) n[0].kind << ") not found"); + if (dimension_resultat > 0){ + // A directory is treated (not only an object name) + // test if the directory where ObjRef should be recorded already exists + // If not, create the new context + + try{ + CORBA::Object_var obj = _current_context->resolve(context_name); + _current_context = CosNaming::NamingContext::_narrow(obj); + } - if (ex.why == CosNaming::NamingContext::not_context) - INFOS("Register() : " << (char *) n[0].id - << " (" << (char *) n[0].kind - << ") is not a context"); + catch (CosNaming::NamingContext::NotFound &){ + // --- failed to resolve, therefore assume cold start + not_exist = true; + } - if (ex.why == CosNaming::NamingContext::not_object) - INFOS("Register() : " << (char *) n[0].id - << " (" << (char *) n[0].kind - << ") is not an object"); - } + catch (CosNaming::NamingContext::InvalidName &){ + INFOS("Register() : CosNaming::NamingContext::InvalidName"); + } - catch (CosNaming::NamingContext::CannotProceed&) - { - INFOS("Register(): CosNaming::NamingContext::CannotProceed"); - } + catch (CosNaming::NamingContext::CannotProceed &){ + INFOS("Register() : CosNaming::NamingContext::CannotProceed"); + } - catch 
(CosNaming::NamingContext::InvalidName&) - { - INFOS("Register(): CosNaming::NamingContext::InvalidName"); - } + catch (CORBA::SystemException&){ + INFOS("Register() : CORBA::SystemException: " + << "unable to contact the naming service"); + throw ServiceUnreachable(); + } - catch (CORBA::SystemException&) - { - INFOS("Register():CORBA::SystemException: " - << "unable to contact the naming service"); - throw ServiceUnreachable(); - } - } + if (not_exist){ + try{ + context_name.length(1); + for (int i = 0 ; i < dimension_resultat ;i++){ + context_name[0].id = CORBA::string_dup(splitPath[i].c_str()); + context_name[0].kind = CORBA::string_dup("dir"); + // SCRUTE(_context_name[0].id); + // --- check if the path is created + try{ + // --- if the context is already created, nothing to do + CORBA::Object_var obj = _current_context->resolve(context_name); + _current_context = CosNaming::NamingContext::_narrow(obj); + } + + catch (CosNaming::NamingContext::NotFound &){ + try{ + // --- the context must be created + CosNaming::NamingContext_var temp_context = + _current_context->bind_new_context(context_name); + _current_context = temp_context; + } + catch (CosNaming::NamingContext::AlreadyBound&){ + CORBA::Object_var obj = _current_context->resolve(context_name); + _current_context = CosNaming::NamingContext::_narrow(obj); + } + } + } + } + + catch (CosNaming::NamingContext::AlreadyBound&){ + INFOS("Register() : CosNaming::NamingContext::AlreadyBound"); + } + + catch (CosNaming::NamingContext::NotFound& ex){ + CosNaming::Name n = ex.rest_of_name; + + if (ex.why == CosNaming::NamingContext::missing_node) + INFOS("Register() : " << (char *) n[0].id + << " (" << (char *) n[0].kind << ") not found"); + + if (ex.why == CosNaming::NamingContext::not_context) + INFOS("Register() : " << (char *) n[0].id + << " (" << (char *) n[0].kind + << ") is not a context"); + + if (ex.why == CosNaming::NamingContext::not_object) + INFOS("Register() : " << (char *) n[0].id + << " (" << (char *) 
n[0].kind + << ") is not an object"); + } + + catch (CosNaming::NamingContext::CannotProceed&){ + INFOS("Register(): CosNaming::NamingContext::CannotProceed"); + } + + catch (CosNaming::NamingContext::InvalidName&){ + INFOS("Register(): CosNaming::NamingContext::InvalidName"); + } + + catch (CORBA::SystemException&){ + INFOS("Register():CORBA::SystemException: " + << "unable to contact the naming service"); + throw ServiceUnreachable(); + } } + } // --- The current directory is now the directory where the object should // be recorded int sizePath = splitPath.size(); - if (sizePath > dimension_resultat) - { - ASSERT(sizePath == dimension_resultat+1); - context_name.length(1); + if (sizePath > dimension_resultat){ + ASSERT(sizePath == dimension_resultat+1); + context_name.length(1); - try - { - // --- the last element is an object and not a directory + try{ + // --- the last element is an object and not a directory - context_name[0].id = - CORBA::string_dup(splitPath[dimension_resultat].c_str()); - context_name[0].kind = CORBA::string_dup("object"); - //SCRUTE(context_name[0].id); + context_name[0].id = CORBA::string_dup(splitPath[dimension_resultat].c_str()); + context_name[0].kind = CORBA::string_dup("object"); + //SCRUTE(context_name[0].id); - _current_context->bind(context_name, ObjRef); - } + _current_context->bind(context_name, ObjRef); + } - catch (CosNaming::NamingContext::NotFound& ex) - { - CosNaming::Name n = ex.rest_of_name; + catch (CosNaming::NamingContext::NotFound& ex){ + CosNaming::Name n = ex.rest_of_name; - if (ex.why == CosNaming::NamingContext::missing_node) - INFOS("Register() : " << (char *) n[0].id - << " (" << (char *) n[0].kind << ") not found"); + if (ex.why == CosNaming::NamingContext::missing_node) + INFOS("Register() : " << (char *) n[0].id + << " (" << (char *) n[0].kind << ") not found"); - if (ex.why == CosNaming::NamingContext::not_context) - INFOS("Register() : " << (char *) n[0].id - << " (" << (char *) n[0].kind - << ") is not a 
context"); + if (ex.why == CosNaming::NamingContext::not_context) + INFOS("Register() : " << (char *) n[0].id + << " (" << (char *) n[0].kind + << ") is not a context"); - if (ex.why == CosNaming::NamingContext::not_object) - INFOS("Register() : " << (char *) n[0].id - << " (" << (char *) n[0].kind - << ") is not an object"); - } + if (ex.why == CosNaming::NamingContext::not_object) + INFOS("Register() : " << (char *) n[0].id + << " (" << (char *) n[0].kind + << ") is not an object"); + } - catch (CosNaming::NamingContext::CannotProceed&) - { - INFOS("Register(): CosNaming::NamingContext::CannotProceed"); - } + catch (CosNaming::NamingContext::CannotProceed&){ + INFOS("Register(): CosNaming::NamingContext::CannotProceed"); + } - catch (CosNaming::NamingContext::InvalidName&) - { - INFOS("Register(): CosNaming::NamingContext::InvalidName"); - } + catch (CosNaming::NamingContext::InvalidName&){ + INFOS("Register(): CosNaming::NamingContext::InvalidName"); + } - catch (CosNaming::NamingContext::AlreadyBound&) - { - INFOS("Register(): CosNaming::NamingContext::AlreadyBound, " - << "object will be rebind"); - _current_context->rebind(context_name, ObjRef); - } + catch (CosNaming::NamingContext::AlreadyBound&){ + INFOS("Register(): CosNaming::NamingContext::AlreadyBound, " + << "object will be rebind"); + _current_context->rebind(context_name, ObjRef); + } - catch (CORBA::SystemException&) - { - INFOS("!!!Register(): CORBA::SystemException: " - << "unable to contact the naming service"); - throw ServiceUnreachable(); - } + catch (CORBA::SystemException&){ + INFOS("!!!Register(): CORBA::SystemException: " + << "unable to contact the naming service"); + throw ServiceUnreachable(); } + } } // ============================================================================ diff --git a/src/Registry/RegistryService.cxx b/src/Registry/RegistryService.cxx index 9b305d6a5..16c0b736c 100644 --- a/src/Registry/RegistryService.cxx +++ b/src/Registry/RegistryService.cxx @@ -49,7 +49,7 
@@ using namespace std; RegistryService::RegistryService( void ) : _SessionName(0), _Compteur(0) { MESSAGE("Passage dans RegistryService::RegistryService()") ; - + _orb = CORBA::ORB::_nil(); } diff --git a/src/Registry/RegistryService.hxx b/src/Registry/RegistryService.hxx index 21b31b6f5..4315f9321 100644 --- a/src/Registry/RegistryService.hxx +++ b/src/Registry/RegistryService.hxx @@ -88,9 +88,13 @@ public : void SessionName( const char *sessionName ) ; + void SetOrb( CORBA::ORB_ptr orb ) { _orb = orb; return; } + + void Shutdown() { if(!CORBA::is_nil(_orb)) _orb->shutdown(0); } protected : + CORBA::ORB_var _orb; const char *_SessionName ; int _Compteur ; std::map _reg ; diff --git a/src/Registry/SALOME_Registry_Server.cxx b/src/Registry/SALOME_Registry_Server.cxx index 7dc1b6817..470cde401 100644 --- a/src/Registry/SALOME_Registry_Server.cxx +++ b/src/Registry/SALOME_Registry_Server.cxx @@ -168,6 +168,7 @@ int main( int argc , char **argv ) naming.init_orb( orb ) ; RegistryService *ptrRegistry = SINGLETON_::Instance() ; ptrRegistry->SessionName( ptrSessionName ) ; + ptrRegistry->SetOrb(orb); varComponents = ptrRegistry->_this() ; // The RegistryService must not already exist. diff --git a/src/ResourcesManager/Makefile.am b/src/ResourcesManager/Makefile.am index 4205f9361..dbb5bb380 100755 --- a/src/ResourcesManager/Makefile.am +++ b/src/ResourcesManager/Makefile.am @@ -48,6 +48,7 @@ salomeinclude_HEADERS = \ # This local variable defines the list of CPPFLAGS common to all target in this package. 
COMMON_CPPFLAGS=\ + -I$(srcdir)/../Batch \ -I$(srcdir)/../Basics \ -I$(srcdir)/../SALOMELocalTrace \ -I$(srcdir)/../NamingService \ diff --git a/src/ResourcesManager/SALOME_LoadRateManager.cxx b/src/ResourcesManager/SALOME_LoadRateManager.cxx index 527cade0a..42f08df99 100644 --- a/src/ResourcesManager/SALOME_LoadRateManager.cxx +++ b/src/ResourcesManager/SALOME_LoadRateManager.cxx @@ -34,7 +34,7 @@ string SALOME_LoadRateManager::FindFirst(const Engines::MachineList& hosts) return string(hosts[0]); } -string SALOME_LoadRateManager::FindNext(const Engines::MachineList& hosts,SALOME_NamingService *ns) +string SALOME_LoadRateManager::FindNext(const Engines::MachineList& hosts,MapOfParserResourcesType& resList,SALOME_NamingService *ns) { MESSAGE("SALOME_LoadRateManager::FindNext " << hosts.length()); map machines; @@ -47,15 +47,7 @@ string SALOME_LoadRateManager::FindNext(const Engines::MachineList& hosts,SALOME ns->Change_Directory("/Containers"); vector vec = ns->list_directory_recurs(); - list lstCont; for(vector::iterator iter = vec.begin();iter!=vec.end();iter++){ - CORBA::Object_var obj=ns->Resolve((*iter).c_str()); - Engines::Container_var cont=Engines::Container::_narrow(obj); - if(!CORBA::is_nil(cont)){ - lstCont.push_back((*iter)); - } - } - for(list::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){ CORBA::Object_var obj=ns->Resolve((*iter).c_str()); Engines::Container_var cont=Engines::Container::_narrow(obj); if(!CORBA::is_nil(cont)){ @@ -65,11 +57,15 @@ string SALOME_LoadRateManager::FindNext(const Engines::MachineList& hosts,SALOME } int imin = 0; - int min = machines[string(hosts[0])]; + ParserResourcesType resource = resList[string(hosts[0])]; + int nbproc = resource.DataForSort._nbOfProcPerNode * resource.DataForSort._nbOfNodes; + int min = machines[string(hosts[0])]/nbproc; for(int i=1;i #include CORBA_CLIENT_HEADER(SALOME_ContainerManager) #include +#include "SALOME_ResourcesCatalog_Parser.hxx" #include "SALOME_NamingService.hxx" #if 
defined RESOURCESMANAGER_EXPORTS @@ -44,7 +45,7 @@ class RESOURCESMANAGER_EXPORT SALOME_LoadRateManager public: std::string FindFirst(const Engines::MachineList& hosts); - std::string FindNext(const Engines::MachineList& hosts,SALOME_NamingService *ns); + std::string FindNext(const Engines::MachineList& hosts,MapOfParserResourcesType& resList,SALOME_NamingService *ns); std::string FindBest(const Engines::MachineList& hosts) throw (SALOME_Exception); }; diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx index a42b4369f..a0fc63552 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx @@ -59,12 +59,12 @@ SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& listOfResources): test_alias = "alias"; test_protocol = "protocol"; test_mode = "mode"; + test_batch = "batch"; + test_mpi = "mpi"; test_user_name = "userName"; test_appli_path = "appliPath"; test_modules = "modules"; test_module_name = "moduleName"; - test_module_path = "modulePath"; - test_pre_req_file_path = "preReqFilePath"; test_os = "OS"; test_mem_in_mb = "memInMB"; test_cpu_freq_mhz = "CPUFreqMHz"; @@ -118,6 +118,7 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) { if ( !xmlStrcmp(aCurNode->name,(const xmlChar*)test_machine) ) { + _resource.Clear(); if (xmlHasProp(aCurNode, (const xmlChar*)test_hostname)) _resource.DataForSort._hostName = (const char*)xmlGetProp(aCurNode, (const xmlChar*)test_hostname); else @@ -156,15 +157,40 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) break; } + if (xmlHasProp(aCurNode, (const xmlChar*)test_batch)) + { + std::string aBatch = (const char*)xmlGetProp(aCurNode, (const xmlChar*)test_batch); + if (aBatch == "pbs") + _resource.Batch = pbs; + else if (aBatch == "lsf") + _resource.Batch = lsf; + else if (aBatch == "slurm") + _resource.Batch = slurm; + else + 
_resource.Batch = none; + } + + if (xmlHasProp(aCurNode, (const xmlChar*)test_mpi)) + { + std::string anMpi = (const char*)xmlGetProp(aCurNode, (const xmlChar*)test_mpi); + if (anMpi == "lam") + _resource.mpi = lam; + else if (anMpi == "mpich1") + _resource.mpi = mpich1; + else if (anMpi == "mpich2") + _resource.mpi = mpich2; + else if (anMpi == "openmpi") + _resource.mpi = openmpi; + else + _resource.mpi = indif; + } + if (xmlHasProp(aCurNode, (const xmlChar*)test_user_name)) _resource.UserName = (const char*)xmlGetProp(aCurNode, (const xmlChar*)test_user_name); if (xmlHasProp(aCurNode, (const xmlChar*)test_appli_path)) _resource.AppliPath = (const char*)xmlGetProp(aCurNode, (const xmlChar*)test_appli_path); - if (xmlHasProp(aCurNode, (const xmlChar*)test_pre_req_file_path)) - _resource.PreReqFilePath = (const char*)xmlGetProp(aCurNode, (const xmlChar*)test_pre_req_file_path); - if (xmlHasProp(aCurNode, (const xmlChar*)test_os)) _resource.OS = (const char*)xmlGetProp(aCurNode, (const xmlChar*)test_os); @@ -186,12 +212,10 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) { if ( !xmlStrcmp(aCurSubNode->name, (const xmlChar*)test_modules) ) { - if (xmlHasProp(aCurSubNode, (const xmlChar*)test_module_name) && - xmlHasProp(aCurSubNode, (const xmlChar*)test_module_path)) + if (xmlHasProp(aCurSubNode, (const xmlChar*)test_module_name)) { std::string aModuleName = (const char*)xmlGetProp(aCurSubNode, (const xmlChar*)test_module_name); - std::string aModulePath = (const char*)xmlGetProp(aCurSubNode, (const xmlChar*)test_module_path); - _resource.ModulesPath[aModuleName] = aModulePath; + _resource.ModulesList.push_back(aModuleName); } } aCurSubNode = aCurSubNode->next; @@ -287,19 +311,50 @@ void SALOME_ResourcesCatalog_Handler::PrepareDocToXmlFile(xmlDocPtr theDoc) xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); } + switch ((*iter).second.Batch) + { + case pbs: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "pbs"); + break; + case 
lsf: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); + break; + case slurm: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "slurm"); + break; + default: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); + } + + switch ((*iter).second.mpi) + { + case lam: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "lam"); + break; + case mpich1: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich1"); + break; + case mpich2: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich2"); + break; + case openmpi: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "openmpi"); + break; + default: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST ""); + } + xmlNewProp(node, BAD_CAST test_user_name, BAD_CAST (*iter).second.UserName.c_str()); - for (map::const_iterator iter2 = - (*iter).second.ModulesPath.begin(); - iter2 != (*iter).second.ModulesPath.end(); + for (vector::const_iterator iter2 = + (*iter).second.ModulesList.begin(); + iter2 != (*iter).second.ModulesList.end(); iter2++) { node1 = xmlNewChild(node, NULL, BAD_CAST test_modules, NULL); - xmlNewProp(node1, BAD_CAST test_module_name, BAD_CAST (*iter2).first.c_str()); - xmlNewProp(node1, BAD_CAST test_module_path, BAD_CAST (*iter2).second.c_str()); + xmlNewProp(node1, BAD_CAST test_module_name, BAD_CAST (*iter2).c_str()); } - xmlNewProp(node, BAD_CAST test_pre_req_file_path, BAD_CAST (*iter).second.PreReqFilePath.c_str()); xmlNewProp(node, BAD_CAST test_os, BAD_CAST (*iter).second.OS.c_str()); xmlNewProp(node, BAD_CAST test_mem_in_mb, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._memInMB)); xmlNewProp(node, BAD_CAST test_cpu_freq_mhz, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._CPUFreqMHz)); diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx index 560659ece..d636ff22f 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx @@ -52,7 
+52,6 @@ class SALOME_ResourcesCatalog_Handler private : std::string previous_module_name; - std::string previous_module_path; ParserResourcesType _resource; MapOfParserResourcesType& _resources_list; @@ -64,12 +63,12 @@ class SALOME_ResourcesCatalog_Handler const char *test_alias; const char *test_protocol; const char *test_mode; + const char *test_batch; + const char *test_mpi; const char *test_user_name; const char *test_appli_path; const char *test_modules; const char *test_module_name; - const char *test_module_path; - const char *test_pre_req_file_path; const char *test_os; const char *test_mem_in_mb; const char *test_cpu_freq_mhz; diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx index 49e594e42..e20dbfc6a 100644 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx @@ -112,26 +112,46 @@ void ResourceDataToSort::Print() const SCRUTE(_memInMB); } -void ParserResourcesType::Print() +void ParserResourcesType::Print() const { - MESSAGE("##############*****"); - MESSAGE("HostName : " << DataForSort._hostName); - MESSAGE("Alias : " << Alias); - MESSAGE("Protocol : " << Protocol); - MESSAGE("Mode : " << Mode); - MESSAGE("UserName : " << UserName); - MESSAGE("Modules : "); - int i = 1; - - for (std::map::iterator iter = ModulesPath.begin(); - iter != ModulesPath.end(); - iter++) - { - MESSAGE("Module " << i++ << " called : " << (*iter).first - << " with path : " << (*iter).second); - } - - MESSAGE("PreReqFilePath : " << PreReqFilePath); - MESSAGE("OS : " << OS); - DataForSort.Print(); + ostringstream oss; + oss << endl << + "HostName : " << DataForSort._hostName << endl << + "Alias : " << Alias << endl << + "NbOfNodes : " << DataForSort._nbOfNodes << endl << + "NbOfProcPerNode : " << DataForSort._nbOfProcPerNode << endl << + "CPUFreqMHz : " << DataForSort._CPUFreqMHz << endl << + "MemInMB : " << DataForSort._memInMB << 
endl << + "Protocol : " << Protocol << endl << + "Mode : " << Mode << endl << + "Batch : " << Batch << endl << + "mpi : " << mpi << endl << + "UserName : " << UserName << endl << + "AppliPath : " << AppliPath << endl << + "OS : " << OS << endl << + "Modules : " << endl; + + for(int i=0;i #include -typedef std::map MapOfModulesPath; - enum AccessProtocolType {rsh, ssh}; enum AccessModeType {interactive, batch}; +enum BatchType {none, pbs, lsf, slurm}; + +enum MpiImplType {indif, lam, mpich1, mpich2, openmpi}; + class ResourceDataToSort { @@ -74,13 +76,15 @@ struct ParserResourcesType std::string Alias; AccessProtocolType Protocol; AccessModeType Mode; + BatchType Batch; + MpiImplType mpi; std::string UserName; std::string AppliPath; - MapOfModulesPath ModulesPath; - std::string PreReqFilePath; + std::vector ModulesList; std::string OS; - void Print(); + void Print() const; + void Clear(); }; typedef std::map MapOfParserResourcesType; diff --git a/src/ResourcesManager/SALOME_ResourcesManager.cxx b/src/ResourcesManager/SALOME_ResourcesManager.cxx index 4c002f91b..7c31fa87e 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.cxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.cxx @@ -18,8 +18,8 @@ // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // #include "SALOME_ResourcesManager.hxx" -//#include "SALOME_Container_i.hxx" #include "Utils_ExceptHandlers.hxx" +#include "Utils_CorbaException.hxx" #include "OpUtil.hxx" #include @@ -44,6 +44,8 @@ using namespace std; +const char *SALOME_ResourcesManager::_ResourcesManagerNameInNS = "/ResourcesManager"; + //============================================================================= /*! 
* just for test @@ -51,12 +53,24 @@ using namespace std; //============================================================================= SALOME_ResourcesManager:: -SALOME_ResourcesManager(CORBA::ORB_ptr orb, +SALOME_ResourcesManager(CORBA::ORB_ptr orb, + PortableServer::POA_var poa, + SALOME_NamingService *ns, const char *xmlFilePath) : _path_resources(xmlFilePath) { - _NS = new SALOME_NamingService(orb); + MESSAGE("constructor"); + _NS = ns; + _orb = CORBA::ORB::_duplicate(orb) ; + _poa = PortableServer::POA::_duplicate(poa) ; + PortableServer::ObjectId_var id = _poa->activate_object(this); + CORBA::Object_var obj = _poa->id_to_reference(id); + Engines::SalomeLauncher_var refContMan = + Engines::SalomeLauncher::_narrow(obj); + + _NS->Register(refContMan,_ResourcesManagerNameInNS); _MpiStarted = false; + MESSAGE("constructor end"); } //============================================================================= @@ -70,9 +84,19 @@ SALOME_ResourcesManager(CORBA::ORB_ptr orb, */ //============================================================================= -SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb) +SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb, + PortableServer::POA_var poa, + SALOME_NamingService *ns) { - _NS = new SALOME_NamingService(orb); + MESSAGE("constructor"); + _NS = ns; + _orb = CORBA::ORB::_duplicate(orb) ; + _poa = PortableServer::POA::_duplicate(poa) ; + PortableServer::ObjectId_var id = _poa->activate_object(this); + CORBA::Object_var obj = _poa->id_to_reference(id); + Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj); + _NS->Register(refContMan,_ResourcesManagerNameInNS); + _isAppliSalomeDefined = (getenv("APPLI") != 0); _MpiStarted = false; @@ -91,6 +115,7 @@ SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb) } ParseXmlFile(); + MESSAGE("constructor end"); } //============================================================================= @@ -101,7 
+126,23 @@ SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb) SALOME_ResourcesManager::~SALOME_ResourcesManager() { - delete _NS; + MESSAGE("destructor"); +} + + +//============================================================================= +/*! CORBA method: + * shutdown all the containers, then the ContainerManager servant + */ +//============================================================================= + +void SALOME_ResourcesManager::Shutdown() +{ + MESSAGE("Shutdown"); + _NS->Destroy_Name(_ResourcesManagerNameInNS); + PortableServer::ObjectId_var oid = _poa->servant_to_id(this); + _poa->deactivate_object(oid); + _remove_ref(); } //============================================================================= @@ -118,101 +159,113 @@ SALOME_ResourcesManager::~SALOME_ResourcesManager() */ //============================================================================= -vector -SALOME_ResourcesManager:: -GetFittingResources(const Engines::MachineParameters& params, - const char *moduleName) -throw(SALOME_Exception) +Engines::MachineList * +SALOME_ResourcesManager::GetFittingResources(const Engines::MachineParameters& params, + const Engines::CompoList& componentList) +//throw(SALOME_Exception) { // MESSAGE("ResourcesManager::GetFittingResources"); - vector ret; + vector vec; + Engines::MachineList *ret=new Engines::MachineList; + try{ + // --- To be sure that we search in a correct list. + ParseXmlFile(); - // --- To be sure that we search in a correct list. 
- ParseXmlFile(); + const char *hostname = (const char *)params.hostname; + MESSAGE("GetFittingResources " << hostname << " " << GetHostname().c_str()); - const char *hostname = (const char *)params.hostname; - MESSAGE("GetFittingResources " << hostname << " " << GetHostname().c_str()); - - if (hostname[0] != '\0') - { + if (hostname[0] != '\0') + { // MESSAGE("ResourcesManager::GetFittingResources : hostname specified" ); - if ( strcmp(hostname, "localhost") == 0 || - strcmp(hostname, GetHostname().c_str()) == 0 ) - { -// MESSAGE("ResourcesManager::GetFittingResources : localhost" ); - ret.push_back(GetHostname().c_str()); -// MESSAGE("ResourcesManager::GetFittingResources : " << ret.size()); - } - - else if (_resourcesList.find(hostname) != _resourcesList.end()) - { - // --- params.hostname is in the list of resources so return it. - ret.push_back(hostname); - } - - else - { -// Cas d'un cluster: nombre de noeuds > 1 - int cpt=0; - for (map::const_iterator iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++){ - if( (*iter).second.DataForSort._nbOfNodes > 1 ){ - if( strncmp(hostname,(*iter).first.c_str(),strlen(hostname)) == 0 ){ - ret.push_back((*iter).first.c_str()); - //cout << "SALOME_ResourcesManager::GetFittingResources vector[" - // << cpt << "] = " << (*iter).first.c_str() << endl ; - cpt++; - } - } - } - if(cpt==0){ - // --- user specified an unknown hostame so notify him. - MESSAGE("ResourcesManager::GetFittingResources : SALOME_Exception"); - throw SALOME_Exception("unknown host"); - } - } - } + if ( strcmp(hostname, "localhost") == 0 || + strcmp(hostname, GetHostname().c_str()) == 0 ) + { + // MESSAGE("ResourcesManager::GetFittingResources : localhost" ); + vec.push_back(GetHostname().c_str()); + // MESSAGE("ResourcesManager::GetFittingResources : " << vec.size()); + } + + else if (_resourcesList.find(hostname) != _resourcesList.end()) + { + // --- params.hostname is in the list of resources so return it. 
+ vec.push_back(hostname); + } + + else + { + // Cas d'un cluster: nombre de noeuds > 1 + int cpt=0; + for (map::const_iterator iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++){ + if( (*iter).second.DataForSort._nbOfNodes > 1 ){ + if( strncmp(hostname,(*iter).first.c_str(),strlen(hostname)) == 0 ){ + vec.push_back((*iter).first.c_str()); + //cout << "SALOME_ResourcesManager::GetFittingResources vector[" + // << cpt << "] = " << (*iter).first.c_str() << endl ; + cpt++; + } + } + } + if(cpt==0){ + // --- user specified an unknown hostame so notify him. + MESSAGE("ResourcesManager::GetFittingResources : SALOME_Exception"); + throw SALOME_Exception("unknown host"); + } + } + } + + else + // --- Search for available resources sorted by priority + { + SelectOnlyResourcesWithOS(vec, params.OS); + + KeepOnlyResourcesWithModule(vec, componentList); + + if (vec.size() == 0) + SelectOnlyResourcesWithOS(vec, params.OS); + + // --- set wanted parameters + ResourceDataToSort::_nbOfNodesWanted = params.nb_node; + + ResourceDataToSort::_nbOfProcPerNodeWanted = params.nb_proc_per_node; + + ResourceDataToSort::_CPUFreqMHzWanted = params.cpu_clock; + + ResourceDataToSort::_memInMBWanted = params.mem_mb; + + // --- end of set + + list li; + + for (vector::iterator iter = vec.begin(); + iter != vec.end(); + iter++) + li.push_back(_resourcesList[(*iter)].DataForSort); + + li.sort(); + + unsigned int i = 0; + + for (list::iterator iter2 = li.begin(); + iter2 != li.end(); + iter2++) + vec[i++] = (*iter2)._hostName; + } + + // MESSAGE("ResourcesManager::GetFittingResources : return" << ret.size()); + ret->length(vec.size()); + for(unsigned int i=0;i li; - - for (vector::iterator iter = ret.begin(); - iter != ret.end(); - iter++) - li.push_back(_resourcesList[(*iter)].DataForSort); - - li.sort(); - - unsigned int i = 0; - - for (list::iterator iter2 = li.begin(); - iter2 != li.end(); - iter2++) - ret[i++] = (*iter2)._hostName; - } - - // 
MESSAGE("ResourcesManager::GetFittingResources : return" << ret.size()); return ret; } @@ -226,16 +279,16 @@ throw(SALOME_Exception) int SALOME_ResourcesManager:: AddResourceInCatalog(const Engines::MachineParameters& paramsOfNewResources, - const map& modulesOnNewResources, - const char *environPathOfPrerequired, + const vector& modulesOnNewResources, const char *alias, const char *userName, AccessModeType mode, AccessProtocolType prot) throw(SALOME_Exception) { - map::const_iterator iter = - modulesOnNewResources.find("KERNEL"); + vector::const_iterator iter = find(modulesOnNewResources.begin(), + modulesOnNewResources.end(), + "KERNEL"); if (iter != modulesOnNewResources.end()) { @@ -245,8 +298,7 @@ throw(SALOME_Exception) newElt.Protocol = prot; newElt.Mode = mode; newElt.UserName = userName; - newElt.ModulesPath = modulesOnNewResources; - newElt.PreReqFilePath = environPathOfPrerequired; + newElt.ModulesList = modulesOnNewResources; newElt.OS = paramsOfNewResources.OS; newElt.DataForSort._memInMB = paramsOfNewResources.mem_mb; newElt.DataForSort._CPUFreqMHz = paramsOfNewResources.cpu_clock; @@ -370,10 +422,10 @@ const MapOfParserResourcesType& SALOME_ResourcesManager::GetList() const */ //============================================================================= -string +char * SALOME_ResourcesManager::FindFirst(const Engines::MachineList& listOfMachines) { - return _dynamicResourcesSelecter.FindFirst(listOfMachines); + return CORBA::string_dup(_dynamicResourcesSelecter.FindFirst(listOfMachines).c_str()); } //============================================================================= @@ -385,7 +437,7 @@ SALOME_ResourcesManager::FindFirst(const Engines::MachineList& listOfMachines) string SALOME_ResourcesManager::FindNext(const Engines::MachineList& listOfMachines) { - return _dynamicResourcesSelecter.FindNext(listOfMachines,_NS); + return _dynamicResourcesSelecter.FindNext(listOfMachines,_resourcesList,_NS); } 
//============================================================================= /*! @@ -399,8 +451,6 @@ SALOME_ResourcesManager::FindBest(const Engines::MachineList& listOfMachines) return _dynamicResourcesSelecter.FindBest(listOfMachines); } - - //============================================================================= /*! * This is no longer valid (C++ container are also python containers) @@ -539,7 +589,6 @@ SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer return command; } - //============================================================================= /*! * builds the command to be launched. @@ -661,7 +710,7 @@ SALOME_ResourcesManager::BuildCommand command += machine; command += " "; - string path = (*(resInfo.ModulesPath.find("KERNEL"))).second; + string path = getenv("KERNEL_ROOT_DIR"); command += path; command += "/bin/salome/"; @@ -722,17 +771,29 @@ throw(SALOME_Exception) void SALOME_ResourcesManager::KeepOnlyResourcesWithModule ( vector& hosts, - const char *moduleName) const + const Engines::CompoList& componentList) const throw(SALOME_Exception) { for (vector::iterator iter = hosts.begin(); iter != hosts.end();) { MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); - const map& mapOfModulesOfCurrentHost = - (((*it).second).ModulesPath); - - if (mapOfModulesOfCurrentHost.find(moduleName) == - mapOfModulesOfCurrentHost.end()) + const vector& mapOfModulesOfCurrentHost = (((*it).second).ModulesList); + + bool erasedHost = false; + if( mapOfModulesOfCurrentHost.size() > 0 ){ + for(int i=0;i::const_iterator itt = find(mapOfModulesOfCurrentHost.begin(), + mapOfModulesOfCurrentHost.end(), + compoi); +// componentList[i]); + if (itt == mapOfModulesOfCurrentHost.end()){ + erasedHost = true; + break; + } + } + } + if(erasedHost) hosts.erase(iter); else iter++; @@ -828,8 +889,10 @@ string SALOME_ResourcesManager::BuildTemporaryFileName() const string SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer (const 
string& machine, - const Engines::MachineParameters& params) + const Engines::MachineParameters& params) throw(SALOME_Exception) { + int status; + _TmpFileName = BuildTemporaryFileName(); ofstream tempOutputFile; tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); @@ -838,27 +901,6 @@ SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer // --- set env vars - tempOutputFile << "source " << resInfo.PreReqFilePath << endl; - - for (map::const_iterator iter = resInfo.ModulesPath.begin(); - iter != resInfo.ModulesPath.end(); - iter++) - { - string curModulePath((*iter).second); - tempOutputFile << (*iter).first << "_ROOT_DIR=" << curModulePath << endl; - tempOutputFile << "export " << (*iter).first << "_ROOT_DIR" << endl; - tempOutputFile << "LD_LIBRARY_PATH=" << curModulePath - << "/lib/salome" << ":${LD_LIBRARY_PATH}" << endl; - tempOutputFile << "PYTHONPATH=" << curModulePath << "/bin/salome:" - << curModulePath << "/lib/salome:" << curModulePath - << "/lib/python${PYTHON_VERSION}/site-packages/salome:"; - tempOutputFile << curModulePath - << "/lib/python${PYTHON_VERSION}/site-packages/salome/shared_modules:${PYTHONPATH}" - << endl; - } - - tempOutputFile << "export LD_LIBRARY_PATH" << endl; - tempOutputFile << "export PYTHONPATH" << endl; tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace" //tempOutputFile << "source " << resInfo.PreReqFilePath << endl; @@ -886,8 +928,7 @@ SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer #endif } - tempOutputFile << (*(resInfo.ModulesPath.find("KERNEL"))).second - << "/bin/salome/"; + tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/"; if (params.isMPI) { @@ -925,7 +966,7 @@ SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer commandRcp += machine; commandRcp += ":"; commandRcp += _TmpFileName; - system(commandRcp.c_str()); + status = system(commandRcp.c_str()); } else if 
(resInfo.Protocol == ssh) @@ -937,11 +978,14 @@ SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer commandRcp += machine; commandRcp += ":"; commandRcp += _TmpFileName; - system(commandRcp.c_str()); + status = system(commandRcp.c_str()); } else throw SALOME_Exception("Unknown protocol"); + if(status) + throw SALOME_Exception("Error of connection on remote host"); + command += machine; _CommandForRemAccess = command; command += " "; @@ -1101,4 +1145,42 @@ void SALOME_ResourcesManager::startMPI() } } - +Engines::MachineParameters* SALOME_ResourcesManager::GetMachineParameters(const char *hostname) +{ + ParserResourcesType resource = _resourcesList[string(hostname)]; + Engines::MachineParameters *p_ptr = new Engines::MachineParameters; + p_ptr->container_name = CORBA::string_dup(""); + p_ptr->hostname = CORBA::string_dup("hostname"); + p_ptr->alias = CORBA::string_dup(resource.Alias.c_str()); + if( resource.Protocol == rsh ) + p_ptr->protocol = "rsh"; + else if( resource.Protocol == ssh ) + p_ptr->protocol = "ssh"; + p_ptr->username = CORBA::string_dup(resource.UserName.c_str()); + p_ptr->applipath = CORBA::string_dup(resource.AppliPath.c_str()); + p_ptr->modList.length(resource.ModulesList.size()); + for(int i=0;imodList[i] = CORBA::string_dup(resource.ModulesList[i].c_str()); + p_ptr->OS = CORBA::string_dup(resource.OS.c_str()); + p_ptr->mem_mb = resource.DataForSort._memInMB; + p_ptr->cpu_clock = resource.DataForSort._CPUFreqMHz; + p_ptr->nb_proc_per_node = resource.DataForSort._nbOfProcPerNode; + p_ptr->nb_node = resource.DataForSort._nbOfNodes; + if( resource.mpi == indif ) + p_ptr->mpiImpl = "indif"; + else if( resource.mpi == lam ) + p_ptr->mpiImpl = "lam"; + else if( resource.mpi == mpich1 ) + p_ptr->mpiImpl = "mpich1"; + else if( resource.mpi == mpich2 ) + p_ptr->mpiImpl = "mpich2"; + else if( resource.mpi == openmpi ) + p_ptr->mpiImpl = "openmpi"; + if( resource.Batch == pbs ) + p_ptr->batch = "pbs"; + else if( resource.Batch == lsf ) + 
p_ptr->batch = "lsf"; + else if( resource.Batch == slurm ) + p_ptr->batch = "slurm"; + return p_ptr; +} diff --git a/src/ResourcesManager/SALOME_ResourcesManager.hxx b/src/ResourcesManager/SALOME_ResourcesManager.hxx index d9ca64434..23c7bd647 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.hxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.hxx @@ -31,28 +31,44 @@ #include #include +#if defined RESOURCESMANAGER_EXPORTS +#if defined WIN32 +#define RESOURCESMANAGER_EXPORT __declspec( dllexport ) +#else +#define RESOURCESMANAGER_EXPORT +#endif +#else +#if defined WNT +#define RESOURCESMANAGER_EXPORT __declspec( dllimport ) +#else +#define RESOURCESMANAGER_EXPORT +#endif +#endif + // --- WARNING --- // The call of BuildTempFileToLaunchRemoteContainer and RmTmpFile must be done // in a critical section to be sure to be clean. // Only one thread should use the SALOME_ResourcesManager class in a SALOME // session. -class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager +class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager: + public POA_Engines::ResourcesManager, + public PortableServer::RefCountServantBase { public: - SALOME_ResourcesManager(CORBA::ORB_ptr orb, const char *xmlFilePath); - SALOME_ResourcesManager(CORBA::ORB_ptr orb); + SALOME_ResourcesManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService *ns, const char *xmlFilePath); + SALOME_ResourcesManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_NamingService *ns); ~SALOME_ResourcesManager(); - std::vector + Engines::MachineList * GetFittingResources(const Engines::MachineParameters& params, - const char *moduleName) - throw(SALOME_Exception); + const Engines::CompoList& componentList); +// throw(SALOME_Exception); - std::string FindFirst(const Engines::MachineList& listOfMachines); + char* FindFirst(const Engines::MachineList& listOfMachines); std::string FindNext(const Engines::MachineList& listOfMachines); std::string FindBest(const Engines::MachineList& 
listOfMachines); @@ -70,8 +86,7 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager int AddResourceInCatalog (const Engines::MachineParameters& paramsOfNewResources, - const std::map& modulesOnNewResources, - const char *environPathOfPrerequired, + const std::vector& modulesOnNewResources, const char *alias, const char *userName, AccessModeType mode, @@ -90,6 +105,12 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager std::string BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name, const Engines::MachineParameters& params, const std::string& log = "default"); + Engines::MachineParameters* GetMachineParameters(const char *hostname); + + void Shutdown(); + + static const char *_ResourcesManagerNameInNS; + protected: // Parallel extension @@ -97,17 +118,19 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager bool _MpiStarted; SALOME_NamingService *_NS; + CORBA::ORB_var _orb; + PortableServer::POA_var _poa; std::string BuildTempFileToLaunchRemoteContainer (const std::string& machine, - const Engines::MachineParameters& params); + const Engines::MachineParameters& params) throw(SALOME_Exception); void SelectOnlyResourcesWithOS(std::vector& hosts, const char *OS) const throw(SALOME_Exception); void KeepOnlyResourcesWithModule(std::vector& hosts, - const char *moduleName) const + const Engines::CompoList& componentList) const throw(SALOME_Exception); void AddOmninamesParams(std::string& command) const; @@ -116,7 +139,6 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager std::string BuildTemporaryFileName() const; - //! 
will contain the path to the ressources catalog std::string _path_resources; diff --git a/src/SALOMEDS/SALOMEDS_StudyManager_i.hxx b/src/SALOMEDS/SALOMEDS_StudyManager_i.hxx index 9ef1801bb..c66c12adc 100644 --- a/src/SALOMEDS/SALOMEDS_StudyManager_i.hxx +++ b/src/SALOMEDS/SALOMEDS_StudyManager_i.hxx @@ -145,6 +145,8 @@ public: virtual CORBA::LongLong GetLocalImpl(const char* theHostname, CORBA::Long thePID, CORBA::Boolean& isLocal); static PortableServer::POA_ptr GetPOA(const SALOMEDS::Study_ptr theStudy); + + void Shutdown() { if(!CORBA::is_nil(_orb)) _orb->shutdown(0); } }; #endif diff --git a/src/SALOMEDSImpl/SALOMEDSImpl_StudyManager.cxx b/src/SALOMEDSImpl/SALOMEDSImpl_StudyManager.cxx index e1059aeab..20a0d49ab 100644 --- a/src/SALOMEDSImpl/SALOMEDSImpl_StudyManager.cxx +++ b/src/SALOMEDSImpl/SALOMEDSImpl_StudyManager.cxx @@ -590,7 +590,7 @@ bool SALOMEDSImpl_StudyManager::Impl_SaveAs(const TCollection_AsciiString& aUrl, length, theMultiFile); HDFdataset *hdf_dataset; - hdf_size aHDFSize[1]; + hdf_size aHDFSize[1]; if(length > 0) { //The component saved some auxiliary files, then put them into HDF file aHDFSize[0] = length; diff --git a/src/SALOMELocalTrace/LocalTraceBufferPool.hxx b/src/SALOMELocalTrace/LocalTraceBufferPool.hxx index b439cc54a..bd55a1dc2 100644 --- a/src/SALOMELocalTrace/LocalTraceBufferPool.hxx +++ b/src/SALOMELocalTrace/LocalTraceBufferPool.hxx @@ -28,7 +28,7 @@ #define TRACE_BUFFER_SIZE 256 // number of entries in circular buffer // must be power of 2 -#define MAX_TRACE_LENGTH 256 // messages are truncated at this size +#define MAX_TRACE_LENGTH 1024 // messages are truncated at this size #include #include diff --git a/src/UnitTests/UnitTests.py b/src/UnitTests/UnitTests.py index 04fc42f44..9e232c822 100644 --- a/src/UnitTests/UnitTests.py +++ b/src/UnitTests/UnitTests.py @@ -65,10 +65,10 @@ clt.waitNS("/Kernel/ModulCatalog") # launch container manager server -myCmServer = runSalome.ContainerManagerServer(args) +myCmServer = 
runSalome.LauncherServer(args) myCmServer.setpath(modules_list,modules_root_dir) myCmServer.run() -clt.waitNS("/ContainerManager") +clt.waitNS("/SalomeLauncher") # execute Unit Test @@ -78,8 +78,8 @@ ret = os.spawnvp(os.P_WAIT, command[0], command) # kill containers created by the Container Manager import Engines -containerManager = clt.waitNS("/ContainerManager",Engines.ContainerManager) -containerManager.Shutdown() +launcher = clt.waitNS("/SalomeLauncher",Engines.SalomeLauncher) +launcher.Shutdown() # kill Test process -- 2.39.2