From fd510ea234fbbac7e2db00f3a3a3b7e4a62c6efd Mon Sep 17 00:00:00 2001 From: ribes Date: Tue, 19 Jan 2010 10:06:47 +0000 Subject: [PATCH] Merge from mergeto_V5_1_main_19Jan10 --- idl/SALOME_ContainerManager.idl | 14 +++++++++ src/Container/SALOME_ContainerManager.cxx | 30 +++++++++++-------- src/Container/SALOME_ContainerManager.hxx | 8 ++--- src/Launcher/Launcher.cxx | 29 ++++++++++++++++-- src/Launcher/Makefile.am | 1 + .../SALOME_ResourcesCatalog_Handler.cxx | 30 +++++++++++-------- .../SALOME_ResourcesManager.cxx | 2 ++ 7 files changed, 84 insertions(+), 30 deletions(-) diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index 1b48aa5af..66d51d569 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -40,6 +40,15 @@ typedef sequence FilesList; //! modules list typedef sequence ModulesList; +//! A generic parameter +struct Parameter +{ + string name; + string value; +}; +//! Generic parameter list +typedef sequence ParameterList; + //! Type to describe required properties of a resource struct ResourceParameters { @@ -166,6 +175,11 @@ struct JobParameters Name of the batch queue choosed - optional */ string queue; + + /*! + Specific parameters for each type of job - optional + */ + Engines::ParameterList specific_parameters; }; /*! \brief Interface of the %salomelauncher diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index f59f7b814..6358393a7 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -203,6 +203,7 @@ void SALOME_ContainerManager::ShutdownContainers() Engines::Container_ptr SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& params) { + string machFile; Engines::Container_ptr ret = Engines::Container::_nil(); // Step 0: Default mode is start @@ -292,9 +293,9 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param else nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; if( getenv("LIBBATCH_NODEFILE") != NULL ) - machinesFile(nbproc); + machFile = machinesFile(nbproc); // A mpi parallel container register on zero node in NS - containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname).c_str()); + containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname,machFile).c_str()); } else containerNameInNS = _NS->BuildContainerNameForNS(params, hostname.c_str()); @@ -312,7 +313,6 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param if(!cont->_non_existent()) { if(std::string(params.mode.in())=="getorstart" or std::string(params.mode.in())=="get"){ - _numInstanceMutex.unlock(); return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/ } else @@ -396,10 +396,10 @@ SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& param std::string command; // if a parallel container is launched in batch job, command is: "mpirun -np nbproc -machinefile nodesfile SALOME_MPIContainer" if( getenv("LIBBATCH_NODEFILE") != NULL && params.isMPI ) - command = BuildCommandToLaunchLocalContainer(params,container_exe); + command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe); // if a container is launched on localhost, command is "SALOME_Container" or "mpirun -np nbproc SALOME_MPIContainer" else if(hostname == Kernel_Utils::GetHostname()) - command = BuildCommandToLaunchLocalContainer(params, container_exe); + command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe); // if a container is launched in remote mode, command is "ssh resource_selected SALOME_Container" or "ssh resource_selected mpirun -np nbproc SALOME_MPIContainer" else command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe); @@ -679,7 +679,7 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer //============================================================================= string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer -(const Engines::ContainerParameters& params, const std::string& container_exe) +(const Engines::ContainerParameters& params, const std::string& machinesFile, const std::string& container_exe) { _TmpFileName = BuildTemporaryFileName(); string command; @@ -703,7 +703,7 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer o << nbproc << " "; if( getenv("LIBBATCH_NODEFILE") != NULL ) - o << "-machinefile " << _machinesFile << " "; + o << "-machinefile " << machinesFile << " "; #ifdef WITHLAM o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; @@ -1701,7 +1701,7 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string } #endif -string SALOME_ContainerManager::GetMPIZeroNode(string machine) +string SALOME_ContainerManager::GetMPIZeroNode(const string machine, const string machinesFile) { int status; string zeronode; @@ -1711,7 +1711,7 @@ string SALOME_ContainerManager::GetMPIZeroNode(string machine) if( getenv("LIBBATCH_NODEFILE") == NULL ) cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile; else - cmd = "mpirun -np 1 -machinefile " + _machinesFile + " hostname > " + tmpFile; + cmd = "mpirun -np 1 -machinefile " + machinesFile + " hostname > " + tmpFile; status = system(cmd.c_str()); if( status == 0 ){ @@ -1724,13 +1724,15 @@ string SALOME_ContainerManager::GetMPIZeroNode(string machine) return zeronode; } -void SALOME_ContainerManager::machinesFile(const int nbproc) +string SALOME_ContainerManager::machinesFile(const int nbproc) { string tmp; string nodesFile = getenv("LIBBATCH_NODEFILE"); - _machinesFile = Kernel_Utils::GetTmpFileName(); + string machinesFile = Kernel_Utils::GetTmpFileName(); ifstream fpi(nodesFile.c_str(),ios::in); - ofstream fpo(_machinesFile.c_str(),ios::out); + ofstream fpo(machinesFile.c_str(),ios::out); + + _numInstanceMutex.lock(); for(int i=0;i<_nbprocUsed;i++) fpi >> tmp; @@ -1745,4 +1747,8 @@ void SALOME_ContainerManager::machinesFile(const int nbproc) fpi.close(); fpo.close(); + _numInstanceMutex.unlock(); + + return machinesFile; + } diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index 30c76ef8b..89fa303fa 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -71,6 +71,7 @@ protected: const std::string& container_exe="SALOME_Container"); std::string BuildCommandToLaunchLocalContainer(const Engines::ContainerParameters& params, + const std::string& machinesFile, const std::string& container_exe="SALOME_Container"); std::string BuildTempFileToLaunchRemoteContainer(const std::string& resource_name, @@ -86,7 +87,9 @@ protected: std::string BuildTemporaryFileName() const; - std::string GetMPIZeroNode(std::string machine); + std::string GetMPIZeroNode(const std::string machine, const std::string machinesFile); + + std::string machinesFile(const int nbproc); void machinesFile(const int nbproc); @@ -120,9 +123,6 @@ protected: //! attribute that contains the number of processes used in batch mode by MPI containers int _nbprocUsed; - //! attributes that contains the machinefile for MPI containers - std::string _machinesFile; - static omni_mutex _numInstanceMutex ; // lib and instance protection }; diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index 358b640ed..01b224ead 100644 --- a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -29,6 +29,8 @@ #include #endif +#include "Basics_Utils.hxx" +#include "Basics_DirUtils.hxx" #include "SALOME_Launcher_Handler.hxx" #include "Launcher.hxx" #include "Launcher_Job_Command.hxx" @@ -127,7 +129,11 @@ Launcher_cpp::createJob(Launcher::Job * new_job) { try { - _batchmap[resource_name] = FactoryBatchManager(resource_definition); + // Warning cannot write on one line like this, because map object is constructed before + // the method is called... + //_batchmap.[resource_name] = FactoryBatchManager(resource_definition); + Batch::BatchManager_eClient * batch_client = FactoryBatchManager(resource_definition); + _batchmap[resource_name] = batch_client; } catch(const LauncherException &ex) { @@ -300,7 +306,20 @@ Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile, // Creating a new job Launcher::Job_Command * new_job = new Launcher::Job_Command(); - new_job->setJobFile(job_params.Command); + + string cmdFile = Kernel_Utils::GetTmpFileName(); +#ifndef WIN32 + cmdFile += ".sh"; +#else + cmdFile += ".bat"; +#endif + ofstream os; + os.open(cmdFile.c_str(), ofstream::out ); + os << "#! /bin/sh" << endl; + os << job_params.Command; + os.close(); + + new_job->setJobFile(cmdFile); new_job->setLocalDirectory(job_params.RefDirectory); new_job->setWorkDirectory(job_params.MachinesList[clusterName].WorkDirectory); new_job->setEnvFile(job_params.MachinesList[clusterName].EnvFile); @@ -312,7 +331,13 @@ Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile, resourceParams p; p.hostname = clusterName; + p.name = ""; + p.OS = ""; p.nb_proc = job_params.NbOfProcesses; + p.nb_node = 0; + p.nb_proc_per_node = 0; + p.cpu_clock = 0; + p.mem_mb = 0; new_job->setResourceRequiredParams(p); createJob(new_job); diff --git a/src/Launcher/Makefile.am b/src/Launcher/Makefile.am index ab11e0daf..87dd1c923 100644 --- a/src/Launcher/Makefile.am +++ b/src/Launcher/Makefile.am @@ -127,6 +127,7 @@ libLauncher_la_SOURCES=\ Launcher.cxx libLauncher_la_CPPFLAGS =\ + -I$(srcdir)/../Basics \ -I$(srcdir)/../ResourcesManager \ @LIBBATCH_INCLUDES@ \ @MPI_INCLUDES@ \ diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx index 4601e3229..2cf3e8b72 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx @@ -119,17 +119,20 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) bool Ok = ProcessMachine(aCurNode, _resource); if (Ok) { - // Adding a resource - if(_resource.HostName == "localhost") - { - _resource.HostName = Kernel_Utils::GetHostname(); - if (_resource.Name == "localhost") - { - _resource.Name = Kernel_Utils::GetHostname(); - _resource.DataForSort._Name = Kernel_Utils::GetHostname(); - } - } - _resources_list[_resource.Name] = _resource; + // Adding a resource + if(_resource.HostName == "localhost") + { + _resource.HostName = Kernel_Utils::GetHostname(); + if (_resource.Name == "localhost") + { + _resource.Name = Kernel_Utils::GetHostname(); + _resource.DataForSort._Name = Kernel_Utils::GetHostname(); + } + } + map::const_iterator iter = _resources_list.find(_resource.Name); + if (iter != _resources_list.end()) + RES_INFOS("Warning resource " << _resource.Name << " already added, keep last resource found !"); + _resources_list[_resource.Name] = _resource; } } // Cas de la déclaration d'un cluster @@ -138,7 +141,10 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) _resource.Clear(); if(ProcessCluster(aCurNode, _resource)) { - _resources_list[_resource.Name] = _resource; + map::const_iterator iter = _resources_list.find(_resource.Name); + if (iter != _resources_list.end()) + RES_INFOS("Warning resource " << _resource.Name << " already added, keep last resource found !"); + _resources_list[_resource.Name] = _resource; } } aCurNode = aCurNode->next; diff --git a/src/ResourcesManager/SALOME_ResourcesManager.cxx b/src/ResourcesManager/SALOME_ResourcesManager.cxx index c31f0ac5e..a84eca4d1 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.cxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.cxx @@ -253,6 +253,8 @@ SALOME_ResourcesManager::GetResourceDefinition(const char * name) p_ptr->batch = "lsf"; else if( resource.Batch == sge ) p_ptr->batch = "sge"; + else if( resource.Batch == ssh_batch ) + p_ptr->batch = "ssh"; return p_ptr; } -- 2.39.2