From 9fa492d2ea817a233c4371234786a9577ab007e4 Mon Sep 17 00:00:00 2001 From: prascle Date: Tue, 15 Dec 2009 10:52:12 +0000 Subject: [PATCH] merge from branch BR_V511_PR 15 12 2009 --- bin/appli_gen.py | 3 +- idl/SALOME_ContainerManager.idl | 284 +-- src/Container/SALOME_ContainerManager.cxx | 1762 ++++++++--------- src/Container/SALOME_ContainerManager.hxx | 54 +- src/Launcher/BatchTest.cxx | 62 +- src/Launcher/BatchTest.hxx | 4 +- src/Launcher/Launcher.cxx | 1072 +++------- src/Launcher/Launcher.hxx | 74 +- src/Launcher/Launcher_Job.cxx | 501 +++++ src/Launcher/Launcher_Job.hxx | 139 ++ src/Launcher/Launcher_Job_Command.cxx | 69 + src/Launcher/Launcher_Job_Command.hxx | 49 + src/Launcher/Launcher_Job_PythonSALOME.cxx | 38 + src/Launcher/Launcher_Job_PythonSALOME.hxx | 41 + src/Launcher/Launcher_Job_SALOME.cxx | 121 ++ src/Launcher/Launcher_Job_SALOME.hxx | 52 + src/Launcher/Launcher_Job_YACSFile.cxx | 38 + src/Launcher/Launcher_Job_YACSFile.hxx | 40 + src/Launcher/Launcher_Utils.hxx | 57 + src/Launcher/Makefile.am | 12 + src/Launcher/SALOME_Launcher.cxx | 337 ++-- src/Launcher/SALOME_Launcher.hxx | 31 +- src/LifeCycleCORBA/LifeCycleCORBA.py | 275 --- .../SALOME_FileTransferCORBA.cxx | 5 +- .../SALOME_FileTransferCORBA.hxx | 1 + src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx | 416 ++-- src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx | 26 +- .../Test/LifeCycleCORBATest.cxx | 11 +- src/LifeCycleCORBA/TestContainerManager.cxx | 28 +- src/NamingService/SALOME_NamingService.cxx | 41 + src/NamingService/SALOME_NamingService.hxx | 8 +- src/ResourcesManager/ResourcesManager.cxx | 400 ++-- src/ResourcesManager/ResourcesManager.hxx | 44 +- .../ResourcesManager_Defs.hxx | 12 + .../SALOME_ResourcesCatalog_Handler.cxx | 429 ++-- .../SALOME_ResourcesCatalog_Handler.hxx | 7 +- .../SALOME_ResourcesCatalog_Parser.cxx | 96 +- .../SALOME_ResourcesCatalog_Parser.hxx | 17 +- .../SALOME_ResourcesManager.cxx | 87 +- .../SALOME_ResourcesManager.hxx | 13 +- 40 files changed, 3694 insertions(+), 
3062 deletions(-) create mode 100644 src/Launcher/Launcher_Job.cxx create mode 100644 src/Launcher/Launcher_Job.hxx create mode 100644 src/Launcher/Launcher_Job_Command.cxx create mode 100644 src/Launcher/Launcher_Job_Command.hxx create mode 100644 src/Launcher/Launcher_Job_PythonSALOME.cxx create mode 100644 src/Launcher/Launcher_Job_PythonSALOME.hxx create mode 100644 src/Launcher/Launcher_Job_SALOME.cxx create mode 100644 src/Launcher/Launcher_Job_SALOME.hxx create mode 100644 src/Launcher/Launcher_Job_YACSFile.cxx create mode 100644 src/Launcher/Launcher_Job_YACSFile.hxx create mode 100644 src/Launcher/Launcher_Utils.hxx delete mode 100644 src/LifeCycleCORBA/LifeCycleCORBA.py diff --git a/bin/appli_gen.py b/bin/appli_gen.py index 872fdd7f0..e16a780fe 100644 --- a/bin/appli_gen.py +++ b/bin/appli_gen.py @@ -265,7 +265,8 @@ def install(prefix,config_file,verbose=0): f =open(os.path.join(home_dir,'CatalogResources.xml'),'w') command=""" - + """ f.write(command) diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index 16ffacd19..1b48aa5af 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -31,62 +31,84 @@ module Engines { -//! Type to transmit list of machines. - typedef sequence MachineList; +//! Type to transmit list of resources. +typedef sequence ResourceList; //! components list - typedef sequence CompoList; +typedef sequence CompoList; //! files list - typedef sequence FilesList; +typedef sequence FilesList; //! modules list - typedef sequence ModulesList; +typedef sequence ModulesList; -//! Type to describe required properties of a container. -struct MachineParameters +//! Type to describe required properties of a resource +struct ResourceParameters { - //! container name if given else automatic - string container_name; - //! host name if given else automatic + //! resource name - manual selection + string name; + //! host name string hostname; - //! 
if given list of components that could be loaded on the container - CompoList componentList; - //! if given restricted list of machines to search in - MachineList computerList; - //! required operating system + //! if given required operating system string OS; + //! if given list of components that could be loaded on a container + //! Optional if no resource are found with this constraint + CompoList componentList; + + // Permits to order resources + //! required number of proc + long nb_proc; //! required memory size long mem_mb; //! required frequency long cpu_clock; - //! required number of proc per node - long nb_proc_per_node; //! required number of node long nb_node; - //! if true start a MPI container - boolean isMPI; - //! container working directory - string workingdir; - //! creation mode for GiveContainer. + //! required number of proc per node + long nb_proc_per_node; + + // Permits to configure SALOME resource management + //! resource management policy : first, cycl, altcycl or best (can be extended) + string policy; + //! restricted list of resources to search in + ResourceList resList; +}; + +//! Type to describe required properties of a container +struct ContainerParameters +{ + //! container name if given else automatic + string container_name; + + //! creation mode for GiveContainer if given else automatic /*!start creates a new container * get try to find an existing container * getorstart use an existing container if it exists or creates a new one */ string mode; - //! resource management policy : first, cycl, altcycl or best (can be extended) - string policy; + //! container working directory if given else automatic + string workingdir; + + // Parallel part + //! Number of proc of a parallel container + long nb_proc; + //! if true start a MPI container + boolean isMPI; //! PaCO specific informations string parallelLib; - long nb_component_nodes; + + //! Parameters to choose a resource + ResourceParameters resource_params; }; -//! 
Type to describe properties of a resource. -struct MachineDefinition +//! Type to describe a resource +struct ResourceDefinition { - //! host name + //! name + string name; + //! hostname string hostname; - //! alias name - string alias; - //! protocol to use to start a remote container (ssh or rsh) + //! protocol to connect to the resource + //! protocol used to start a remote container (ssh or rsh) string protocol; //! login name to use to start a remote container string username; @@ -94,120 +116,158 @@ struct MachineDefinition string applipath; //! list of available components CompoList componentList; + //! operating system string OS; - //! memory size + //! memory size per node long mem_mb; //! frequency long cpu_clock; - //! number of proc per node - long nb_proc_per_node; //! number of node long nb_node; - //! MPI implementation - string mpiImpl; + //! number of proc per node + long nb_proc_per_node; //! batch system string batch; - long nb_component_nodes; + //! MPI implementation + string mpiImpl; + //! if the resource is a cluster: + //! internal protocol to use to start a remote container (ssh or rsh) on the cluster + string iprotocol; }; + //! exception thrown if a computer is not found in the catalog - exception NotFound {}; +exception NotFound {}; -//! Structure used for Salome Batch Job parameters -struct BatchParameters +struct JobParameters { - //! Where batch command will be launched and log files will be created - string batch_directory; - //! Time for the batch (has to be like this : hh:mm) - string expected_during_time; - //! Minimum of memory needed (has to be like : 32gb or 512mb) - string mem; - //! Number of processors requested - long nb_proc; + //! Job Type - Could be equal to "command" or "yacs_file" or "python_salome" + string job_type; + + // Common values + string job_file; + string env_file; + FilesList in_files; + FilesList out_files; + string work_directory; + string local_directory; + string result_directory; + + /*! 
Time for the batch (has to be like this : hh:mm) - Could be empty, in + this case, default value of the selected resource will be used. + */ + string maximum_duration; + + // Memory is expressed in megabytes -> mem_mb + // Number of Processors -> nb_proc + ResourceParameters resource_required; + + /*! + Name of the batch queue choosed - optional + */ + string queue; }; /*! \brief Interface of the %salomelauncher This interface is used for interaction with the unique instance of SalomeLauncher */ - interface SalomeLauncher - { - long submitJob( in string xmlExecuteFile, - in string clusterName ) raises (SALOME::SALOME_Exception); - long submitSalomeJob( in string fileToExecute, - in FilesList filesToExport, - in FilesList filesToImport, - in BatchParameters batch_params, - in MachineParameters params ) raises (SALOME::SALOME_Exception); - string queryJob( in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); - void deleteJob( in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); - void getResultsJob( in string directory, in long jobId, in MachineParameters params ) raises (SALOME::SALOME_Exception); - - boolean testBatch(in MachineParameters params) raises (SALOME::SALOME_Exception); - - void Shutdown(); - - long getPID(); - - } ; +interface SalomeLauncher +{ + // Main methods + long createJob (in Engines::JobParameters job_parameters) raises (SALOME::SALOME_Exception); + void launchJob (in long job_id) raises (SALOME::SALOME_Exception); + string getJobState (in long job_id) raises (SALOME::SALOME_Exception); + void getJobResults(in long job_id, in string directory) raises (SALOME::SALOME_Exception); + void removeJob (in long job_id) raises (SALOME::SALOME_Exception); + + // Useful methods + long createJobWithFile(in string xmlJobFile, in string clusterName) raises (SALOME::SALOME_Exception); + boolean testBatch (in ResourceParameters params) raises (SALOME::SALOME_Exception); + + // SALOME kernel service methods + 
void Shutdown(); + long getPID(); +}; /*! \brief Interface of the %containerManager This interface is used for interaction with the unique instance of ContainerManager */ - interface ContainerManager - { - //! Find an existing container satisfying the constraints given by input parameters or start a new one. - Container FindOrStartContainer( in MachineParameters params); - - //! This operation launches a PaCO++ container. - /*! - \param Description of the container resquested. - \param List of computers ressources. - - \return Container's CORBA reference. - */ - Container StartParallelContainer( in MachineParameters params); - - //! Start a new container satisfying the constraints given by input parameters. - Container StartContainer( in MachineParameters params); - - //! Same as StartContainer except that in batch all containers have already been launched - /*! - We are in batch if environment variable SALOME_BATCH is 1. - In this case, containers have been launched at the beginning of the Salome session and - the container manager picks one in the pool of existing containers. - */ - Container GiveContainer( in MachineParameters params); - - //! Shutdown all containers that have been launched by the container manager - void ShutdownContainers(); - - } ; +interface ContainerManager +{ + //! GiveContainer - use mode parameter of ContainerParameters to configure + //! how this method works + //! Currently: get, start, getorstart, findorstart, find + Container GiveContainer(in ContainerParameters params); + + //! Shutdown all containers that have been launched by the container manager + void ShutdownContainers(); +} ; /*! \brief Interface of the %resourcesManager This interface is used for interaction with the unique instance of ResourcesManager */ - interface ResourcesManager - { - //! Find first available computer in a computers list - string FindFirst(in MachineList possibleComputers); +interface ResourcesManager +{ + //! 
Find first available resource in a resources list + string FindFirst(in ResourceList possibleResources); - //! Find best available computer according to policy in a computers list - string Find(in string policy, in MachineList possibleComputers); + //! Find best available computer according to policy in a computers list + string Find(in string policy, in ResourceList possibleResources); + + //! Get a list of resources that are best suited to launch a container given constraints + /*! + The constraints are resource constraints (params) and components constraints (componentList) + */ + ResourceList GetFittingResources(in ResourceParameters params) raises (SALOME::SALOME_Exception); + + //! Get definition of a resource + ResourceDefinition GetResourceDefinition(in string name); +}; - //! Get a list of computers that are best suited to launch a container given constraints - /*! - The constraints are resource constraints (params) and components constraints (componentList) - */ - MachineList GetFittingResources( in MachineParameters params) - raises (SALOME::SALOME_Exception); - //! Get the current machine parameters of a computer - MachineDefinition GetMachineParameters( in string hostname ); +// For compatibility - will be erased on SALOME 6 +typedef sequence MachineList; +//! Type to describe required properties of a container. +struct MachineParameters +{ + //! container name if given else automatic + string container_name; + //! host name if given else automatic + string hostname; + //! if given list of components that could be loaded on the container + CompoList componentList; + //! if given restricted list of machines to search in + MachineList computerList; + //! required operating system + string OS; + //! required memory size + long mem_mb; + //! required frequency + long cpu_clock; + //! required number of proc per node + long nb_proc_per_node; + //! required number of node + long nb_node; + //! if true start a MPI container + boolean isMPI; + //! 
container working directory + string workingdir; + //! creation mode for GiveContainer. + /*!start creates a new container + * get try to find an existing container + * getorstart use an existing container if it exists or creates a new one + */ + string mode; + //! resource management policy : first, cycl, altcycl or best (can be extended) + string policy; - } ; + //! PaCO specific informations + string parallelLib; + long nb_component_nodes; +}; }; #endif diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index 8afbc5f2f..d68d7aef7 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -41,10 +41,6 @@ using namespace std; -vector SALOME_ContainerManager::_batchLaunchedContainers; - -vector::iterator SALOME_ContainerManager::_batchLaunchedContainersIter; - const char *SALOME_ContainerManager::_ContainerManagerNameInNS = "/ContainerManager"; @@ -57,7 +53,8 @@ const char *SALOME_ContainerManager::_ContainerManagerNameInNS = */ //============================================================================= -SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns) +SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, + SALOME_ResourcesManager *rm, SALOME_NamingService *ns) { MESSAGE("constructor"); _NS = ns; @@ -197,144 +194,187 @@ void SALOME_ContainerManager::ShutdownContainers() //============================================================================= //! Give a suitable Container given constraints /*! 
CORBA Method: - * \param params Machine Parameters required for the container + * \param params Container Parameters required for the container * \return the container or nil */ //============================================================================= - Engines::Container_ptr -SALOME_ContainerManager::GiveContainer(const Engines::MachineParameters& params) +SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& params) { - char *valenv=getenv("SALOME_BATCH"); - if(valenv) - if (strcmp(valenv,"1")==0) - { - if(_batchLaunchedContainers.empty()) - fillBatchLaunchedContainers(); - - if (_batchLaunchedContainersIter == _batchLaunchedContainers.end()) - _batchLaunchedContainersIter = _batchLaunchedContainers.begin(); - - Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter); - _batchLaunchedContainersIter++; - return rtn; - } - return StartContainer(params); -} + Engines::Container_ptr ret = Engines::Container::_nil(); -//============================================================================= -//! Start a suitable Container in a list of machines with constraints -/*! 
C++ Method: - * Constraints are given by a machine parameters struct - * \param params Machine Parameters required for the container - * \param possibleComputers list of machines usable for start - * \param container_exe specific container executable (default=SALOME_Container) - */ -//============================================================================= + // Step 0: Default mode is start + Engines::ContainerParameters local_params(params); + if (std::string(local_params.mode.in()) == "") + local_params.mode = CORBA::string_dup("start"); + std::string mode = local_params.mode.in(); + MESSAGE("[GiveContainer] starting with mode: " << mode); -Engines::Container_ptr -SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params, - const Engines::MachineList& possibleComputers, - const std::string& container_exe) -{ -#ifdef WITH_PACO_PARALLEL - std::string parallelLib(params.parallelLib); - if (parallelLib != "") + // Step 1: Find Container for find and findorstart mode + if (mode == "find" or mode == "findorstart") { - Engines::MachineParameters myparams(params); - myparams.computerList=possibleComputers; - return StartParallelContainer(myparams); + ret = FindContainer(params, params.resource_params.resList); + if(!CORBA::is_nil(ret)) + return ret; + else + { + if (mode == "find") + { + MESSAGE("[GiveContainer] no container found"); + return ret; + } + else + { + mode = "start"; + } + } } -#endif - string containerNameInNS; - Engines::Container_ptr ret = Engines::Container::_nil(); - MESSAGE("SALOME_ContainerManager::StartContainer " << possibleComputers.length()); + // Step 2: Get all possibleResources from the parameters + Engines::ResourceList_var possibleResources = _ResManager->GetFittingResources(local_params.resource_params); + MESSAGE("[GiveContainer] - length of possible resources " << possibleResources->length()); + std::vector local_resources; - vector lm; -// if mode is "get" keep only machines with existing containers - 
if(std::string(params.mode.in())=="get") - { - for(unsigned int i=0;i_non_existent()) - lm.push_back(string(possibleComputers[i])); - } - catch(CORBA::Exception&) - { - // CORBA::Exception ignored. - } - } - } - else + // Step 3: if mode is "get" keep only machines with existing containers + if(mode == "get") + { + for(unsigned int i=0; i < possibleResources->length(); i++) { - for(unsigned int i=0;i_non_existent()) + local_resources.push_back(string(possibleResources[i])); + } + catch(CORBA::Exception&) {} } - string theMachine; - try - { - theMachine=_ResManager->GetImpl()->Find(params.policy.in(),lm); - } - catch( const SALOME_Exception &ex ) + // if local_resources is empty, we cannot give a container + if (local_resources.size() == 0) { - MESSAGE(ex.what()); - return Engines::Container::_nil(); + MESSAGE("[GiveContainer] cannot find a container for mode get"); + return ret; } + } + else + for(unsigned int i=0; i < possibleResources->length(); i++) + local_resources.push_back(string(possibleResources[i])); - //If the machine name is localhost use the real name - if(theMachine == "localhost") - theMachine=Kernel_Utils::GetHostname(); + // Step 4: select the resource where to get/start the container + std::string resource_selected; + try + { + resource_selected = _ResManager->GetImpl()->Find(params.resource_params.policy.in(), local_resources); + } + catch(const SALOME_Exception &ex) + { + MESSAGE("[GiveContainer] Exception in ResourceManager find !: " << ex.what()); + return ret; + } + MESSAGE("[GiveContainer] Resource selected is: " << resource_selected); - //check if an entry exists in Naming service - //if params.mode == "start" or "" shutdown the existing container before launching a new one with that name - //if params.mode == "getorstart" or "get" use the existing container + // Step 5: get container in the naming service + Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str()); + std::string 
hostname(resource_definition->name.in()); + std::string containerNameInNS; if(params.isMPI) - // A parallel container register on zero node in NS - containerNameInNS = _NS->BuildContainerNameForNS(params,GetMPIZeroNode(theMachine).c_str()); + // A mpi parallel container register on zero node in NS + containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname).c_str()); else - containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str()); + containerNameInNS = _NS->BuildContainerNameForNS(params, hostname.c_str()); + MESSAGE("[GiveContainer] Container name in the naming service: " << containerNameInNS); - SCRUTE(containerNameInNS); + // Step 6: check if the name exists in naming service + //if params.mode == "getorstart" or "get" use the existing container + //if params.mode == "start" shutdown the existing container before launching a new one with that name CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); - if ( !CORBA::is_nil(obj) ) + if (!CORBA::is_nil(obj)) + { + try { - try - { - Engines::Container_var cont=Engines::Container::_narrow(obj); - if(!cont->_non_existent()) - { - if(std::string(params.mode.in())=="getorstart"||std::string(params.mode.in())=="get") - return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/ - else - { - INFOS("A container is already registered with the name: " << containerNameInNS << ", shutdown the existing container"); - cont->Shutdown(); // shutdown the registered container if it exists - } - } - } - catch(CORBA::Exception&) - { - INFOS("CORBA::Exception ignored."); - } + Engines::Container_var cont=Engines::Container::_narrow(obj); + if(!cont->_non_existent()) + { + if(std::string(params.mode.in())=="getorstart" or std::string(params.mode.in())=="get") + return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/ + else + { + INFOS("[GiveContainer] A container is already registered with the name: " << 
containerNameInNS << ", shutdown the existing container"); + cont->Shutdown(); // shutdown the registered container if it exists + } + } } + catch(CORBA::Exception&) + { + INFOS("[GiveContainer] CORBA::Exception ignored when trying to get the container - we start a new one"); + } + } - //try to launch a new container - MESSAGE("try to launch it on " << theMachine); - - string command; - if(theMachine==""){ - MESSAGE("SALOME_ContainerManager::StartContainer : no possible computer"); - return Engines::Container::_nil(); + // Step 7: type of container: PaCO, Exe, Mpi or Classic + // Mpi already tested in step 5, specific code on BuildCommandToLaunch Local/Remote Container methods + // TODO -> separates Mpi from Classic/Exe + // PaCO++ + std::string parallelLib(params.parallelLib); + if (std::string(local_params.parallelLib.in()) != "") + { + INFOS("[GiveContainer] PaCO++ container are not currently available"); + return ret; + } + // Classic or Exe ? + std::string container_exe = "SALOME_Container"; // Classic container + int found=0; + try + { + CORBA::String_var container_exe_tmp; + CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); + SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; + if (CORBA::is_nil (Catalog)) + { + INFOS("[GiveContainer] Module Catalog is not found -> cannot launch a container"); + return ret; + } + // Loop through component list + for(unsigned int i=0; i < local_params.resource_params.componentList.length(); i++) + { + const char* compoi = local_params.resource_params.componentList[i]; + SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi); + if (CORBA::is_nil (compoInfo)) + { + continue; + } + SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type(); + container_exe_tmp=compoInfo->implementation_name(); + if(impl==SALOME_ModuleCatalog::CEXE) + { + if(found) + { + INFOS("ContainerManager Error: you can't have 2 CEXE component in the same 
container" ); + return Engines::Container::_nil(); + } + MESSAGE("[GiveContainer] Exe container found !: " << container_exe_tmp); + container_exe = container_exe_tmp.in(); + found=1; + } + } + } + catch (ServiceUnreachable&) + { + INFOS("Caught exception: Naming Service Unreachable"); + return ret; } - else if(theMachine==Kernel_Utils::GetHostname()) - command = BuildCommandToLaunchLocalContainer(params,container_exe); + catch (...) + { + INFOS("Caught unknown exception."); + return ret; + } + + // Step 8: start a new container + MESSAGE("[GiveContainer] Try to launch a new container on " << resource_selected); + std::string command; + if(hostname == Kernel_Utils::GetHostname()) + command = BuildCommandToLaunchLocalContainer(params, container_exe); else - command = BuildCommandToLaunchRemoteContainer(theMachine,params,container_exe); + command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe); //redirect stdout and stderr in a file #ifdef WNT @@ -344,16 +384,16 @@ SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params string logFilename="/tmp"; char* val = getenv("SALOME_TMP_DIR"); if(val) - { - struct stat file_info; - stat(val, &file_info); - bool is_dir = S_ISDIR(file_info.st_mode); - if (is_dir)logFilename=val; - else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl; - } + { + struct stat file_info; + stat(val, &file_info); + bool is_dir = S_ISDIR(file_info.st_mode); + if (is_dir)logFilename=val; + else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl; + } logFilename += "/"; #endif - logFilename += _NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ; + logFilename += _NS->ContainerName(params)+"_"+ resource_selected +"_"+getenv( "USER" )+".log" ; command += " > " + logFilename + " 2>&1"; #ifdef WNT command = "%PYTHONBIN% -c \"import win32pm ; win32pm.spawnpid(r'" + command + "', 
'')\""; @@ -374,117 +414,37 @@ SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params RmTmpFile(_TmpFileName); // command file can be removed here return Engines::Container::_nil(); } - else{ - int count=TIME_OUT_TO_LAUNCH_CONT; - MESSAGE("count = "<Resolve(containerNameInNS.c_str()); ret=Engines::Container::_narrow(obj); } - - if ( CORBA::is_nil(ret) ) - { - MESSAGE("SALOME_ContainerManager::StartContainer rsh failed"); - } - else - { - logFilename=":"+logFilename; - logFilename="@"+Kernel_Utils::GetHostname()+logFilename; - logFilename=getenv( "USER" )+logFilename; - ret->logfilename(logFilename.c_str()); - } - - RmTmpFile(_TmpFileName); // command file can be removed here - return ret; - } -} - -//============================================================================= -//! Start a suitable Container given constraints -/*! CORBA Method: - * \param params Machine Parameters required for the container - */ -//============================================================================= - -Engines::Container_ptr -SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params) -{ - Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params); - - // Look into ModulCatalog if a specific container must be launched - CORBA::String_var container_exe; - int found=0; - try - { - CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); - SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; - if (CORBA::is_nil (Catalog)) - return Engines::Container::_nil(); - // Loop through component list - for(unsigned int i=0;iGetComponent(compoi); - if (CORBA::is_nil (compoInfo)) - { - continue; - } - SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type(); - container_exe=compoInfo->implementation_name(); - if(impl==SALOME_ModuleCatalog::CEXE) - { - if(found) - { - INFOS("ContainerManager Error: you can't have 2 CEXE component in the same 
container" ); - return Engines::Container::_nil(); - } - found=1; - } - } - } - catch (ServiceUnreachable&) + if (CORBA::is_nil(ret)) { - INFOS("Caught exception: Naming Service Unreachable"); - return Engines::Container::_nil(); + INFOS("[GiveContainer] was not able to launch container " << containerNameInNS); } - catch (...) + else { - INFOS("Caught unknown exception."); - return Engines::Container::_nil(); + // Setting log file name + logFilename=":"+logFilename; + logFilename="@"+Kernel_Utils::GetHostname()+logFilename; + logFilename=getenv( "USER" )+logFilename; + ret->logfilename(logFilename.c_str()); + RmTmpFile(_TmpFileName); // command file can be removed here } - - if(found) - return StartContainer(params,possibleComputers,container_exe.in()); - else - return StartContainer(params,possibleComputers); -} - -//============================================================================= -//! Find or start a suitable Container given some constraints -/*! CORBA Method: - * \param params Machine Parameters required for the container - * \return the container or nil - */ -//============================================================================= - -Engines::Container_ptr -SALOME_ContainerManager::FindOrStartContainer(const Engines::MachineParameters& params) -{ - Engines::Container_ptr ret = FindContainer(params,params.computerList); - if(!CORBA::is_nil(ret)) - return ret; - MESSAGE("Container doesn't exist try to launch it ..."); - - return StartContainer(params); + } + return ret; } //============================================================================= @@ -495,18 +455,17 @@ SALOME_ContainerManager::FindOrStartContainer(const Engines::MachineParameters& //============================================================================= Engines::Container_ptr -SALOME_ContainerManager::FindContainer(const Engines::MachineParameters& params, - const Engines::MachineList& possibleComputers) +SALOME_ContainerManager::FindContainer(const 
Engines::ContainerParameters& params, + const Engines::ResourceList& possibleResources) { - MESSAGE("FindContainer "<BuildContainerNameForNS(params,theMachine)); + std::string containerNameInNS(_NS->BuildContainerNameForNS(params, resource.c_str())); + MESSAGE("[FindContainer] Try to find a container " << containerNameInNS << " on resource " << resource); CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); try - { - if(obj->_non_existent()) - return Engines::Container::_nil(); - else - return Engines::Container::_narrow(obj); - } - catch(const CORBA::Exception& e) - { + { + if(obj->_non_existent()) return Engines::Container::_nil(); - } + else + return Engines::Container::_narrow(obj); + } + catch(const CORBA::Exception& e) + { + return Engines::Container::_nil(); + } } -#ifdef WITH_PACO_PARALLEL //============================================================================= -/*! CORBA Method: - * Find or Start a suitable PaCO++ Parallel Container in a list of machines. - * \param params Machine Parameters required for the container - * \return CORBA container reference. - */ +/*! + * This is no longer valid (C++ container are also python containers) + */ //============================================================================= -Engines::Container_ptr -SALOME_ContainerManager::StartParallelContainer(const Engines::MachineParameters& params_const) +bool isPythonContainer(const char* ContainerName) { - CORBA::Object_var obj; - PaCO::InterfaceManager_var container_proxy; - Engines::Container_ptr ret = Engines::Container::_nil(); - Engines::MachineParameters params(params_const); + bool ret = false; + int len = strlen(ContainerName); - // Step 1 : Try to find a suitable container - // Currently not as good as could be since - // we have to verified the number of nodes of the container - // if a user tell that. - ret = FindContainer(params, params.computerList); - if(CORBA::is_nil(ret)) { - // Step 2 : Starting a new parallel container ! 
- INFOS("[StartParallelContainer] Starting a PaCO++ parallel container"); + if (len >= 2) + if (strcmp(ContainerName + len - 2, "Py") == 0) + ret = true; - // Step 3 : Choose a computer - std::string theMachine = _ResManager->FindFirst(params.computerList); - //If the machine name is localhost use the real name - if(theMachine == "localhost") - theMachine=Kernel_Utils::GetHostname(); + return ret; +} - if(theMachine == "") { - INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); - INFOS("[StartParallelContainer] No possible computer found"); - INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); - return ret; - } - INFOS("[StartParallelContainer] on machine : " << theMachine); - params.hostname = CORBA::string_dup(theMachine.c_str()); +//============================================================================= +/*! + * Builds the script to be launched + * + * If SALOME Application not defined ($APPLI), + * see BuildTempFileToLaunchRemoteContainer() + * + * Else rely on distant configuration. Command is under the form (example): + * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ + * SALOME_Container containerName &" - // Step 4 : starting parallel container proxy - Engines::MachineParameters params_proxy(params); - std::string command_proxy; - SALOME_ContainerManager::actual_launch_machine_t proxy_machine; - try + * - where user is ommited if not specified in CatalogResources, + * - where distant path is always relative to user@machine $HOME, and + * equal to $APPLI if not specified in CatalogResources, + * - where hostNS is the hostname of CORBA naming server (set by scripts to + * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) + * - where portNS is the port used by CORBA naming server (set by scripts to + * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) + * - where workingdir is the requested working directory for the container. 
+ * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME + */ +//============================================================================= + +string +SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer +(const string& resource_name, + const Engines::ContainerParameters& params, const std::string& container_exe) +{ + + string command; + if (!_isAppliSalomeDefined) + command = BuildTempFileToLaunchRemoteContainer(resource_name, params); + else + { + int nbproc; + Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_name.c_str()); + std::string hostname(resource_definition->name.in()); + const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name); + + if (params.isMPI) { - command_proxy = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerProxy", params_proxy, proxy_machine); - } - catch(const SALOME_Exception & ex) - { - INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); - INFOS(ex.what()); - return ret; - } - params_proxy.nb_component_nodes = 0; // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes - obj = LaunchParallelContainer(command_proxy, params_proxy, _NS->ContainerName(params_proxy), proxy_machine); - if (CORBA::is_nil(obj)) - { - INFOS("[StartParallelContainer] LaunchParallelContainer for proxy returns NIL !"); - return ret; - } - try - { - container_proxy = PaCO::InterfaceManager::_narrow(obj); - } - catch(CORBA::SystemException& e) - { - INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); - INFOS("CORBA::SystemException : " << e); - return ret; - } - catch(CORBA::Exception& e) - { - INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); - INFOS("CORBA::Exception" << e); - return ret; - } - catch(...) 
- { - INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); - INFOS("Unknown exception !"); - return ret; - } - if (CORBA::is_nil(container_proxy)) - { - INFOS("[StartParallelContainer] PaCO::InterfaceManager::_narrow returns NIL !"); - return ret; + if ((params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0)) + nbproc = 1; + else if (params.resource_params.nb_node == 0) + nbproc = params.resource_params.nb_proc_per_node; + else if (params.resource_params.nb_proc_per_node == 0) + nbproc = params.resource_params.nb_node; + else + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; } - // Step 5 : starting parallel container nodes - std::string command_nodes; - Engines::MachineParameters params_nodes(params); - SALOME_ContainerManager::actual_launch_machine_t nodes_machines; - try - { - command_nodes = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerNode", params_nodes, nodes_machines, proxy_machine[0]); - } - catch(const SALOME_Exception & ex) - { - INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); - INFOS(ex.what()); - return ret; - } - std::string container_generic_node_name = _NS->ContainerName(params) + "Node"; - obj = LaunchParallelContainer(command_nodes, params_nodes, container_generic_node_name, nodes_machines); - if (CORBA::is_nil(obj)) - { - INFOS("[StartParallelContainer] LaunchParallelContainer for nodes returns NIL !"); - // Il faut tuer le proxy - try - { - Engines::Container_var proxy = Engines::Container::_narrow(container_proxy); - proxy->Shutdown(); - } - catch (...) 
- { - INFOS("[StartParallelContainer] Exception catched from proxy Shutdown..."); - } - return ret; - } + // "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ + // SALOME_Container containerName &" + if (resInfo.Protocol == rsh) + command = "rsh "; + else if (resInfo.Protocol == ssh) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); - // Step 6 : connecting nodes and the proxy to actually create a parallel container - for (int i = 0; i < params.nb_component_nodes; i++) + if (resInfo.UserName != "") { - std::ostringstream tmp; - tmp << i; - std::string proc_number = tmp.str(); - std::string container_node_name = container_generic_node_name + proc_number; - - std::string theNodeMachine(nodes_machines[i]); - std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str()); - obj = _NS->Resolve(containerNameInNS.c_str()); - if (CORBA::is_nil(obj)) - { - INFOS("[StartParallelContainer] CONNECTION FAILED From Naming Service !"); - INFOS("[StartParallelContainer] Container name is " << containerNameInNS); - return ret; - } - try - { - MESSAGE("[StartParallelContainer] Deploying node : " << container_node_name); - PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj); - node->deploy(); - MESSAGE("[StartParallelContainer] node " << container_node_name << " is deployed"); - } - catch(CORBA::SystemException& e) - { - INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); - INFOS("CORBA::SystemException : " << e); - return ret; - } - catch(CORBA::Exception& e) - { - INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); - INFOS("CORBA::Exception" << e); - return ret; - } - catch(...) 
- { - INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); - INFOS("Unknown exception !"); - return ret; - } + command += "-l "; + command += resInfo.UserName; + command += " "; } - // Step 7 : starting parallel container - try - { - MESSAGE ("[StartParallelContainer] Starting parallel object"); - container_proxy->start(); - MESSAGE ("[StartParallelContainer] Parallel object is started"); - ret = Engines::Container::_narrow(container_proxy); - } - catch(CORBA::SystemException& e) - { - INFOS("Caught CORBA::SystemException. : " << e); - } - catch(PortableServer::POA::ServantAlreadyActive&) - { - INFOS("Caught CORBA::ServantAlreadyActiveException"); - } - catch(CORBA::Exception&) - { - INFOS("Caught CORBA::Exception."); - } - catch(std::exception& exc) - { - INFOS("Caught std::exception - "<BuildContainerNameForNS((char*) name.c_str(), theMachine.c_str()); - INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy " << containerNameInNS << " on " << theMachine); - while (CORBA::is_nil(obj) && count) + command += " "; + ASSERT(getenv("NSPORT")); + command += getenv("NSPORT"); // port of CORBA name server + + std::string wdir = params.workingdir.in(); + if(wdir != "") { -#ifndef WIN32 - sleep(1) ; -#else - Sleep(1000); -#endif - count-- ; - obj = _NS->Resolve(containerNameInNS.c_str()); + command += " WORKINGDIR "; + command += " '"; + if(wdir == "$TEMPDIR") + wdir="\\$TEMPDIR"; + command += wdir; // requested working directory + command += "'"; } - } - else - { - INFOS("[LaunchParallelContainer] launching the nodes of the parallel container"); - // We are waiting all the nodes - for (int i = 0; i < params.nb_component_nodes; i++) + + if(params.isMPI) { - obj = CORBA::Object::_nil(); - std::string theMachine(vect_machine[i]); - // Name of the node - std::ostringstream tmp; - tmp << i; - std::string proc_number = tmp.str(); - std::string container_node_name = name + proc_number; - containerNameInNS = 
_NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str()); - INFOS("[LaunchParallelContainer] Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine); - while (CORBA::is_nil(obj) && count) { -#ifndef WIN32 - sleep(1) ; -#else - Sleep(1000); -#endif - count-- ; - obj = _NS->Resolve(containerNameInNS.c_str()); - } - if (CORBA::is_nil(obj)) - { - INFOS("[LaunchParallelContainer] Launch of node failed (or not found) !"); - return obj; + command += " mpirun -np "; + std::ostringstream o; + o << nbproc << " "; + command += o.str(); +#ifdef WITHLAM + command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; +#elif defined(WITHOPENMPI) + if( getenv("OMPI_URI_FILE") == NULL ) + command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + else{ + command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + command += getenv("OMPI_URI_FILE"); } +#endif + command += " SALOME_MPIContainer "; } - } - if (CORBA::is_nil(obj)) - INFOS("[LaunchParallelContainer] failed"); - - return obj; -} + else + command += " " +container_exe+ " "; -void SALOME_ContainerManager::fillBatchLaunchedContainers() -{ - _batchLaunchedContainers.clear(); - _NS->Change_Directory("/Containers"); - vector vec = _NS->list_directory_recurs(); - for(vector::iterator iter = vec.begin();iter!=vec.end();iter++){ - CORBA::Object_var obj=_NS->Resolve((*iter).c_str()); - Engines::Container_ptr cont=Engines::Container::_narrow(obj); - if(!CORBA::is_nil(cont)){ - _batchLaunchedContainers.push_back(cont); - } + command += _NS->ContainerName(params); + command += " -"; + AddOmninamesParams(command); + + MESSAGE("command =" << command); } - _batchLaunchedContainersIter=_batchLaunchedContainers.begin(); + + return command; } //============================================================================= /*! 
- * This is no longer valid (C++ container are also python containers) + * builds the command to be launched. */ //============================================================================= - -bool isPythonContainer(const char* ContainerName) -{ - bool ret = false; - int len = strlen(ContainerName); - - if (len >= 2) - if (strcmp(ContainerName + len - 2, "Py") == 0) - ret = true; - - return ret; -} - -//============================================================================= -/*! - * Builds the script to be launched - * - * If SALOME Application not defined ($APPLI), - * see BuildTempFileToLaunchRemoteContainer() - * - * Else rely on distant configuration. Command is under the form (example): - * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ - * SALOME_Container containerName &" - - * - where user is ommited if not specified in CatalogResources, - * - where distant path is always relative to user@machine $HOME, and - * equal to $APPLI if not specified in CatalogResources, - * - where hostNS is the hostname of CORBA naming server (set by scripts to - * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) - * - where portNS is the port used by CORBA naming server (set by scripts to - * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) - * - where workingdir is the requested working directory for the container. - * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME - */ -//============================================================================= - -string -SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer -(const string& machine, - const Engines::MachineParameters& params, const std::string& container_exe) -{ - string command; - int nbproc; - - if ( ! 
_isAppliSalomeDefined ) - command = BuildTempFileToLaunchRemoteContainer(machine, params); - - else - { - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine); - - if (params.isMPI) - { - if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) - nbproc = 1; - else if ( params.nb_node == 0 ) - nbproc = params.nb_proc_per_node; - else if ( params.nb_proc_per_node == 0 ) - nbproc = params.nb_node; - else - nbproc = params.nb_node * params.nb_proc_per_node; - } - - // "ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ - // SALOME_Container containerName &" - - if (resInfo.Protocol == rsh) - command = "rsh "; - else if (resInfo.Protocol == ssh) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (resInfo.UserName != "") - { - command += resInfo.UserName; - command += "@"; - } - - command += machine; - command += " "; - - if (resInfo.AppliPath != "") - command += resInfo.AppliPath; // path relative to user@machine $HOME - else - { - ASSERT(getenv("APPLI")); - command += getenv("APPLI"); // path relative to user@machine $HOME - } - - command += "/runRemote.sh "; - - ASSERT(getenv("NSHOST")); - command += getenv("NSHOST"); // hostname of CORBA name server - - command += " "; - ASSERT(getenv("NSPORT")); - command += getenv("NSPORT"); // port of CORBA name server - - std::string wdir=params.workingdir.in(); - if(wdir != "") - { - command += " WORKINGDIR "; - command += " '"; - if(wdir == "$TEMPDIR") - wdir="\\$TEMPDIR"; - command += wdir; // requested working directory - command += "'"; - } - - if(params.isMPI) - { - command += " mpirun -np "; - std::ostringstream o; - o << nbproc << " "; - command += o.str(); -#ifdef WITHLAM - command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) - command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; - else{ - command += "-x PATH -x 
LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - command += getenv("OMPI_URI_FILE"); - } -#endif - command += " SALOME_MPIContainer "; - } - else - command += " " +container_exe+ " "; - - command += _NS->ContainerName(params); - command += " -"; - AddOmninamesParams(command); - - MESSAGE("command =" << command); - } - - return command; -} - -//============================================================================= -/*! - * builds the command to be launched. - */ -//============================================================================= - -string -SALOME_ContainerManager::BuildCommandToLaunchLocalContainer -(const Engines::MachineParameters& params, const std::string& container_exe) +string +SALOME_ContainerManager::BuildCommandToLaunchLocalContainer +(const Engines::ContainerParameters& params, const std::string& container_exe) { _TmpFileName = BuildTemporaryFileName(); string command; int nbproc = 0; - ofstream command_file( _TmpFileName.c_str() ); + ostringstream o; if (params.isMPI) { - //command = "mpirun -np "; - command_file << "mpirun -np "; + o << "mpirun -np "; - if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) + if ( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) nbproc = 1; - else if ( params.nb_node == 0 ) - nbproc = params.nb_proc_per_node; - else if ( params.nb_proc_per_node == 0 ) - nbproc = params.nb_node; + else if ( params.resource_params.nb_node == 0 ) + nbproc = params.resource_params.nb_proc_per_node; + else if ( params.resource_params.nb_proc_per_node == 0 ) + nbproc = params.resource_params.nb_node; else - nbproc = params.nb_node * params.nb_proc_per_node; - - //std::ostringstream o; + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; - //o << nbproc << " "; - command_file << nbproc << " "; + o << nbproc << " "; - //command += o.str(); #ifdef WITHLAM - //command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace 
"; - command_file << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; + o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; #elif defined(WITHOPENMPI) - //command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace "; if( getenv("OMPI_URI_FILE") == NULL ) - command_file << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; else { - command_file << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - command_file << getenv("OMPI_URI_FILE"); + o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + o << getenv("OMPI_URI_FILE"); } #endif if (isPythonContainer(params.container_name)) - //command += "pyMPI SALOME_ContainerPy.py "; - command_file << " pyMPI SALOME_ContainerPy.py "; + o << " pyMPI SALOME_ContainerPy.py "; else - //command += "SALOME_MPIContainer "; - command_file << " SALOME_MPIContainer "; + o << " SALOME_MPIContainer "; } else { - //command=""; std::string wdir=params.workingdir.in(); if(wdir != "") { @@ -1067,11 +701,9 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer // a new temporary directory is requested string dir = Kernel_Utils::GetTmpDir(); #ifdef WIN32 - //command += "cd /d "+ dir +";"; - command_file << "cd /d " << dir << endl; + o << "cd /d " << dir << endl; #else - //command = "cd "+ dir +";"; - command_file << "cd " << dir << ";"; + o << "cd " << dir << ";"; #endif } @@ -1079,27 +711,26 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer { // a permanent directory is requested use it or create it #ifdef WIN32 - //command="mkdir " + wdir; - command_file << "mkdir " + wdir << endl; - command_file << "cd /D " + wdir << endl; + o << "mkdir " + wdir << endl; + o << "cd /D " + wdir << endl; #else - //command="mkdir -p " + wdir + " && cd " + wdir + ";"; - command_file << "mkdir -p " << wdir << " && cd " << wdir + ";"; + o << "mkdir -p " << wdir << 
" && cd " << wdir + ";"; #endif } } if (isPythonContainer(params.container_name)) - //command += "SALOME_ContainerPy.py "; - command_file << "SALOME_ContainerPy.py "; + o << "SALOME_ContainerPy.py "; else - //command += container_exe + " "; - command_file << container_exe + " "; + o << container_exe + " "; } - command_file << _NS->ContainerName(params); - command_file << " -"; - AddOmninamesParams(command_file); + o << _NS->ContainerName(params); + o << " -"; + AddOmninamesParams(o); + + ofstream command_file( _TmpFileName.c_str() ); + command_file << o.str(); command_file.close(); #ifndef WIN32 @@ -1108,6 +739,7 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer command = _TmpFileName; MESSAGE("Command is file ... " << command); + MESSAGE("Command is ... " << o.str()); return command; } @@ -1160,170 +792,523 @@ void SALOME_ContainerManager::AddOmninamesParams(string& command) const command += "ORBInitRef NameService="; command += iorstr; } - - + +//============================================================================= +/*! + * add to command all options relative to naming service. + */ +//============================================================================= + +void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const +{ + CORBA::String_var iorstr = _NS->getIORaddr(); + fileStream << "ORBInitRef NameService="; + fileStream << iorstr; +} + +//============================================================================= +/*! + * add to command all options relative to naming service. + */ +//============================================================================= + +void SALOME_ContainerManager::AddOmninamesParams(ostringstream& oss) const +{ + CORBA::String_var iorstr = _NS->getIORaddr(); + oss << "ORBInitRef NameService="; + oss << iorstr; +} + +//============================================================================= +/*! 
+ * generate a file name in /tmp directory + */ +//============================================================================= + +string SALOME_ContainerManager::BuildTemporaryFileName() const +{ + //build more complex file name to support multiple salome session + string aFileName = Kernel_Utils::GetTmpFileName(); +#ifndef WIN32 + aFileName += ".sh"; +#else + aFileName += ".bat"; +#endif + return aFileName; +} + +string SALOME_ContainerManager::GetMPIZeroNode(string machine) +{ + int status; + string zeronode; + string cmd; + string tmpFile = BuildTemporaryFileName(); + + cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile; + + status = system(cmd.c_str()); + if( status == 0 ){ + ifstream fp(tmpFile.c_str(),ios::in); + fp >> zeronode; + } + + RmTmpFile(tmpFile); + + return zeronode; +} + +//============================================================================= +/*! + * Builds in a temporary file the script to be launched. + * + * Used if SALOME Application ($APPLI) is not defined. + * The command is build with data from CatalogResources, in which every path + * used on remote computer must be defined. + */ +//============================================================================= + +string +SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer +(const string& resource_name, + const Engines::ContainerParameters& params) throw(SALOME_Exception) +{ + int status; + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name); + tempOutputFile << "#! /bin/sh" << endl; + + // --- set env vars + + tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace" + //tempOutputFile << "source " << resInfo.PreReqFilePath << endl; + + // ! 
env vars + + if (params.isMPI) + { + tempOutputFile << "mpirun -np "; + int nbproc; + + if ( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) + nbproc = 1; + else if ( params.resource_params.nb_node == 0 ) + nbproc = params.resource_params.nb_proc_per_node; + else if ( params.resource_params.nb_proc_per_node == 0 ) + nbproc = params.resource_params.nb_node; + else + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; + + std::ostringstream o; + + tempOutputFile << nbproc << " "; +#ifdef WITHLAM + tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; +#elif defined(WITHOPENMPI) + if( getenv("OMPI_URI_FILE") == NULL ) + tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + else{ + tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + tempOutputFile << getenv("OMPI_URI_FILE"); + } +#endif + } + + tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/"; + + if (params.isMPI) + { + if (isPythonContainer(params.container_name)) + tempOutputFile << " pyMPI SALOME_ContainerPy.py "; + else + tempOutputFile << " SALOME_MPIContainer "; + } + + else + { + if (isPythonContainer(params.container_name)) + tempOutputFile << "SALOME_ContainerPy.py "; + else + tempOutputFile << "SALOME_Container "; + } + + tempOutputFile << _NS->ContainerName(params) << " -"; + AddOmninamesParams(tempOutputFile); + tempOutputFile << " &" << endl; + tempOutputFile.flush(); + tempOutputFile.close(); +#ifndef WIN32 + chmod(_TmpFileName.c_str(), 0x1ED); +#endif + + // --- Build command + + string command; + + if (resInfo.Protocol == rsh) + { + command = "rsh "; + string commandRcp = "rcp "; + commandRcp += _TmpFileName; + commandRcp += " "; + commandRcp += resInfo.HostName; + commandRcp += ":"; + commandRcp += _TmpFileName; + status = system(commandRcp.c_str()); + } + + else if (resInfo.Protocol == ssh) + { + command = "ssh "; + 
string commandRcp = "scp "; + commandRcp += _TmpFileName; + commandRcp += " "; + commandRcp += resInfo.HostName; + commandRcp += ":"; + commandRcp += _TmpFileName; + status = system(commandRcp.c_str()); + } + else + throw SALOME_Exception("Unknown protocol"); + + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + command += resInfo.HostName; + _CommandForRemAccess = command; + command += " "; + command += _TmpFileName; + + SCRUTE(command); + + return command; + +} + +#ifdef WITH_PACO_PARALLEL +//============================================================================= +/*! CORBA Method: + * Find or Start a suitable PaCO++ Parallel Container in a list of machines. + * \param params Machine Parameters required for the container + * \return CORBA container reference. + */ +//============================================================================= +Engines::Container_ptr +SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters& params_const) +{ + CORBA::Object_var obj; + PaCO::InterfaceManager_var container_proxy; + Engines::Container_ptr ret = Engines::Container::_nil(); + Engines::MachineParameters params(params_const); + + // Step 1 : Try to find a suitable container + // Currently not as good as could be since + // we have to verified the number of nodes of the container + // if a user tell that. + ret = FindContainer(params, params.computerList); + if(CORBA::is_nil(ret)) { + // Step 2 : Starting a new parallel container ! 
+ INFOS("[StartParallelContainer] Starting a PaCO++ parallel container"); + + // Step 3 : Choose a computer + std::string theMachine = _ResManager->FindFirst(params.computerList); + //If the machine name is localhost use the real name + if(theMachine == "localhost") + theMachine=Kernel_Utils::GetHostname(); + + if(theMachine == "") { + INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); + INFOS("[StartParallelContainer] No possible computer found"); + INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); + return ret; + } + INFOS("[StartParallelContainer] on machine : " << theMachine); + params.hostname = CORBA::string_dup(theMachine.c_str()); + + // Step 4 : starting parallel container proxy + Engines::MachineParameters params_proxy(params); + std::string command_proxy; + SALOME_ContainerManager::actual_launch_machine_t proxy_machine; + try + { + command_proxy = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerProxy", params_proxy, proxy_machine); + } + catch(const SALOME_Exception & ex) + { + INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); + INFOS(ex.what()); + return ret; + } + params_proxy.nb_proc = 0; // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes + obj = LaunchParallelContainer(command_proxy, params_proxy, _NS->ContainerName(params_proxy), proxy_machine); + if (CORBA::is_nil(obj)) + { + INFOS("[StartParallelContainer] LaunchParallelContainer for proxy returns NIL !"); + return ret; + } + try + { + container_proxy = PaCO::InterfaceManager::_narrow(obj); + } + catch(CORBA::SystemException& e) + { + INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); + INFOS("CORBA::SystemException : " << e); + return ret; + } + catch(CORBA::Exception& e) + { + INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); + INFOS("CORBA::Exception" << e); + return ret; + } + catch(...) 
+ { + INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); + INFOS("Unknown exception !"); + return ret; + } + if (CORBA::is_nil(container_proxy)) + { + INFOS("[StartParallelContainer] PaCO::InterfaceManager::_narrow returns NIL !"); + return ret; + } + + // Step 5 : starting parallel container nodes + std::string command_nodes; + Engines::MachineParameters params_nodes(params); + SALOME_ContainerManager::actual_launch_machine_t nodes_machines; + try + { + command_nodes = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerNode", params_nodes, nodes_machines, proxy_machine[0]); + } + catch(const SALOME_Exception & ex) + { + INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); + INFOS(ex.what()); + return ret; + } + std::string container_generic_node_name = _NS->ContainerName(params) + "Node"; + obj = LaunchParallelContainer(command_nodes, params_nodes, container_generic_node_name, nodes_machines); + if (CORBA::is_nil(obj)) + { + INFOS("[StartParallelContainer] LaunchParallelContainer for nodes returns NIL !"); + // Il faut tuer le proxy + try + { + Engines::Container_var proxy = Engines::Container::_narrow(container_proxy); + proxy->Shutdown(); + } + catch (...) 
+ { + INFOS("[StartParallelContainer] Exception catched from proxy Shutdown..."); + } + return ret; + } + + // Step 6 : connecting nodes and the proxy to actually create a parallel container + for (int i = 0; i < params.nb_proc; i++) + { + std::ostringstream tmp; + tmp << i; + std::string proc_number = tmp.str(); + std::string container_node_name = container_generic_node_name + proc_number; + + std::string theNodeMachine(nodes_machines[i]); + std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str()); + obj = _NS->Resolve(containerNameInNS.c_str()); + if (CORBA::is_nil(obj)) + { + INFOS("[StartParallelContainer] CONNECTION FAILED From Naming Service !"); + INFOS("[StartParallelContainer] Container name is " << containerNameInNS); + return ret; + } + try + { + MESSAGE("[StartParallelContainer] Deploying node : " << container_node_name); + PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj); + node->deploy(); + MESSAGE("[StartParallelContainer] node " << container_node_name << " is deployed"); + } + catch(CORBA::SystemException& e) + { + INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); + INFOS("CORBA::SystemException : " << e); + return ret; + } + catch(CORBA::Exception& e) + { + INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); + INFOS("CORBA::Exception" << e); + return ret; + } + catch(...) + { + INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); + INFOS("Unknown exception !"); + return ret; + } + } + + // Step 7 : starting parallel container + try + { + MESSAGE ("[StartParallelContainer] Starting parallel object"); + container_proxy->start(); + MESSAGE ("[StartParallelContainer] Parallel object is started"); + ret = Engines::Container::_narrow(container_proxy); + } + catch(CORBA::SystemException& e) + { + INFOS("Caught CORBA::SystemException. 
: " << e); + } + catch(PortableServer::POA::ServantAlreadyActive&) + { + INFOS("Caught CORBA::ServantAlreadyActiveException"); + } + catch(CORBA::Exception&) + { + INFOS("Caught CORBA::Exception."); + } + catch(std::exception& exc) + { + INFOS("Caught std::exception - "<getIORaddr(); - fileStream << "ORBInitRef NameService="; - fileStream << iorstr; + Engines::Container_ptr ret = Engines::Container::_nil(); + INFOS("[StartParallelContainer] is disabled !"); + INFOS("[StartParallelContainer] recompile SALOME Kernel to enable parallel extension"); + return ret; } +#endif -//============================================================================= -/*! - * generate a file name in /tmp directory - */ -//============================================================================= - -string SALOME_ContainerManager::BuildTemporaryFileName() const +#ifndef WITH_PACO_PARALLEL +CORBA::Object_ptr +SALOME_ContainerManager::LaunchParallelContainer(const std::string& command, + const Engines::ContainerParameters& params, + const std::string& name, + SALOME_ContainerManager::actual_launch_machine_t & vect_machine) { - //build more complex file name to support multiple salome session - string aFileName = Kernel_Utils::GetTmpFileName(); -#ifndef WIN32 - aFileName += ".sh"; -#else - aFileName += ".bat"; -#endif - return aFileName; + CORBA::Object_ptr obj = CORBA::Object::_nil(); + return obj; } - - +#else //============================================================================= -/*! - * Builds in a temporary file the script to be launched. - * - * Used if SALOME Application ($APPLI) is not defined. - * The command is build with data from CatalogResources, in which every path - * used on remote computer must be defined. - */ +/*! This method launches the parallel container. + * It will may be placed on the ressources manager. 
+ * + * \param command to launch + * \param container's parameters + * \param name of the container + * + * \return CORBA container reference + */ //============================================================================= - -string -SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer -(const string& machine, - const Engines::MachineParameters& params) throw(SALOME_Exception) +CORBA::Object_ptr +SALOME_ContainerManager::LaunchParallelContainer(const std::string& command, + const Engines::ContainerParameters& params, + const std::string& name, + SALOME_ContainerManager::actual_launch_machine_t & vect_machine) { - int status; - - _TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine); - tempOutputFile << "#! /bin/sh" << endl; - - // --- set env vars - - tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace" - //tempOutputFile << "source " << resInfo.PreReqFilePath << endl; + CORBA::Object_ptr obj = CORBA::Object::_nil(); + std::string containerNameInNS; + int count = TIME_OUT_TO_LAUNCH_CONT; - // ! 
env vars + INFOS("[LaunchParallelContainer] Begin"); + int status = system(command.c_str()); + if (status == -1) { + INFOS("[LaunchParallelContainer] failed : system command status -1"); + return obj; + } + else if (status == 217) { + INFOS("[LaunchParallelContainer] failed : system command status 217"); + return obj; + } - if (params.isMPI) + if (params.nb_proc == 0) + { + std::string theMachine(vect_machine[0]); + // Proxy We have launch a proxy + containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(), theMachine.c_str()); + INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy " << containerNameInNS << " on " << theMachine); + while (CORBA::is_nil(obj) && count) { - tempOutputFile << "mpirun -np "; - int nbproc; - - if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) - nbproc = 1; - else if ( params.nb_node == 0 ) - nbproc = params.nb_proc_per_node; - else if ( params.nb_proc_per_node == 0 ) - nbproc = params.nb_node; - else - nbproc = params.nb_node * params.nb_proc_per_node; - - std::ostringstream o; - - tempOutputFile << nbproc << " "; -#ifdef WITHLAM - tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) - tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; - else{ - tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - tempOutputFile << getenv("OMPI_URI_FILE"); - } +#ifndef WIN32 + sleep(1) ; +#else + Sleep(1000); #endif + count-- ; + obj = _NS->Resolve(containerNameInNS.c_str()); } - - tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/"; - - if (params.isMPI) - { - if (isPythonContainer(params.container_name)) - tempOutputFile << " pyMPI SALOME_ContainerPy.py "; - else - tempOutputFile << " SALOME_MPIContainer "; - } - - else + } + else + { + INFOS("[LaunchParallelContainer] launching the nodes of the parallel container"); + // We are 
waiting all the nodes + for (int i = 0; i < params.nb_proc; i++) { - if (isPythonContainer(params.container_name)) - tempOutputFile << "SALOME_ContainerPy.py "; - else - tempOutputFile << "SALOME_Container "; - } - - tempOutputFile << _NS->ContainerName(params) << " -"; - AddOmninamesParams(tempOutputFile); - tempOutputFile << " &" << endl; - tempOutputFile.flush(); - tempOutputFile.close(); + obj = CORBA::Object::_nil(); + std::string theMachine(vect_machine[i]); + // Name of the node + std::ostringstream tmp; + tmp << i; + std::string proc_number = tmp.str(); + std::string container_node_name = name + proc_number; + containerNameInNS = _NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str()); + INFOS("[LaunchParallelContainer] Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine); + while (CORBA::is_nil(obj) && count) { #ifndef WIN32 - chmod(_TmpFileName.c_str(), 0x1ED); + sleep(1) ; +#else + Sleep(1000); #endif - - // --- Build command - - string command; - - if (resInfo.Protocol == rsh) - { - command = "rsh "; - string commandRcp = "rcp "; - commandRcp += _TmpFileName; - commandRcp += " "; - commandRcp += machine; - commandRcp += ":"; - commandRcp += _TmpFileName; - status = system(commandRcp.c_str()); - } - - else if (resInfo.Protocol == ssh) - { - command = "ssh "; - string commandRcp = "scp "; - commandRcp += _TmpFileName; - commandRcp += " "; - commandRcp += machine; - commandRcp += ":"; - commandRcp += _TmpFileName; - status = system(commandRcp.c_str()); + count-- ; + obj = _NS->Resolve(containerNameInNS.c_str()); + } + if (CORBA::is_nil(obj)) + { + INFOS("[LaunchParallelContainer] Launch of node failed (or not found) !"); + return obj; + } } - else - throw SALOME_Exception("Unknown protocol"); - - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - command += machine; - _CommandForRemAccess = command; - command += " "; - command += _TmpFileName; - - SCRUTE(command); - - 
return command; - + } + if (CORBA::is_nil(obj)) + INFOS("[LaunchParallelContainer] failed"); + + return obj; } +#endif +#ifndef WITH_PACO_PARALLEL +string +SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string& exe_name, + const Engines::ContainerParameters& params, + SALOME_ContainerManager::actual_launch_machine_t & vect_machine, + const std::string proxy_hostname) +{ + return ""; +} +#else //============================================================================= /*! Creates a command line that the container manager uses to launch * a parallel container. @@ -1331,12 +1316,12 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer //============================================================================= string SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string& exe_name, - const Engines::MachineParameters& params, + const Engines::ContainerParameters& params, SALOME_ContainerManager::actual_launch_machine_t & vect_machine, const std::string proxy_hostname) { // This method knows the differences between the proxy and the nodes. - // nb_component_nodes is not used in the same way if it is a proxy or + // nb_proc is not used in the same way if it is a proxy or // a node. //command = "gdb --args "; @@ -1352,7 +1337,7 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string std::string hostname(CORBA::string_dup(params.hostname)); std::ostringstream tmp_string; - CORBA::Long nb_nodes = params.nb_component_nodes; + CORBA::Long nb_nodes = params.nb_proc; tmp_string << nb_nodes; std::string nbproc = tmp_string.str(); @@ -1364,6 +1349,7 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string rtn->cpu_clock = params.cpu_clock; rtn->nb_proc_per_node = params.nb_proc_per_node; rtn->nb_node = params.nb_node; + rtn->nb_proc = params.nb_proc; rtn->isMPI = params.isMPI; // Step 1 : local or remote launch ? 
@@ -1392,7 +1378,7 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string else { machine_file_name = _ResManager->getMachineFile(hostname, - params.nb_component_nodes, + params.nb_proc, parallelLib); } if (machine_file_name == "") @@ -1447,14 +1433,15 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string MESSAGE("[BuildCommandToLaunchParallelContainer] machine file name extracted is " << machine_name) // We want to launch a command like : - // ssh user@machine distantPath/runRemote.sh hostNS portNS + // ssh -l user machine distantPath/runRemote.sh hostNS portNS const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine_name); if (resInfo.Protocol == rsh) command_remote = "rsh "; else command_remote = "ssh "; + command_remote += "-l "; command_remote += resInfo.UserName; - command_remote += "@"; + command_remote += " "; command_remote += machine_name; command_remote += " "; command_remote += resInfo.AppliPath; // path relative to user@machine $HOME @@ -1495,14 +1482,15 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string MESSAGE("[BuildCommandToLaunchParallelContainer] machine file name extracted is " << machine_name) // We want to launch a command like : - // ssh user@machine distantPath/runRemote.sh hostNS portNS + // ssh -l user machine distantPath/runRemote.sh hostNS portNS const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine_name); if (resInfo.Protocol == rsh) command_remote = "rsh "; else command_remote = "ssh "; + command_remote += "-l "; command_remote += resInfo.UserName; - command_remote += "@"; + command_remote += " "; command_remote += machine_name; command_remote += " "; command_remote += resInfo.AppliPath; // path relative to user@machine $HOME @@ -1590,14 +1578,15 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string if (remote) { // We want to launch a command like : - // 
ssh user@machine distantPath/runRemote.sh hostNS portNS + // ssh -l user machine distantPath/runRemote.sh hostNS portNS const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(remote_machine); if (resInfo.Protocol == rsh) command_remote = "rsh "; else command_remote = "ssh "; + command_remote += "-l "; command_remote += resInfo.UserName; - command_remote += "@"; + command_remote += " "; command_remote += remote_machine; command_remote += " "; command_remote += resInfo.AppliPath; // path relative to user@machine $HOME @@ -1635,8 +1624,9 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string command_remote = "rsh "; else command_remote = "ssh "; + command_remote += "-l "; command_remote += resInfo.UserName; - command_remote += "@"; + command_remote += " "; command_remote += remote_machine; command_remote += " "; @@ -1696,23 +1686,5 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string MESSAGE("Parallel launch is: " << command); return command; } +#endif -string SALOME_ContainerManager::GetMPIZeroNode(string machine) -{ - int status; - string zeronode; - string cmd; - string tmpFile = BuildTemporaryFileName(); - - cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile; - - status = system(cmd.c_str()); - if( status == 0 ){ - ifstream fp(tmpFile.c_str(),ios::in); - fp >> zeronode; - } - - RmTmpFile(tmpFile); - - return zeronode; -} diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index e401de3be..a0e7e19eb 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -42,54 +42,46 @@ public: SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns); ~SALOME_ContainerManager(); - void Shutdown(); - void ShutdownContainers(); - + // Corba Methods Engines::Container_ptr - StartContainer(const Engines::MachineParameters& 
params, - const Engines::MachineList& possibleComputer, - const std::string& container_exe="SALOME_Container"); + GiveContainer(const Engines::ContainerParameters& params); - Engines::Container_ptr - StartContainer(const Engines::MachineParameters& params); - - Engines::Container_ptr - GiveContainer(const Engines::MachineParameters& params); + void ShutdownContainers(); - Engines::Container_ptr - FindOrStartContainer(const Engines::MachineParameters& params); + // C++ Methods + void Shutdown(); static const char *_ContainerManagerNameInNS; - // PaCO++ Parallel extension - Engines::Container_ptr - StartParallelContainer(const Engines::MachineParameters& params); - protected: - Engines::Container_ptr - FindContainer(const Engines::MachineParameters& params, - const Engines::MachineList& possibleComputers); + // C++ methods + Engines::Container_ptr + StartPaCOPPContainer(const Engines::ContainerParameters& params); Engines::Container_ptr - FindContainer(const Engines::MachineParameters& params, - const char *theMachine); + FindContainer(const Engines::ContainerParameters& params, + const Engines::ResourceList& possibleResources); - void fillBatchLaunchedContainers(); + Engines::Container_ptr + FindContainer(const Engines::ContainerParameters& params, + const std::string& resource); - std::string BuildCommandToLaunchRemoteContainer(const std::string& machine, - const Engines::MachineParameters& params, + std::string BuildCommandToLaunchRemoteContainer(const std::string & resource_name, + const Engines::ContainerParameters& params, const std::string& container_exe="SALOME_Container"); - std::string BuildCommandToLaunchLocalContainer(const Engines::MachineParameters& params, + std::string BuildCommandToLaunchLocalContainer(const Engines::ContainerParameters& params, const std::string& container_exe="SALOME_Container"); - std::string BuildTempFileToLaunchRemoteContainer(const std::string& machine, - const Engines::MachineParameters& params) throw(SALOME_Exception); + 
std::string BuildTempFileToLaunchRemoteContainer(const std::string& resource_name, + const Engines::ContainerParameters& params) throw(SALOME_Exception); void RmTmpFile(std::string& tmpFile); void AddOmninamesParams(std::string& command) const; + void AddOmninamesParams(std::ostringstream& oss) const; + void AddOmninamesParams(std::ofstream& fileStream) const; std::string BuildTemporaryFileName() const; @@ -99,12 +91,12 @@ protected: // For PacO++ Parallel extension typedef std::vector actual_launch_machine_t; std::string BuildCommandToLaunchParallelContainer(const std::string& exe_name, - const Engines::MachineParameters& params, + const Engines::ContainerParameters& params, SALOME_ContainerManager::actual_launch_machine_t & vect_machine, const std::string proxy_hostname = ""); CORBA::Object_ptr LaunchParallelContainer(const std::string& command, - const Engines::MachineParameters& params, + const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine); CORBA::ORB_var _orb; @@ -112,8 +104,6 @@ protected: SALOME_ResourcesManager *_ResManager; SALOME_NamingService *_NS; - static std::vector _batchLaunchedContainers; - static std::vector::iterator _batchLaunchedContainersIter; //! 
attribute that contains current tmp files generated std::string _TmpFileName; diff --git a/src/Launcher/BatchTest.cxx b/src/Launcher/BatchTest.cxx index 23ff34abe..591db823f 100644 --- a/src/Launcher/BatchTest.cxx +++ b/src/Launcher/BatchTest.cxx @@ -35,7 +35,7 @@ #ifdef WIN32 # include #endif -BatchTest::BatchTest(const Engines::MachineDefinition& batch_descr) +BatchTest::BatchTest(const Engines::ResourceDefinition& batch_descr) { #ifdef WITH_LIBBATCH _batch_descr = batch_descr; @@ -57,8 +57,8 @@ BatchTest::BatchTest(const Engines::MachineDefinition& batch_descr) // Creating test temporary file _test_filename = "/tmp/"; _test_filename += _date + "_test_cluster_file_"; - _test_filename += _batch_descr.alias.in(); - _base_filename = _date + "_test_cluster_file_" + _batch_descr.alias.in(); + _test_filename += _batch_descr.hostname.in(); + _base_filename = _date + "_test_cluster_file_" + _batch_descr.hostname.in(); #endif } @@ -71,7 +71,7 @@ BatchTest::test() INFOS(std::endl << "--- Testing batch Machine :" << std::endl << "--- Name : " << _batch_descr.hostname << std::endl - << "--- Alias : " << _batch_descr.alias << std::endl + << "--- hostname : " << _batch_descr.hostname << std::endl << "--- Protocol : " << _batch_descr.protocol << std::endl << "--- User Name : " << _batch_descr.username << std::endl << "--- Batch Type : " << _batch_descr.batch << std::endl @@ -114,21 +114,21 @@ BatchTest::test() return rtn; } -// For this test we use : alias, protocol, username +// For this test we use : hostname, protocol, username std::string BatchTest::test_connection() { int status; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); // Basic tests - if(alias == "") + if(hostname == "") { - result += "alias is empty !"; + result += "hostname is empty !"; return result; } 
if(username == "") @@ -145,7 +145,7 @@ BatchTest::test_connection() // Build command command += protocol + " " - + username + "@" + alias; + + username + "@" + hostname; // Test status = system(command.c_str()); @@ -161,7 +161,7 @@ BatchTest::test_connection() return result; } -// For this test we use : alias, protocol, username +// For this test we use : hostname, protocol, username std::string BatchTest::test_filecopy() { @@ -169,7 +169,7 @@ BatchTest::test_filecopy() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); @@ -196,7 +196,7 @@ BatchTest::test_filecopy() if(protocol == "rsh") command = "rcp"; command += " " + _test_filename + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; // Test status = system(command.c_str()); @@ -212,7 +212,7 @@ BatchTest::test_filecopy() return result; } -// For this test we use : alias, protocol, username +// For this test we use : hostname, protocol, username std::string BatchTest::test_getresult() { @@ -220,7 +220,7 @@ BatchTest::test_getresult() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); @@ -235,7 +235,7 @@ BatchTest::test_getresult() command = "scp"; if(protocol == "rsh") command = "rcp"; - command += " " + username + "@" + alias + ":" + home + command += " " + username + "@" + hostname + ":" + home + "/" + _base_filename + " " + _test_filename + "_copy"; // Test @@ -285,7 +285,7 @@ BatchTest::test_jobsubmit_simple() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + 
std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string batch_type = _batch_descr.batch.in(); @@ -336,7 +336,7 @@ BatchTest::test_jobsubmit_simple() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_simple + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -348,7 +348,7 @@ BatchTest::test_jobsubmit_simple() // Build command for submit job std::string file_job_name = _test_filename + "_jobid"; - command = protocol + " " + username + "@" + alias + " qsub " + _base_filename + "_simple > " + file_job_name; + command = protocol + " " + username + "@" + hostname + " qsub " + _base_filename + "_simple > " + file_job_name; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -368,7 +368,7 @@ BatchTest::test_jobsubmit_simple() file_job.close(); // Wait the end of the job - command = protocol + " " + username + "@" + alias + " qstat -f " + jobid + " > " + file_job_name; + command = protocol + " " + username + "@" + hostname + " qstat -f " + jobid + " > " + file_job_name; bool stop = false; while (!stop) { @@ -396,7 +396,7 @@ BatchTest::test_jobsubmit_simple() if(protocol == "rsh") command = "rcp"; command += " " - + username + "@" + alias + ":" + home + "/" + _date + "_simple* /tmp"; + + username + "@" + hostname + ":" + home + "/" + _date + "_simple* /tmp"; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -450,7 +450,7 @@ BatchTest::test_jobsubmit_mpi() std::string command; MpiImpl * mpiImpl; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string batch_type = _batch_descr.batch.in(); @@ -530,7 +530,7 @@ 
BatchTest::test_jobsubmit_mpi() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_script + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -543,7 +543,7 @@ BatchTest::test_jobsubmit_mpi() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_mpi + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -555,7 +555,7 @@ BatchTest::test_jobsubmit_mpi() // Build command for submit job std::string file_job_name = _test_filename + "_jobid"; - command = protocol + " " + username + "@" + alias + " qsub " + _base_filename + "_mpi > " + file_job_name; + command = protocol + " " + username + "@" + hostname + " qsub " + _base_filename + "_mpi > " + file_job_name; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -575,7 +575,7 @@ BatchTest::test_jobsubmit_mpi() file_job.close(); // Wait the end of the job - command = protocol + " " + username + "@" + alias + " qstat -f " + jobid + " > " + file_job_name; + command = protocol + " " + username + "@" + hostname + " qstat -f " + jobid + " > " + file_job_name; bool stop = false; while (!stop) { @@ -603,7 +603,7 @@ BatchTest::test_jobsubmit_mpi() if(protocol == "rsh") command = "rcp"; command += " " - + username + "@" + alias + ":" + home + "/" + _date + "_mpi* /tmp"; + + username + "@" + hostname + ":" + home + "/" + _date + "_mpi* /tmp"; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -649,7 +649,7 @@ BatchTest::test_appli() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string applipath = 
_batch_descr.applipath.in(); @@ -679,7 +679,7 @@ BatchTest::test_appli() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_appli + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -690,7 +690,7 @@ BatchTest::test_appli() } // Launch test - command = protocol + " " + username + "@" + alias + command = protocol + " " + username + "@" + hostname + " sh " + home + "/" + _base_filename + "_appli_test > " + _test_filename + "_appli_test_result"; @@ -732,12 +732,12 @@ BatchTest::get_home(std::string * home) int status; std::string result = ""; std::string command; - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string file_home_name = _test_filename + "_home"; - command = protocol + " " + username + "@" + alias + " 'echo $HOME' > " + file_home_name; + command = protocol + " " + username + "@" + hostname + " 'echo $HOME' > " + file_home_name; status = system(command.c_str()); if(status) { std::ostringstream oss; diff --git a/src/Launcher/BatchTest.hxx b/src/Launcher/BatchTest.hxx index 8fdf3736c..9a9a18388 100644 --- a/src/Launcher/BatchTest.hxx +++ b/src/Launcher/BatchTest.hxx @@ -32,7 +32,7 @@ class SALOMELAUNCHER_EXPORT BatchTest { public: - BatchTest(const Engines::MachineDefinition& batch_descr); + BatchTest(const Engines::ResourceDefinition& batch_descr); virtual ~BatchTest(); bool test(); @@ -48,7 +48,7 @@ class SALOMELAUNCHER_EXPORT BatchTest std::string get_home(std::string * home); private: - Engines::MachineDefinition _batch_descr; + Engines::ResourceDefinition _batch_descr; std::string _test_filename; std::string _base_filename; std::string _date; diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index 17dfdaa4b..c01d21450 100644 --- 
a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -26,10 +26,12 @@ #include #include #include +#include #endif #include "SALOME_Launcher_Handler.hxx" #include "Launcher.hxx" +#include "Launcher_Job_Command.hxx" #include #include #include @@ -45,12 +47,12 @@ using namespace std; * with non thread-safe usage like Change_Directory in SALOME naming service */ //============================================================================= - Launcher_cpp::Launcher_cpp() { -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Launcher_cpp constructor" << endl; -#endif + LAUNCHER_MESSAGE("Launcher_cpp constructor"); + _job_cpt = 0; + _job_cpt_mutex = new pthread_mutex_t(); + pthread_mutex_init(_job_cpt_mutex, NULL); } //============================================================================= @@ -58,373 +60,263 @@ Launcher_cpp::Launcher_cpp() * destructor */ //============================================================================= - Launcher_cpp::~Launcher_cpp() { -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Launcher_cpp destructor" << endl; -#endif - + LAUNCHER_MESSAGE("Launcher_cpp destructor"); #ifdef WITH_LIBBATCH std::map < string, Batch::BatchManager_eClient * >::const_iterator it1; for(it1=_batchmap.begin();it1!=_batchmap.end();it1++) delete it1->second; - std::map < std::pair , Batch::Job* >::const_iterator it2; - for(it2=_jobmap.begin();it2!=_jobmap.end();it2++) - delete it2->second; + std::map::const_iterator it_job; + for(it_job = _launcher_job_map.begin(); it_job != _launcher_job_map.end(); it_job++) + delete it_job->second; #endif + + pthread_mutex_destroy(_job_cpt_mutex); + delete _job_cpt_mutex; } +#ifdef WITH_LIBBATCH + //============================================================================= -/*! CORBA Method: - * Submit a batch job on a cluster and returns the JobId - * \param xmlExecuteFile : to define the execution on the batch cluster - * \param clusterName : name of the batch cluster - */ +/*! 
+ * Add a job into the launcher - check resource and choose one + */ //============================================================================= -long Launcher_cpp::submitJob( const std::string xmlExecuteFile, - const std::string clusterName) throw(LauncherException) +void +Launcher_cpp::createJob(Launcher::Job * new_job) { -#ifdef WITH_LIBBATCH -#if defined(_DEBUG_) || defined(_DEBUG) - cout << "BEGIN OF Launcher_cpp::submitJob" << endl; -#endif - long jobId; - vector aMachineList; - - if(!_ResManager) - throw LauncherException("You must set Resources Manager to Launcher!!"); - - // verify if cluster is in resources catalog - machineParams params; - params.hostname = clusterName; + LAUNCHER_MESSAGE("Creating a new job"); + + // First step take a resource + std::vector ResourceList; + resourceParams params = new_job->getResourceRequiredParams(); try{ - aMachineList = _ResManager->GetFittingResources(params); + ResourceList = _ResManager->GetFittingResources(params); } catch(const ResourcesException &ex){ throw LauncherException(ex.msg.c_str()); } - if (aMachineList.size() == 0) - throw LauncherException("This cluster is not in resources catalog"); - - // Parsing xml file - ParseXmlFile(xmlExecuteFile); + if (ResourceList.size() == 0) + { + LAUNCHER_INFOS("No adequate resource found for the job, number " << new_job->getNumber() << " - deleting it"); + delete new_job; + throw LauncherException("No resource found the job"); + } - // verify if clustername is in xml file - map::const_iterator it1 = _launch.MachinesList.find(clusterName); - if(it1 == _launch.MachinesList.end()) - throw LauncherException("This cluster is not in xml file"); + // Second step configure the job with the resource selected - the first of the list + ParserResourcesType resource_definition = _ResManager->GetResourcesDescr(ResourceList[0]); - ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); - string cname(p.Alias); -#if defined(_DEBUG_) || defined(_DEBUG) - cout << 
"Choose cluster: " << cname << endl; -#endif + // Set resource definition to the job + // The job will check if the definitions needed + try + { + new_job->setResourceDefinition(resource_definition); + } + catch(const LauncherException &ex) + { + LAUNCHER_INFOS("Error in the definition of the resource, mess: " << ex.msg); + delete new_job; + throw ex; + } - // search batch manager for that cluster in map or instanciate one - map < string, Batch::BatchManager_eClient * >::const_iterator it2 = _batchmap.find(cname); - if(it2 == _batchmap.end()) + // Third step search batch manager for the resource into the map -> instanciate one if does not exist + std::string resource_name = resource_definition.Name; + std::map::const_iterator it = _batchmap.find(resource_name); + if(it == _batchmap.end()) + { + try { - _batchmap[cname] = FactoryBatchManager(p); - // TODO: Add a test for the cluster ! + _batchmap[resource_name] = FactoryBatchManager(resource_definition); + } + catch(const LauncherException &ex) + { + LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.msg); + delete new_job; + throw ex; + } + catch(const Batch::EmulationException &ex) + { + LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.message); + delete new_job; + throw LauncherException(ex.message); } - - try{ - - // directory on cluster to put files to execute - string remotedir = _launch.MachinesList[clusterName].WorkDirectory; - // local directory to get files to execute and to put results - string localdir = _launch.RefDirectory; - - int idx1 = xmlExecuteFile.find_last_of("/"); - if(idx1 == string::npos) idx1 = -1; - int idx2 = xmlExecuteFile.find(".xml"); - string logfile = xmlExecuteFile.substr(idx1+1,idx2-idx1-1); - string ologfile = logfile + ".output.log"; - string elogfile = logfile + ".error.log"; - - // create and submit job on cluster - Batch::Parametre param; - param[USER] = p.UserName; - param[EXECUTABLE] = ""; - 
for(int i=0; i<_launch.InputFile.size();i++) - param[INFILE] += Batch::Couple( localdir + "/" + _launch.InputFile[i], remotedir + "/" + _launch.InputFile[i] ); - for(int i=0; i<_launch.OutputFile.size();i++) - param[OUTFILE] += Batch::Couple( localdir + "/" + _launch.OutputFile[i], remotedir + "/" + _launch.OutputFile[i] ); - param[OUTFILE] += Batch::Couple( localdir + "/" + ologfile, remotedir + "/" + ologfile ); - param[OUTFILE] += Batch::Couple( localdir + "/" + elogfile, remotedir + "/" + elogfile ); - param[NBPROC] = _launch.NbOfProcesses; - param[WORKDIR] = remotedir; - param[TMPDIR] = remotedir; - param[MAXWALLTIME] = getWallTime(""); - param[MAXRAMSIZE] = getRamSize(""); - param[HOMEDIR] = ""; - - Batch::Environnement env; - env["COMMAND"] = _launch.Command; - env["SOURCEFILE"] = _launch.MachinesList[clusterName].EnvFile; - env["LOGFILE"] = logfile; - - Batch::Job* job = new Batch::Job(param,env); - - // submit job on cluster - Batch::JobId jid = _batchmap[cname]->submitJob(*job); - - // get job id in long - istringstream iss(jid.getReference()); - iss >> jobId; - - _jobmap[ pair(cname,jobId) ] = job; - } - catch(const Batch::EmulationException &ex){ - throw LauncherException(ex.message.c_str()); } - return jobId; -#else - throw LauncherException("Method Launcher_cpp::submitJob is not available " - "(libBatch was not present at compilation time)"); -#endif + + // Final step - add job to the jobs map + pthread_mutex_lock(_job_cpt_mutex); + new_job->setNumber(_job_cpt); + _job_cpt++; + pthread_mutex_unlock(_job_cpt_mutex); + std::map::const_iterator it_job = _launcher_job_map.find(new_job->getNumber()); + if (it_job == _launcher_job_map.end()) + _launcher_job_map[new_job->getNumber()] = new_job; + else + { + LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber()); + delete new_job; + throw LauncherException("A job as already the same id - job is not created !"); + } + LAUNCHER_MESSAGE("New Job created"); } 
//============================================================================= -/*! CORBA Method: - * Submit a batch job on a cluster and returns the JobId - * \param fileToExecute : .py/.exe/.sh/... to execute on the batch cluster - * \param filesToExport : to export on the batch cluster - * \param NumberOfProcessors : Number of processors needed on the batch cluster - * \param params : Constraints for the choice of the batch cluster - */ +/*! + * Launch a job + */ //============================================================================= -long Launcher_cpp::submitSalomeJob( const string fileToExecute , - const vector& filesToExport , - const vector& filesToImport , - const batchParams& batch_params, - const machineParams& params) throw(LauncherException) +void +Launcher_cpp::launchJob(int job_id) { -#ifdef WITH_LIBBATCH -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "BEGIN OF Launcher_cpp::submitSalomeJob" << endl; -#endif - long jobId; - vector aMachineList; - - if(!_ResManager) - throw LauncherException("You must set Resources Manager to Launcher!!"); + LAUNCHER_MESSAGE("Launch a job"); + + // Check if job exist + std::map::const_iterator it_job = _launcher_job_map.find(job_id); + if (it_job == _launcher_job_map.end()) + { + LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id); + throw LauncherException("Cannot find the job, is it created ?"); + } - // check batch params - if ( !check(batch_params) ) - throw LauncherException("Batch parameters are bad (see informations above)"); + Launcher::Job * job = it_job->second; - // find a cluster matching the structure params - try{ - aMachineList = _ResManager->GetFittingResources(params); + // Check job state (cannot launch a job already launched...) 
+ if (job->getState() != "CREATED") + { + LAUNCHER_INFOS("Bad state of the job: " << job->getState()); + throw LauncherException("Bad state of the job: " + job->getState()); } - catch(const ResourcesException &ex){ - throw LauncherException(ex.msg.c_str()); - } - if (aMachineList.size() == 0) - throw LauncherException("No resources have been found with your parameters"); - ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); - string clustername(p.Alias); -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Choose cluster: " << clustername << endl; -#endif - - // search batch manager for that cluster in map or instanciate one - map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - { - _batchmap[clustername] = FactoryBatchManager(p); - // TODO: Add a test for the cluster ! - } - - try{ - // tmp directory on cluster to put files to execute - string tmpdir = getTmpDirForBatchFiles(); - - // create and submit job on cluster - Batch::Parametre param; - param[USER] = p.UserName; - param[EXECUTABLE] = buildSalomeCouplingScript(fileToExecute,tmpdir,p); - param[INFILE] = Batch::Couple( fileToExecute, getRemoteFile(tmpdir,fileToExecute) ); - for(int i=0;isubmitJob(*job); - - // get job id in long - istringstream iss(jid.getReference()); - iss >> jobId; - - _jobmap[ pair(clustername,jobId) ] = job; + std::string resource_name = job->getResourceDefinition().Name; + try { + Batch::JobId batch_manager_job_id = _batchmap[resource_name]->submitJob(*(job->getBatchJob())); + job->setBatchManagerJobId(batch_manager_job_id); + job->setState("QUEUED"); } - catch(const Batch::EmulationException &ex){ + catch(const Batch::EmulationException &ex) + { + LAUNCHER_INFOS("Job is not launched, exception in submitJob: " << ex.message); throw LauncherException(ex.message.c_str()); } - - return jobId; -#else - throw LauncherException("Method Launcher_cpp::submitSalomeJob is not available " - "(libBatch was 
not present at compilation time)"); -#endif + LAUNCHER_MESSAGE("Job launched"); } //============================================================================= -/*! CORBA Method: - * Query a batch job on a cluster and returns the status of job - * \param jobId : identification of Salome job - * \param params : Constraints for the choice of the batch cluster - */ +/*! + * Get job state + */ //============================================================================= -string Launcher_cpp::queryJob( long id, - const machineParams& params) throw(LauncherException) +const char * +Launcher_cpp::getJobState(int job_id) { -#ifdef WITH_LIBBATCH - if(!_ResManager) - throw LauncherException("You must set Resources Manager to Launcher!!"); - - // find a cluster matching params structure - vector aMachineList = _ResManager->GetFittingResources( params ) ; - ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); - string clustername(p.Alias); - - // search batch manager for that cluster in map - std::map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - throw LauncherException("no batchmanager for that cluster"); - - Batch::Parametre par; - try{ - ostringstream oss; - oss << id; - Batch::JobId jobId( _batchmap[clustername], oss.str() ); - - Batch::JobInfo jinfo = jobId.queryJob(); - par = jinfo.getParametre(); - } - catch(const Batch::EmulationException &ex){ - throw LauncherException(ex.message.c_str()); + LAUNCHER_MESSAGE("Get job state"); + + // Check if job exist + std::map::const_iterator it_job = _launcher_job_map.find(job_id); + if (it_job == _launcher_job_map.end()) + { + LAUNCHER_INFOS("Cannot find the job, is it created ? 
job number: " << job_id); + throw LauncherException("Cannot find the job, is it created ?"); } - return par[STATE]; -#else - throw LauncherException("Method Launcher_cpp::queryJob is not available " - "(libBatch was not present at compilation time)"); -#endif -} + Launcher::Job * job = it_job->second; + std::string state = job->updateJobState(); -string Launcher_cpp::queryJob( long id, - const std::string clusterName) throw (LauncherException) -{ - machineParams params; - params.hostname = clusterName; - return queryJob(id,params); + return state.c_str(); } //============================================================================= -/*! CORBA Method: - * Delete a batch job on a cluster - * \param jobId : identification of Salome job - * \param params : Constraints for the choice of the batch cluster - */ +/*! + * Get Job result - the result directory could be changed + */ //============================================================================= -void Launcher_cpp::deleteJob( const long id, - const machineParams& params) throw(LauncherException) +void +Launcher_cpp::getJobResults(int job_id, std::string directory) { -#ifdef WITH_LIBBATCH - if(!_ResManager) - throw LauncherException("You must set Resources Manager to Launcher!!"); - - // find a cluster matching params structure - vector aMachineList = _ResManager->GetFittingResources( params ) ; - ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); - string clustername(p.Alias); - - // search batch manager for that cluster in map - map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - throw LauncherException("no batchmanager for that cluster"); - - ostringstream oss; - oss << id; - Batch::JobId jobId( _batchmap[clustername], oss.str() ); + LAUNCHER_MESSAGE("Get Job results"); + + // Check if job exist + std::map::const_iterator it_job = _launcher_job_map.find(job_id); + if (it_job == _launcher_job_map.end()) + { + 
LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id); + throw LauncherException("Cannot find the job, is it created ?"); + } - jobId.deleteJob(); -#else - throw LauncherException("Method Launcher_cpp::deleteJob is not available " - "(libBatch was not present at compilation time)"); -#endif + Launcher::Job * job = it_job->second; + std::string resource_name = job->getResourceDefinition().Name; + try + { + if (directory != "") + _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), directory); + else + _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), job->getResultDirectory()); + } + catch(const Batch::EmulationException &ex) + { + LAUNCHER_INFOS("getJobResult is maybe incomplete, exception: " << ex.message); + throw LauncherException(ex.message.c_str()); + } + LAUNCHER_MESSAGE("getJobResult ended"); } -void Launcher_cpp::deleteJob( long id, - const std::string clusterName) throw (LauncherException) +//============================================================================= +/*! + * Remove the job - into the Launcher and its batch manager + */ +//============================================================================= +void +Launcher_cpp::removeJob(int job_id) { - machineParams params; - params.hostname = clusterName; - deleteJob(id,params); + LAUNCHER_MESSAGE("Remove Job"); + + // Check if job exist + std::map::iterator it_job = _launcher_job_map.find(job_id); + if (it_job == _launcher_job_map.end()) + { + LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id); + throw LauncherException("Cannot find the job, is it created ?"); + } + + delete it_job->second; + _launcher_job_map.erase(it_job); } //============================================================================= -/*! CORBA Method: - * Get result files of job on a cluster - * \param jobId : identification of Salome job - * \param params : Constraints for the choice of the batch cluster +/*! 
+ * create a launcher job based on a file + * \param xmlExecuteFile : to define the execution on the batch cluster */ //============================================================================= -void Launcher_cpp::getResultsJob( const string directory, - const long id, - const machineParams& params) throw(LauncherException) +long +Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile, + const std::string clusterName) { -#ifdef WITH_LIBBATCH - if(!_ResManager) - throw LauncherException("You must set Resources Manager to Launcher!!"); - - vector aMachineList = _ResManager->GetFittingResources( params ) ; - ParserResourcesType p = _ResManager->GetResourcesList(aMachineList[0]); - string clustername(p.Alias); - - // search batch manager for that cluster in map - map < string, Batch::BatchManager_eClient * >::const_iterator it = _batchmap.find(clustername); - if(it == _batchmap.end()) - throw LauncherException("no batchmanager for that cluster"); - - Batch::Job* job = _jobmap[ pair(clustername,id) ]; - - _batchmap[clustername]->importOutputFiles( *job, directory ); -#else - throw LauncherException("Method Launcher_cpp::getResultsJob is not available " - "(libBatch was not present at compilation time)"); -#endif -} + LAUNCHER_MESSAGE("Begin of Launcher_cpp::createJobWithFile"); -void Launcher_cpp::getResultsJob( const std::string directory, - long id, - const std::string clusterName) throw (LauncherException) -{ - machineParams params; - params.hostname = clusterName; - getResultsJob(directory,id,params); + // Parsing xml file + ParserLauncherType job_params = ParseXmlFile(xmlExecuteFile); + + // Creating a new job + Launcher::Job_Command * new_job = new Launcher::Job_Command(); + new_job->setJobFile(job_params.Command); + new_job->setLocalDirectory(job_params.RefDirectory); + new_job->setWorkDirectory(job_params.MachinesList[clusterName].WorkDirectory); + new_job->setEnvFile(job_params.MachinesList[clusterName].EnvFile); + + for(int i=0; i < 
job_params.InputFile.size(); i++) + new_job->add_in_file(job_params.InputFile[i]); + for(int i=0; i < job_params.OutputFile.size();i++) + new_job->add_out_file(job_params.OutputFile[i]); + + resourceParams p; + p.hostname = clusterName; + p.nb_proc = job_params.NbOfProcesses; + new_job->setResourceRequiredParams(p); + + createJob(new_job); + return new_job->getNumber(); } //============================================================================= @@ -432,478 +324,170 @@ void Launcher_cpp::getResultsJob( const std::string directory, * Factory to instanciate the good batch manager for choosen cluster. */ //============================================================================= - -Batch::BatchManager_eClient *Launcher_cpp::FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException) +Batch::BatchManager_eClient * +Launcher_cpp::FactoryBatchManager(ParserResourcesType& params) { -#ifdef WITH_LIBBATCH - std::string hostname, mpi; + std::string mpi; Batch::CommunicationProtocolType protocol; Batch::FactBatchManager_eClient* fact; - hostname = params.Alias; - switch(params.Protocol){ - case rsh: - protocol = Batch::RSH; - break; - case ssh: - protocol = Batch::SSH; - break; - default: - throw LauncherException("unknown protocol"); - break; + int nb_proc_per_node = params.DataForSort._nbOfProcPerNode; + std::string hostname = params.HostName; + + switch(params.Protocol) + { + case rsh: + protocol = Batch::RSH; + break; + case ssh: + protocol = Batch::SSH; + break; + default: + throw LauncherException("Unknown protocol for this resource"); + break; } - switch(params.mpi){ - case lam: - mpi = "lam"; - break; - case mpich1: - mpi = "mpich1"; - break; - case mpich2: - mpi = "mpich2"; - break; - case openmpi: - mpi = "openmpi"; - break; - case slurm: - mpi = "slurm"; - break; - case prun: - mpi = "prun"; - break; - case nompi: - throw LauncherException("you must specified an mpi implementation for batch manager"); - break; - default: - throw 
LauncherException("unknown mpi implementation"); - break; - } -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Instanciation of batch manager" << endl; -#endif - switch( params.Batch ){ - case pbs: -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Instantiation of PBS batch manager" << endl; -#endif - fact = new Batch::FactBatchManager_ePBS; - break; - case lsf: -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Instantiation of LSF batch manager" << endl; -#endif - fact = new Batch::FactBatchManager_eLSF; - break; - case sge: -#if defined(_DEBUG_) || defined(_DEBUG) - cout << "Instantiation of SGE batch manager" << endl; -#endif - fact = new Batch::FactBatchManager_eSGE; - break; - default: -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "BATCH = " << params.Batch << endl; -#endif - throw LauncherException("no batchmanager for that cluster"); - } - return (*fact)(hostname.c_str(), protocol, mpi.c_str()); -#else - throw LauncherException("Method Launcher_cpp::FactoryBatchManager is not available " - "(libBatch was not present at compilation time)"); -#endif -} -string Launcher_cpp::buildSalomeCouplingScript(const string fileToExecute, const string dirForTmpFiles, const ParserResourcesType& params) -{ -#ifdef WITH_LIBBATCH -#ifndef WIN32 //TODO: need for porting on Windows - int idx = dirForTmpFiles.find("Batch/"); - std::string filelogtemp = dirForTmpFiles.substr(idx+6, dirForTmpFiles.length()); - std::string dfilelogtemp = params.AppliPath + "/" + filelogtemp; - - string::size_type p1 = fileToExecute.find_last_of("/"); - string::size_type p2 = fileToExecute.find_last_of("."); - std::string fileNameToExecute = fileToExecute.substr(p1+1,p2-p1-1); - std::string TmpFileName = "/tmp/runSalome_" + fileNameToExecute + ".sh"; - - MpiImpl* mpiImpl = FactoryMpiImpl(params.mpi); - - ofstream tempOutputFile; - tempOutputFile.open(TmpFileName.c_str(), ofstream::out ); - - // Begin - tempOutputFile << "#! 
/bin/sh -f" << endl ; - tempOutputFile << "cd ~/" ; - tempOutputFile << dirForTmpFiles << endl ; - tempOutputFile << "export SALOME_BATCH=1\n"; - tempOutputFile << "export PYTHONPATH=~/" ; - tempOutputFile << dirForTmpFiles ; - tempOutputFile << ":$PYTHONPATH" << endl ; - - // Adding user script - std::string script = params.userCommands; - if (script != "") - tempOutputFile << script << endl; - // Test node rank - tempOutputFile << "if test \"" ; - tempOutputFile << mpiImpl->rank() ; - tempOutputFile << "\" = \"0\"; then" << endl ; - - // ----------------------------------------------- - // Code for rank 0 : launch runAppli and a container - // RunAppli - if(params.ModulesList.size()>0) - tempOutputFile << " " << params.AppliPath << "/runAppli --terminal --modules=" ; - else - tempOutputFile << " " << params.AppliPath << "/runAppli --terminal "; - for ( int i = 0 ; i < params.ModulesList.size() ; i++ ) { - tempOutputFile << params.ModulesList[i] ; - if ( i != params.ModulesList.size()-1 ) - tempOutputFile << "," ; + switch(params.mpi) + { + case lam: + mpi = "lam"; + break; + case mpich1: + mpi = "mpich1"; + break; + case mpich2: + mpi = "mpich2"; + break; + case openmpi: + mpi = "openmpi"; + break; + case slurm: + mpi = "slurm"; + break; + case prun: + mpi = "prun"; + break; + default: + mpi = "nompi"; } - tempOutputFile << " --standalone=registry,study,moduleCatalog --ns-port-log=" - << filelogtemp - << " &\n"; - - // Wait NamingService - tempOutputFile << " current=0\n" - << " stop=20\n" - << " while ! test -f " << dfilelogtemp << "\n" - << " do\n" - << " sleep 2\n" - << " let current=current+1\n" - << " if [ \"$current\" -eq \"$stop\" ] ; then\n" - << " echo Error Naming Service failed ! 
>&2" - << " exit\n" - << " fi\n" - << " done\n" - << " port=`cat " << dfilelogtemp << "`\n"; - - // Wait other containers - tempOutputFile << " for ((ip=1; ip < "; - tempOutputFile << mpiImpl->size(); - tempOutputFile << " ; ip++))" << endl; - tempOutputFile << " do" << endl ; - tempOutputFile << " arglist=\"$arglist YACS_Server_\"$ip" << endl ; - tempOutputFile << " done" << endl ; - tempOutputFile << " sleep 5" << endl ; - tempOutputFile << " " << params.AppliPath << "/runSession waitContainers.py $arglist" << endl ; - - // Launch user script - tempOutputFile << " " << params.AppliPath << "/runSession python ~/" << dirForTmpFiles << "/" << fileNameToExecute << ".py" << endl; - - // Stop application - tempOutputFile << " rm " << dfilelogtemp << "\n" - << " " << params.AppliPath << "/runSession shutdownSalome.py" << endl; - - // ------------------------------------- - // Other nodes launch a container - tempOutputFile << "else" << endl ; - - // Wait NamingService - tempOutputFile << " current=0\n" - << " stop=20\n" - << " while ! test -f " << dfilelogtemp << "\n" - << " do\n" - << " sleep 2\n" - << " let current=current+1\n" - << " if [ \"$current\" -eq \"$stop\" ] ; then\n" - << " echo Error Naming Service failed ! >&2" - << " exit\n" - << " fi\n" - << " done\n" - << " port=`cat " << dfilelogtemp << "`\n"; - - // Launching container - tempOutputFile << " " << params.AppliPath << "/runSession SALOME_Container YACS_Server_"; - tempOutputFile << mpiImpl->rank() - << " > ~/" << dirForTmpFiles << "/YACS_Server_" - << mpiImpl->rank() << "_container_log." 
<< filelogtemp - << " 2>&1\n"; - tempOutputFile << "fi" << endl ; - tempOutputFile.flush(); - tempOutputFile.close(); - chmod(TmpFileName.c_str(), 0x1ED); -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << TmpFileName.c_str() << endl; -#endif - delete mpiImpl; + std::string message = "Instanciation of batch manager of type: "; + switch( params.Batch ) + { + case pbs: + message += "ePBS"; + fact = new Batch::FactBatchManager_ePBS; + break; + case lsf: + message += "eLSF"; + fact = new Batch::FactBatchManager_eLSF; + break; + case sge: + message += "eSGE"; + fact = new Batch::FactBatchManager_eSGE; + break; + case ssh_batch: + message += "eSSH"; + fact = new Batch::FactBatchManager_eSSH; + break; + default: + LAUNCHER_MESSAGE("Bad batch description of the resource: Batch = " << params.Batch); + throw LauncherException("No batchmanager for that cluster - Bad batch description of the resource"); + } + LAUNCHER_MESSAGE(message); + return (*fact)(hostname.c_str(), protocol, mpi.c_str(), nb_proc_per_node); +} - return TmpFileName; -#else - return ""; -#endif - +//---------------------------------------------------------- +// Without LIBBATCH - Launcher_cpp do nothing... 
+//---------------------------------------------------------- #else - throw LauncherException("Method Launcher_cpp::buildSalomeCouplingScript is not available " - "(libBatch was not present at compilation time)"); -#endif -} -MpiImpl *Launcher_cpp::FactoryMpiImpl(MpiImplType mpi) throw(LauncherException) +void +Launcher_cpp::createJob(Launcher::Job * new_job) { -#ifdef WITH_LIBBATCH - switch(mpi){ - case lam: - return new MpiImpl_LAM(); - case mpich1: - return new MpiImpl_MPICH1(); - case mpich2: - return new MpiImpl_MPICH2(); - case openmpi: - return new MpiImpl_OPENMPI(); - case slurm: - return new MpiImpl_SLURM(); - case prun: - return new MpiImpl_PRUN(); - case nompi: - throw LauncherException("you must specify an mpi implementation for batch manager"); - default: - ostringstream oss; - oss << mpi << " : not yet implemented"; - throw LauncherException(oss.str().c_str()); - } -#else - throw LauncherException("Method Launcher_cpp::FactoryMpiImpl is not available " + LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot create a job !!!"); + delete new_job; + throw LauncherException("Method Launcher_cpp::createJob is not available " "(libBatch was not present at compilation time)"); -#endif } -string Launcher_cpp::getTmpDirForBatchFiles() +void +Launcher_cpp::launchJob(int job_id) { -#ifdef WITH_LIBBATCH - string ret; - string thedate; - - // Adding date to the directory name - Batch::Date date = Batch::Date(time(0)); - thedate = date.str(); - int lend = thedate.size() ; - int i = 0 ; - while ( i < lend ) { - if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { - thedate[i] = '_' ; - } - i++ ; - } - - ret = string("Batch/"); - ret += thedate; - return ret; -#else - throw LauncherException("Method Launcher_cpp::getTmpDirForBatchFiles is not available " + LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot launch a job !!!"); + throw LauncherException("Method Launcher_cpp::launchJob is not available " "(libBatch was not present at 
compilation time)"); -#endif } -string Launcher_cpp::getRemoteFile( std::string remoteDir, std::string localFile ) +const char * +Launcher_cpp::getJobState(int job_id) { - string::size_type pos = localFile.find_last_of("/") + 1; - int ln = localFile.length() - pos; - string remoteFile = remoteDir + "/" + localFile.substr(pos,ln); - return remoteFile; + LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job state!!!"); + throw LauncherException("Method Launcher_cpp::getJobState is not available " + "(libBatch was not present at compilation time)"); } -bool Launcher_cpp::check(const batchParams& batch_params) +void +Launcher_cpp::getJobResults(int job_id, std::string directory) { - bool rtn = true; -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Job parameters are :" << endl; - cerr << "Directory : $HOME/Batch/$date" << endl; -#endif - - // check expected_during_time (check the format) - std::string edt_info = batch_params.expected_during_time; - std::string edt_value = batch_params.expected_during_time; - if (edt_value != "") { - std::string begin_edt_value = edt_value.substr(0, 2); - std::string mid_edt_value = edt_value.substr(2, 1); - std::string end_edt_value = edt_value.substr(3); - - long value; - std::istringstream iss(begin_edt_value); - if (!(iss >> value)) { - edt_info = "Error on definition ! : " + edt_value; - rtn = false; - } - else if (value < 0) { - edt_info = "Error on definition time is negative ! : " + value; - rtn = false; - } - std::istringstream iss_2(end_edt_value); - if (!(iss_2 >> value)) { - edt_info = "Error on definition ! : " + edt_value; - rtn = false; - } - else if (value < 0) { - edt_info = "Error on definition time is negative ! : " + value; - rtn = false; - } - if (mid_edt_value != ":") { - edt_info = "Error on definition ! 
:" + edt_value; - rtn = false; - } - } - else { - edt_info = "No value given"; - } -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Expected during time : " << edt_info << endl;; -#endif - - // check memory (check the format) - std::string mem_info; - std::string mem_value = batch_params.mem; - if (mem_value != "") { - std::string begin_mem_value = mem_value.substr(0, mem_value.length()-2); - long re_mem_value; - std::istringstream iss(begin_mem_value); - if (!(iss >> re_mem_value)) { - mem_info = "Error on definition ! : " + mem_value; - rtn = false; - } - else if (re_mem_value <= 0) { - mem_info = "Error on definition memory is negative ! : " + mem_value; - rtn = false; - } - std::string end_mem_value = mem_value.substr(mem_value.length()-2); - if (end_mem_value != "gb" && end_mem_value != "mb") { - mem_info = "Error on definition, type is bad ! " + mem_value; - rtn = false; - } - } - else { - mem_info = "No value given"; - } -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Memory : " << mem_info << endl; -#endif - - // check nb_proc - std::string nb_proc_info; - ostringstream nb_proc_value; - nb_proc_value << batch_params.nb_proc; - if(batch_params.nb_proc <= 0) { - nb_proc_info = "Bad value ! 
nb_proc = "; - nb_proc_info += nb_proc_value.str(); - rtn = false; - } - else { - nb_proc_info = nb_proc_value.str(); - } -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "Nb of processors : " << nb_proc_info << endl; -#endif - - return rtn; + LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job results!!!"); + throw LauncherException("Method Launcher_cpp::getJobResults is not available " + "(libBatch was not present at compilation time)"); } -long Launcher_cpp::getWallTime(std::string edt) +void +Launcher_cpp::removeJob(int job_id) { - long hh, mm, ret; - - if( edt.size() == 0 ) - return 0; - - string::size_type pos = edt.find(":"); - string h = edt.substr(0,pos); - string m = edt.substr(pos+1,edt.size()-pos+1); - istringstream issh(h); - issh >> hh; - istringstream issm(m); - issm >> mm; - ret = hh*60 + mm; - return ret; + LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot remove job!!!"); + throw LauncherException("Method Launcher_cpp::removeJob is not available " + "(libBatch was not present at compilation time)"); } -long Launcher_cpp::getRamSize(std::string mem) +long +Launcher_cpp::createJobWithFile( const std::string xmlExecuteFile, std::string clusterName) { - long mv; - - if( mem.size() == 0 ) - return 0; - - string ram = mem.substr(0,mem.size()-2); - istringstream iss(ram); - iss >> mv; - string unity = mem.substr(mem.size()-2,2); - if( (unity.find("gb") != string::npos) || (unity.find("GB") != string::npos) ) - return mv*1024; - else if( (unity.find("mb") != string::npos) || (unity.find("MB") != string::npos) ) - return mv; - else if( (unity.find("kb") != string::npos) || (unity.find("KB") != string::npos) ) - return mv/1024; - else if( (unity.find("b") != string::npos) || (unity.find("B") != string::npos) ) - return mv/(1024*1024); - else - return 0; + throw LauncherException("Method Launcher_cpp::createJobWithFile is not available " + "(libBatch was not present at compilation time)"); + return 0; } -void 
Launcher_cpp::ParseXmlFile(string xmlExecuteFile) +#endif + +ParserLauncherType +Launcher_cpp::ParseXmlFile(string xmlExecuteFile) { - SALOME_Launcher_Handler* handler = new SALOME_Launcher_Handler(_launch); + ParserLauncherType job_params; + SALOME_Launcher_Handler * handler = new SALOME_Launcher_Handler(job_params); const char* aFilePath = xmlExecuteFile.c_str(); FILE* aFile = fopen(aFilePath, "r"); - if (aFile != NULL) + { + xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0); + if (aDoc != NULL) + handler->ProcessXmlDocument(aDoc); + else { - xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0); - - if (aDoc != NULL) - handler->ProcessXmlDocument(aDoc); - else{ -#if defined(_DEBUG_) || defined(_DEBUG) - cout << "ResourcesManager_cpp: could not parse file "<< aFilePath << endl; -#endif - } - - // Free the document - xmlFreeDoc(aDoc); - - fclose(aFile); + std::string message = "ResourcesManager_cpp: could not parse file: " + xmlExecuteFile; + LAUNCHER_MESSAGE(message); + delete handler; + throw LauncherException(message); } - else{ -#if defined(_DEBUG_) || defined(_DEBUG) - cout << "Launcher_cpp: file "< #include "ResourcesManager.hxx" +#include + #include "SALOME_Launcher_Parser.hxx" #include #include +#include + class MpiImpl; namespace Batch{ @@ -53,14 +49,6 @@ struct batchParams{ unsigned long nb_proc; }; -class LAUNCHER_EXPORT LauncherException -{ -public: - const std::string msg; - - LauncherException(const std::string m) : msg(m) {} -}; - class LAUNCHER_EXPORT Launcher_cpp { @@ -68,44 +56,32 @@ public: Launcher_cpp(); ~Launcher_cpp(); - long submitJob(const std::string xmlExecuteFile, - const std::string clusterName) throw(LauncherException); + // Main interface + void createJob(Launcher::Job * new_job); + void launchJob(int job_id); + const char * getJobState(int job_id); + void getJobResults(int job_id, std::string directory); + void removeJob(int job_id); - long submitSalomeJob(const std::string fileToExecute , - const std::vector& filesToExport , - const 
std::vector& filesToImport , - const batchParams& batch_params, - const machineParams& params) throw(LauncherException); + // Useful methods + long createJobWithFile(std::string xmlExecuteFile, std::string clusterName); - std::string queryJob( const long jobId, const machineParams& params) throw(LauncherException); - std::string queryJob( const long jobId, const std::string clusterName) throw(LauncherException); - void deleteJob( const long jobId, const machineParams& params) throw(LauncherException); - void deleteJob( const long jobId, const std::string clusterName) throw(LauncherException); - void getResultsJob( const std::string directory, const long jobId, const machineParams& params ) throw(LauncherException); - void getResultsJob( const std::string directory, const long jobId, const std::string clusterName ) throw (LauncherException); - - void SetResourcesManager( ResourcesManager_cpp* rm ) { _ResManager = rm; } + // Lib methods + void SetResourcesManager( ResourcesManager_cpp* rm ) {_ResManager = rm;} protected: - std::string buildSalomeCouplingScript(const std::string fileToExecute, const std::string dirForTmpFiles, const ParserResourcesType& params); - MpiImpl *FactoryMpiImpl(MpiImplType mpiImpl) throw(LauncherException); - Batch::BatchManager_eClient *FactoryBatchManager( const ParserResourcesType& params ) throw(LauncherException); - std::string getTmpDirForBatchFiles(); - std::string getRemoteFile( std::string remoteDir, std::string localFile ); - std::string getHomeDir(const ParserResourcesType& p, const std::string & tmpdir); - + // Methods used by user interface methods +#ifdef WITH_LIBBATCH + Batch::BatchManager_eClient *FactoryBatchManager(ParserResourcesType& params); std::map _batchmap; - std::map < std::pair , Batch::Job* > _jobmap; - ResourcesManager_cpp *_ResManager; - bool check(const batchParams& batch_params); - long getWallTime(std::string edt); - long getRamSize(std::string mem); - void ParseXmlFile(std::string xmlExecuteFile); - - //! 
will contain the informations on the data type catalog(after parsing) - ParserLauncherType _launch; +#endif + ParserLauncherType ParseXmlFile(std::string xmlExecuteFile); + ResourcesManager_cpp *_ResManager; + std::map _launcher_job_map; + int _job_cpt; // job number counter + pthread_mutex_t * _job_cpt_mutex; // mutex for job counter }; #endif diff --git a/src/Launcher/Launcher_Job.cxx b/src/Launcher/Launcher_Job.cxx new file mode 100644 index 000000000..835d752f7 --- /dev/null +++ b/src/Launcher/Launcher_Job.cxx @@ -0,0 +1,501 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#include "Launcher_Job.hxx" +#include "Launcher.hxx" + +Launcher::Job::Job() +{ + _number = -1; + _state = "CREATED"; + _launch_date = getLaunchDate(); + + _env_file = ""; + _job_file = ""; + _job_file_name = ""; + _job_file_name_complete = ""; + _work_directory = ""; + _local_directory = ""; + _result_directory = ""; + _maximum_duration = ""; + _maximum_duration_in_second = -1; + _resource_required_params.name = ""; + _resource_required_params.hostname = ""; + _resource_required_params.OS = ""; + _resource_required_params.nb_proc = -1; + _resource_required_params.nb_node = -1; + _resource_required_params.nb_proc_per_node = -1; + _resource_required_params.cpu_clock = -1; + _resource_required_params.mem_mb = -1; + _queue = ""; + +#ifdef WITH_LIBBATCH + _batch_job = new Batch::Job(); +#endif +} + +Launcher::Job::~Job() +{ + LAUNCHER_MESSAGE("Deleting job number: " << _number); +#ifdef WITH_LIBBATCH + if (_batch_job_id.getReference() != "undefined") + { + try + { + _batch_job_id.deleteJob(); + } + catch (const Batch::EmulationException &ex) + { + LAUNCHER_INFOS("WARNING: exception when deleting the job: " << ex.message); + } + } + if (_batch_job) + delete _batch_job; +#endif +} + +void +Launcher::Job::setState(const std::string & state) +{ + // State of a Job: CREATED, QUEUED, RUNNING, FINISHED, ERROR + if (state != "CREATED" and + state != "QUEUED" and + state != "RUNNING" and + state != "FINISHED" and + state != "ERROR") + { + throw LauncherException("Bad state, this state does not exist: " + state); + } + _state = state; +} + +std::string +Launcher::Job::getState() +{ + return _state; +} + +void 
+Launcher::Job::setNumber(const int & number) +{ + if (_number != -1) + std::cerr << "Launcher::Job::setNumber -- Job number was already defined, before: " << _number << " now: " << number << std::endl; + _number = number; +} + +int +Launcher::Job::getNumber() +{ + return _number; +} + +void +Launcher::Job::setResourceDefinition(const ParserResourcesType & resource_definition) +{ + // Check machine_definition + std::string user_name = ""; + if (resource_definition.UserName == "") + { + user_name = getenv("USER"); + if (user_name == "") + { + std::string mess = "You must define a user name: into your resource description or with env variable USER"; + throw LauncherException(mess); + } + } + else + user_name = resource_definition.UserName; + + _resource_definition = resource_definition; + _resource_definition.UserName = user_name; +} + +ParserResourcesType +Launcher::Job::getResourceDefinition() +{ + return _resource_definition; +} + +void +Launcher::Job::setJobFile(const std::string & job_file) +{ + // Check job file + if (job_file == "") + { + std::string mess = "Empty Job File is forbidden !"; + throw LauncherException(mess); + } + + _job_file = job_file; + std::string::size_type p1 = _job_file.find_last_of("/"); + std::string::size_type p2 = _job_file.find_last_of("."); + _job_file_name_complete = _job_file.substr(p1+1); + _job_file_name = _job_file.substr(p1+1,p2-p1-1); + + if (_job_file != "") + add_in_file(_job_file); +} + +std::string +Launcher::Job::getJobFile() +{ + return _job_file; +} +void +Launcher::Job::setEnvFile(const std::string & env_file) +{ + _env_file = env_file; + if (_env_file != "") + add_in_file(_env_file); +} + +std::string +Launcher::Job::getEnvFile() +{ + return _env_file; +} + +void +Launcher::Job::setWorkDirectory(const std::string & work_directory) +{ + _work_directory = work_directory; +} + +void +Launcher::Job::setLocalDirectory(const std::string & local_directory) +{ + _local_directory = local_directory; +} + +void 
+Launcher::Job::setResultDirectory(const std::string & result_directory) +{ + _result_directory = result_directory; +} + +void +Launcher::Job::add_in_file(const std::string & file) +{ + std::list::iterator it = std::find(_in_files.begin(), _in_files.end(), file); + if (it == _in_files.end()) + _in_files.push_back(file); + else + std::cerr << "Launcher::Job::add_in_file -- Warning file was already entered in in_files: " << file << std::endl; +} + +void +Launcher::Job::add_out_file(const std::string & file) +{ + std::list::iterator it = std::find(_out_files.begin(), _out_files.end(), file); + if (it == _out_files.end()) + _out_files.push_back(file); + else + std::cerr << "Launcher::Job::add_out_file -- Warning file was already entered in out_files: " << file << std::endl; +} + +void +Launcher::Job::setMaximumDuration(const std::string & maximum_duration) +{ + checkMaximumDuration(maximum_duration); + _maximum_duration_in_second = convertMaximumDuration(maximum_duration); + _maximum_duration = maximum_duration; +} + +void +Launcher::Job::setResourceRequiredParams(const resourceParams & resource_required_params) +{ + checkResourceRequiredParams(resource_required_params); + _resource_required_params = resource_required_params; +} + +void +Launcher::Job::setQueue(const std::string & queue) +{ + _queue = queue; +} + +std::string +Launcher::Job::getWorkDirectory() +{ + return _work_directory; +} + +std::string +Launcher::Job::getLocalDirectory() +{ + return _local_directory; +} + +std::string +Launcher::Job::getResultDirectory() +{ + return _result_directory; +} + +const std::list & +Launcher::Job::get_in_files() +{ + return _in_files; +} + +const std::list & +Launcher::Job::get_out_files() +{ + return _out_files; +} + +std::string +Launcher::Job::getMaximumDuration() +{ + return _maximum_duration; +} + +resourceParams +Launcher::Job::getResourceRequiredParams() +{ + return _resource_required_params; +} + +std::string +Launcher::Job::getQueue() +{ + return _queue; +} + 
+void +Launcher::Job::checkMaximumDuration(const std::string & maximum_duration) +{ + std::string result(""); + std::string edt_value = maximum_duration; + if (edt_value != "") { + std::string begin_edt_value = edt_value.substr(0, 2); + std::string mid_edt_value = edt_value.substr(2, 1); + std::string end_edt_value = edt_value.substr(3); + + long value; + std::istringstream iss(begin_edt_value); + if (!(iss >> value)) { + result = "[Launcher::Job::checkExpectedDuration] Error on definition ! : " + edt_value; + } + else if (value < 0) { + result = "[Launcher::Job::checkExpectedDuration] Error on definition time is negative ! : " + value; + } + std::istringstream iss_2(end_edt_value); + if (!(iss_2 >> value)) { + result = "[Launcher::Job::checkExpectedDuration] Error on definition ! : " + edt_value; + } + else if (value < 0) { + result = "[Launcher::Job::checkExpectedDuration] Error on definition time is negative ! : " + value; + } + if (mid_edt_value != ":") { + result = "[Launcher::Job::checkExpectedDuration] Error on definition ! :" + edt_value; + } + } + if (result != "") + throw LauncherException(result); +} + +void +Launcher::Job::checkResourceRequiredParams(const resourceParams & resource_required_params) +{ + // nb_proc has be to > 0 + if (resource_required_params.nb_proc <= 0) + { + std::string message("[Launcher::Job::checkResourceRequiredParams] proc number is not > 0 ! 
"); + throw LauncherException(message); + } +} + +long +Launcher::Job::convertMaximumDuration(const std::string & edt) +{ + long hh, mm, ret; + + if( edt.size() == 0 ) + return -1; + + std::string::size_type pos = edt.find(":"); + std::string h = edt.substr(0,pos); + std::string m = edt.substr(pos+1,edt.size()-pos+1); + std::istringstream issh(h); + issh >> hh; + std::istringstream issm(m); + issm >> mm; + ret = hh*60 + mm; + ret = ret * 60; + + return ret; +} + +std::string +Launcher::Job::getLaunchDate() +{ + time_t rawtime; + time(&rawtime); + std::string launch_date = ctime(&rawtime); + int i = 0 ; + for (;i < launch_date.size(); i++) + if (launch_date[i] == '/' or + launch_date[i] == '-' or + launch_date[i] == ':' or + launch_date[i] == ' ') + launch_date[i] = '_'; + launch_date.erase(--launch_date.end()); // Last caracter is a \n + + return launch_date; +} + +std::string +Launcher::Job::updateJobState() +{ +#ifdef WITH_LIBBATCH + if (_batch_job_id.getReference() != "undefined") + { + // A batch manager has been affected to the job + Batch::JobInfo job_info = _batch_job_id.queryJob(); + Batch::Parametre par = job_info.getParametre(); + + LAUNCHER_MESSAGE("State received is: " << par[STATE].str()); + + // Patch until new LIBBATCH version + // eSSH Client and ePBS Client and eSGE + if (par[STATE].str() == "Running" or par[STATE].str() == "E" or par[STATE].str() == "R" or par[STATE].str() == "r" or par[STATE].str() == "RUN") + _state = "RUNNING"; + else if (par[STATE].str() == "Stopped") + _state = "PAUSED"; + else if (par[STATE].str() == "Done" or par[STATE].str() == "U" or par[STATE].str() == "e" or par[STATE].str() == "DONE" or par[STATE].str() == "EXIT") + _state = "FINISHED"; + else if (par[STATE].str() == "Dead" or par[STATE].str() == "Eqw") + _state = "ERROR"; + else if (par[STATE].str() == "Q" or par[STATE].str() == "qw" or par[STATE].str() == "PEN") + _state = "QUEUED"; + } +#endif + return _state; +} + +#ifdef WITH_LIBBATCH +Batch::Job * 
+Launcher::Job::getBatchJob() +{ + update_job(); + return _batch_job; +} + +Batch::Parametre +Launcher::Job::common_job_params() +{ + Batch::Parametre params; + + params[USER] = _resource_definition.UserName; + params[NBPROC] = _resource_required_params.nb_proc; + + // Memory + if (_resource_required_params.mem_mb > 0) + { + // Memory is in kilobytes + params[MAXRAMSIZE] = _resource_required_params.mem_mb * 1024; + } + + // We define a default directory based on user time + if (_work_directory == "") + { + std::string thedate; + Batch::Date date = Batch::Date(time(0)); + thedate = date.str(); + int lend = thedate.size() ; + int i = 0 ; + while ( i < lend ) { + if ( thedate[i] == '/' || thedate[i] == '-' || thedate[i] == ':' ) { + thedate[i] = '_' ; + } + i++ ; + } + _work_directory = std::string("$HOME/Batch/"); + _work_directory += thedate; + } + params[WORKDIR] = _work_directory; + params[TMPDIR] = _work_directory; // To Compatibility -- remove ??? TODO + + // If result_directory is not defined, we use HOME environnement + if (_result_directory == "") + _result_directory = getenv("HOME"); + + // _in_files + for(std::list::iterator it = _in_files.begin(); it != _in_files.end(); it++) + { + std::string file = *it; + + // local file -> If file is not an absolute path, we apply _local_directory + std::string local_file; + if (file.substr(0, 1) == std::string("/")) + local_file = file; + else + local_file = _local_directory + "/" + file; + + // remote file -> get only file name from _in_files + size_t found = file.find_last_of("/"); + std::string remote_file = _work_directory + "/" + file.substr(found+1); + + params[INFILE] += Batch::Couple(local_file, remote_file); + } + + // _out_files + for(std::list::iterator it = _out_files.begin(); it != _out_files.end(); it++) + { + std::string file = *it; + + // local file + size_t found = file.find_last_of("/"); + std::string local_file = _result_directory + "/" + file.substr(found+1); + + // remote file -> If file is not an 
absolute path, we apply _work_directory + std::string remote_file; + if (file.substr(0, 1) == std::string("/")) + remote_file = file; + else + remote_file = _work_directory + "/" + file; + + params[OUTFILE] += Batch::Couple(local_file, remote_file); + } + + // Time + if (_maximum_duration_in_second != -1) + params[MAXWALLTIME] = _maximum_duration_in_second; + + // Queue + if (_queue != "") + params[QUEUE] = _queue; + + return params; +} + +void +Launcher::Job::setBatchManagerJobId(Batch::JobId batch_manager_job_id) +{ + _batch_job_id = batch_manager_job_id; +} + +Batch::JobId +Launcher::Job::getBatchManagerJobId() +{ + return _batch_job_id; +} +#endif diff --git a/src/Launcher/Launcher_Job.hxx b/src/Launcher/Launcher_Job.hxx new file mode 100644 index 000000000..853ca7953 --- /dev/null +++ b/src/Launcher/Launcher_Job.hxx @@ -0,0 +1,139 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#ifndef _LAUNCHER_JOB_HXX_ +#define _LAUNCHER_JOB_HXX_ + +#include +#include "ResourcesManager.hxx" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef WITH_LIBBATCH +#include +#include +#include +#include +#endif + +namespace Launcher +{ + class Job + { + public: + Job(); + virtual ~Job(); + + // Launcher managing parameters + // State of a Job: CREATED, IN_PROCESS, QUEUED, RUNNING, PAUSED, FINISHED, ERROR + void setState(const std::string & state); + std::string getState(); + + void setNumber(const int & number); + int getNumber(); + + virtual void setResourceDefinition(const ParserResourcesType & resource_definition); + ParserResourcesType getResourceDefinition(); + + // Common parameters + virtual void setJobFile(const std::string & job_file); + void setWorkDirectory(const std::string & work_directory); + void setLocalDirectory(const std::string & local_directory); + void setResultDirectory(const std::string & result_directory); + void add_in_file(const std::string & file); + void add_out_file(const std::string & file); + void setMaximumDuration(const std::string & maximum_duration); + void setResourceRequiredParams(const resourceParams & resource_required_params); + void setQueue(const std::string & queue); + void setEnvFile(const std::string & env_file); + + std::string getJobFile(); + std::string getWorkDirectory(); + std::string getLocalDirectory(); + std::string getResultDirectory(); + const std::list & get_in_files(); + const std::list & get_out_files(); + std::string getMaximumDuration(); + resourceParams getResourceRequiredParams(); + std::string getQueue(); 
+ std::string getEnvFile(); + + std::string updateJobState(); + + // Checks + void checkMaximumDuration(const std::string & maximum_duration); + void checkResourceRequiredParams(const resourceParams & resource_required_params); + + // Helps + long convertMaximumDuration(const std::string & maximum_duration); + std::string getLaunchDate(); + + // Abstract class + virtual void update_job() = 0; + + protected: + int _number; + + std::string _state; + std::string _launch_date; + std::string _env_file; + + ParserResourcesType _resource_definition; + + std::string _job_file; + std::string _job_file_name; + std::string _job_file_name_complete; + + std::string _work_directory; + std::string _local_directory; + std::string _result_directory; + std::list _in_files; + std::list _out_files; + std::string _maximum_duration; + long _maximum_duration_in_second; + resourceParams _resource_required_params; + std::string _queue; + +#ifdef WITH_LIBBATCH + // Connection with LIBBATCH + public: + Batch::Job * getBatchJob(); + Batch::Parametre common_job_params(); + void setBatchManagerJobId(Batch::JobId batch_manager_job_id); + Batch::JobId getBatchManagerJobId(); + + protected: + Batch::Job * _batch_job; + Batch::JobId _batch_job_id; +#endif + }; +} + +#endif + diff --git a/src/Launcher/Launcher_Job_Command.cxx b/src/Launcher/Launcher_Job_Command.cxx new file mode 100644 index 000000000..313a0b026 --- /dev/null +++ b/src/Launcher/Launcher_Job_Command.cxx @@ -0,0 +1,69 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#include "Launcher_Job_Command.hxx" + +Launcher::Job_Command::Job_Command() {} + +Launcher::Job_Command::~Job_Command() {} + +void +Launcher::Job_Command::update_job() +{ +#ifdef WITH_LIBBATCH + Batch::Parametre params = common_job_params(); + params[EXECUTABLE] = buildCommandScript(params, _launch_date); + _batch_job->setParametre(params); +#endif +} + +#ifdef WITH_LIBBATCH +std::string +Launcher::Job_Command::buildCommandScript(Batch::Parametre params, std::string launch_date) +{ + // parameters + std::string work_directory = params[WORKDIR].str(); + + // File name + std::string::size_type p1 = _job_file.find_last_of("/"); + + std::string launch_date_port_file = launch_date; + std::string launch_script = "/tmp/runCommand_" + _job_file_name + "_" + launch_date + ".sh"; + std::ofstream launch_script_stream; + launch_script_stream.open(launch_script.c_str(), std::ofstream::out); + + // Script + launch_script_stream << "#! 
/bin/sh -f" << std::endl; + launch_script_stream << "cd " << work_directory << std::endl; + if (_env_file != "") + { + std::string::size_type last = _env_file.find_last_of("/"); + launch_script_stream << "source ./" << _env_file.substr(last+1) << std::endl; + } + launch_script_stream << "./" << _job_file_name_complete << " > " << work_directory <<"/logs/command_" << launch_date << ".log 2>&1" << std::endl; + + // Return + launch_script_stream.flush(); + launch_script_stream.close(); + chmod(launch_script.c_str(), 0x1ED); + chmod(_job_file.c_str(), 0x1ED); + return launch_script; +} +#endif diff --git a/src/Launcher/Launcher_Job_Command.hxx b/src/Launcher/Launcher_Job_Command.hxx new file mode 100644 index 000000000..a5ef7e4bf --- /dev/null +++ b/src/Launcher/Launcher_Job_Command.hxx @@ -0,0 +1,49 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#ifndef _LAUNCHER_JOB_COMMAND_HXX_ +#define _LAUNCHER_JOB_COMMAND_HXX_ + +#include "Launcher_Job.hxx" +#include "Launcher.hxx" + +#ifdef WITH_LIBBATCH +#include +#endif + +namespace Launcher +{ + class Job_Command : virtual public Launcher::Job + { + public: + Job_Command(); + virtual ~Job_Command(); + + virtual void update_job(); + +#ifdef WITH_LIBBATCH + protected: + std::string buildCommandScript(Batch::Parametre params, std::string launch_date); +#endif + }; +} + +#endif + diff --git a/src/Launcher/Launcher_Job_PythonSALOME.cxx b/src/Launcher/Launcher_Job_PythonSALOME.cxx new file mode 100644 index 000000000..584cabad4 --- /dev/null +++ b/src/Launcher/Launcher_Job_PythonSALOME.cxx @@ -0,0 +1,38 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#include "Launcher_Job_PythonSALOME.hxx" + + +Launcher::Job_PythonSALOME::Job_PythonSALOME() {} + +Launcher::Job_PythonSALOME::~Job_PythonSALOME() {} + +void +Launcher::Job_PythonSALOME::setJobFile(const std::string & job_file) +{ + Launcher::Job::setJobFile(job_file); +} + +void +Launcher::Job_PythonSALOME::addJobTypeSpecificScript(std::ofstream & launch_script_stream) +{ + launch_script_stream << _resource_definition.AppliPath << "/runSession -p $appli_port python " << _job_file_name_complete << " > logs/python_" << _launch_date << ".log 2>&1" << std::endl; +} diff --git a/src/Launcher/Launcher_Job_PythonSALOME.hxx b/src/Launcher/Launcher_Job_PythonSALOME.hxx new file mode 100644 index 000000000..106d6f0b3 --- /dev/null +++ b/src/Launcher/Launcher_Job_PythonSALOME.hxx @@ -0,0 +1,41 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#ifndef _LAUNCHER_JOB_PYTHONSALOME_HXX_ +#define _LAUNCHER_JOB_PYTHONSALOME_HXX_ + +#include "Launcher_Job_SALOME.hxx" + +namespace Launcher +{ + class Job_PythonSALOME : virtual public Launcher::Job_SALOME + { + public: + Job_PythonSALOME(); + virtual ~Job_PythonSALOME(); + + virtual void setJobFile(const std::string & job_file); + virtual void addJobTypeSpecificScript(std::ofstream & launch_script_stream); + }; +} + +#endif + + diff --git a/src/Launcher/Launcher_Job_SALOME.cxx b/src/Launcher/Launcher_Job_SALOME.cxx new file mode 100644 index 000000000..40808ab8c --- /dev/null +++ b/src/Launcher/Launcher_Job_SALOME.cxx @@ -0,0 +1,121 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#include "Launcher_Job_SALOME.hxx" + +Launcher::Job_SALOME::Job_SALOME() {} + +Launcher::Job_SALOME::~Job_SALOME() {} + +void +Launcher::Job_SALOME::setResourceDefinition(const ParserResourcesType & resource_definition) +{ + // Check resource_definition + if (resource_definition.AppliPath == "") + { + std::string mess = "Resource definition must define an application path !, resource name is: " + resource_definition.Name; + throw LauncherException(mess); + } + Launcher::Job::setResourceDefinition(resource_definition); +} + +void +Launcher::Job_SALOME::update_job() +{ +#ifdef WITH_LIBBATCH + Batch::Parametre params = common_job_params(); + params[EXECUTABLE] = buildSalomeScript(params); + _batch_job->setParametre(params); +#endif +} + +#ifdef WITH_LIBBATCH +std::string +Launcher::Job_SALOME::buildSalomeScript(Batch::Parametre params) +{ + // parameters + std::string work_directory = params[WORKDIR].str(); + + std::string launch_date_port_file = _launch_date; + std::string launch_script = "/tmp/runSalome_" + _job_file_name + "_" + _launch_date + ".sh"; + std::ofstream launch_script_stream; + launch_script_stream.open(launch_script.c_str(), std::ofstream::out); + + // Begin of script + launch_script_stream << "#! 
/bin/sh -f" << std::endl; + launch_script_stream << "cd " << work_directory << std::endl; + if (_env_file != "") + { + std::string::size_type last = _env_file.find_last_of("/"); + launch_script_stream << "source ./" << _env_file.substr(last+1) << std::endl; + } + launch_script_stream << "export SALOME_TMP_DIR=" << work_directory << "/logs" << std::endl; + + // -- Generates Catalog Resources + std::string resource_protocol = "ssh"; + if (_resource_definition.ClusterInternalProtocol == rsh) + resource_protocol = "rsh"; + + launch_script_stream << "if [ \"x$LIBBATCH_NODEFILE\" != \"x\" ]; then " << std::endl; + launch_script_stream << "CATALOG_FILE=" << work_directory << "/CatalogResources_" << _launch_date << ".xml" << std::endl; + launch_script_stream << "export USER_CATALOG_RESOURCES_FILE=" << "$CATALOG_FILE" << std::endl; + launch_script_stream << "echo '' > $CATALOG_FILE" << std::endl; + launch_script_stream << "echo '' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "cat $LIBBATCH_NODEFILE | sort -u | while read host" << std::endl; + launch_script_stream << "do" << std::endl; + launch_script_stream << "echo '> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo ' userName=\"" << _resource_definition.UserName << "\"' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo ' appliPath=\"" << _resource_definition.AppliPath << "\"' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo '/>' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "done" << std::endl; + launch_script_stream << "echo '' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "fi" << std::endl; + + // Launch SALOME with an appli + launch_script_stream << _resource_definition.AppliPath << "/runAppli --terminal --ns-port-log=" << launch_date_port_file << " > logs/salome_" << _launch_date << ".log 2>&1" << std::endl; + launch_script_stream << "current=0\n" + << "stop=20\n" + << "while ! 
test -f " << _resource_definition.AppliPath << "/" << launch_date_port_file << "\n" + << "do\n" + << " sleep 2\n" + << " let current=current+1\n" + << " if [ \"$current\" -eq \"$stop\" ] ; then\n" + << " echo Error Naming Service failed ! >&2\n" + << " exit\n" + << " fi\n" + << "done\n" + << "appli_port=`cat " << _resource_definition.AppliPath << "/" << launch_date_port_file << "`\n"; + + // Call real job type + addJobTypeSpecificScript(launch_script_stream); + + // End + launch_script_stream << _resource_definition.AppliPath << "/runSession -p $appli_port shutdownSalome.py" << std::endl; + launch_script_stream << "sleep 10" << std::endl; + + // Return + launch_script_stream.flush(); + launch_script_stream.close(); + chmod(launch_script.c_str(), 0x1ED); + return launch_script; +} +#endif + diff --git a/src/Launcher/Launcher_Job_SALOME.hxx b/src/Launcher/Launcher_Job_SALOME.hxx new file mode 100644 index 000000000..de351ab08 --- /dev/null +++ b/src/Launcher/Launcher_Job_SALOME.hxx @@ -0,0 +1,52 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#ifndef _LAUNCHER_JOB_SALOME_HXX_ +#define _LAUNCHER_JOB_SALOME_HXX_ + +#include "Launcher_Job.hxx" +#include "Launcher.hxx" + +#ifdef WITH_LIBBATCH +#include +#endif + +namespace Launcher +{ + class Job_SALOME : virtual public Launcher::Job + { + public: + Job_SALOME(); + virtual ~Job_SALOME(); + + virtual void setResourceDefinition(const ParserResourcesType & resource_definition); + virtual void update_job(); + +#ifdef WITH_LIBBATCH + public: + std::string buildSalomeScript(Batch::Parametre params); + virtual void addJobTypeSpecificScript(std::ofstream & launch_script_stream) = 0; +#endif + }; +} + +#endif + + diff --git a/src/Launcher/Launcher_Job_YACSFile.cxx b/src/Launcher/Launcher_Job_YACSFile.cxx new file mode 100644 index 000000000..1126e90ab --- /dev/null +++ b/src/Launcher/Launcher_Job_YACSFile.cxx @@ -0,0 +1,38 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#include "Launcher_Job_YACSFile.hxx" + + +Launcher::Job_YACSFile::Job_YACSFile() {} + +Launcher::Job_YACSFile::~Job_YACSFile() {} + +void +Launcher::Job_YACSFile::setJobFile(const std::string & job_file) +{ + Launcher::Job::setJobFile(job_file); +} + +void +Launcher::Job_YACSFile::addJobTypeSpecificScript(std::ofstream & launch_script_stream) +{ + launch_script_stream << _resource_definition.AppliPath << "/runSession -p $appli_port driver " << _job_file_name_complete << " > logs/yacs_" << _launch_date << ".log 2>&1" << std::endl; +} diff --git a/src/Launcher/Launcher_Job_YACSFile.hxx b/src/Launcher/Launcher_Job_YACSFile.hxx new file mode 100644 index 000000000..b789226b1 --- /dev/null +++ b/src/Launcher/Launcher_Job_YACSFile.hxx @@ -0,0 +1,40 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +// +// Author: André RIBES - EDF R&D + +#ifndef _LAUNCHER_JOB_YACSFILE_HXX_ +#define _LAUNCHER_JOB_YACSFILE_HXX_ + +#include "Launcher_Job_SALOME.hxx" + +namespace Launcher +{ + class Job_YACSFile : virtual public Launcher::Job_SALOME + { + public: + Job_YACSFile(); + virtual ~Job_YACSFile(); + + virtual void setJobFile(const std::string & job_file); + virtual void addJobTypeSpecificScript(std::ofstream & launch_script_stream); + }; +} + +#endif + diff --git a/src/Launcher/Launcher_Utils.hxx b/src/Launcher/Launcher_Utils.hxx new file mode 100644 index 000000000..2aa8b950f --- /dev/null +++ b/src/Launcher/Launcher_Utils.hxx @@ -0,0 +1,57 @@ +// Copyright (C) 2009 CEA/DEN, EDF R&D +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+//
+// Author: André RIBES - EDF R&D
+
+#ifndef __LAUNCHER_UTILS_HXX__
+#define __LAUNCHER_UTILS_HXX__
+
+// std::cerr / std::endl are used by the trace macros, std::string by
+// LauncherException.
+#include <iostream>
+#include <string>
+
+// Import/export decoration for the Launcher library symbols (Windows only).
+#ifdef WIN32
+# if defined LAUNCHER_EXPORTS || defined Launcher_EXPORTS
+#  define LAUNCHER_EXPORT __declspec(dllexport)
+# else
+#  define LAUNCHER_EXPORT __declspec(dllimport)
+# endif
+#else
+# define LAUNCHER_EXPORT
+#endif
+
+// MESSAGES
+// Building blocks of the trace macros: everything is written to std::cerr,
+// prefixed by `deb`, the source file name and the line number.
+// LAUNCHER_MESS_END carries its own ';' so that the braces of LAUNCHER_INFOS
+// and LAUNCHER_MESSAGE enclose a complete statement.
+#define LAUNCHER_MESS_INIT(deb) std::cerr << deb
+#define LAUNCHER_MESS_BEGIN(deb) LAUNCHER_MESS_INIT(deb)<<__FILE__ <<" ["<<__LINE__<<"] : "
+#define LAUNCHER_MESS_END std::endl;
+// Unconditional trace.
+#define LAUNCHER_INFOS(msg) {LAUNCHER_MESS_BEGIN("- Trace ") << msg << LAUNCHER_MESS_END}
+
+// Trace that is compiled out unless _DEBUG_ or _DEBUG is defined.
+#if defined(_DEBUG_) || defined(_DEBUG)
+#define LAUNCHER_MESSAGE(msg) {LAUNCHER_MESS_BEGIN("- Trace ") << msg << LAUNCHER_MESS_END}
+#else /* ifdef _DEBUG_*/
+#define LAUNCHER_MESSAGE(msg) {}
+#endif /* ifdef _DEBUG_*/
+
+// Exception type of the Launcher library: a plain carrier for an error text.
+// The text is public and immutable; handlers read it through `msg`.
+class LAUNCHER_EXPORT LauncherException
+{
+public:
+  const std::string msg;
+
+  // Taken by const reference so the text is copied once (into msg) instead of
+  // once for the parameter and once for the member.
+  LauncherException(const std::string & m) : msg(m) {}
+};
+
+
+#endif
diff --git a/src/Launcher/Makefile.am b/src/Launcher/Makefile.am
index fc49e1121..ab11e0daf 100644
--- a/src/Launcher/Makefile.am
+++ b/src/Launcher/Makefile.am
@@ -33,6 +33,12 @@ salomeinclude_HEADERS = \
 	BatchTest.hxx \
 	SALOME_Launcher_defs.hxx \
 	SALOME_Launcher.hxx \
+	Launcher_Utils.hxx \
+	Launcher_Job.hxx \
+	Launcher_Job_Command.hxx \
+	Launcher_Job_SALOME.hxx \
+	Launcher_Job_PythonSALOME.hxx \
+	Launcher_Job_YACSFile.hxx \
 	Launcher.hxx
 # Scripts to be installed
@@ -112,6 +118,12 @@ libSalomeLauncher_la_LIBADD =\
 libLauncher_la_SOURCES=\
 	SALOME_Launcher_Parser.cxx \
 	SALOME_Launcher_Handler.cxx \
+	Launcher_Utils.hxx \
+	Launcher_Job.cxx \
+	Launcher_Job_Command.cxx \
+ Launcher_Job_SALOME.cxx \ + Launcher_Job_PythonSALOME.cxx \ + Launcher_Job_YACSFile.cxx \ Launcher.cxx libLauncher_la_CPPFLAGS =\ diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index 9ec5bf1f9..d65f42f7d 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -25,6 +25,11 @@ #include "SALOME_ContainerManager.hxx" #include "Utils_CorbaException.hxx" + +#include "Launcher_Job_Command.hxx" +#include "Launcher_Job_YACSFile.hxx" +#include "Launcher_Job_PythonSALOME.hxx" + #ifdef WIN32 # include #else @@ -41,11 +46,8 @@ const char *SALOME_Launcher::_LauncherNameInNS = "/SalomeLauncher"; /*! * Constructor * \param orb - * Define a CORBA single thread policy for the server, which avoid to deal - * with non thread-safe usage like Change_Directory in SALOME naming service */ //============================================================================= - SALOME_Launcher::SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa) : _l() { MESSAGE("SALOME_Launcher constructor"); @@ -71,116 +73,188 @@ SALOME_Launcher::SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa * destructor */ //============================================================================= - SALOME_Launcher::~SALOME_Launcher() { - MESSAGE("destructor"); + MESSAGE("SALOME_Launcher destructor"); delete _NS; + MESSAGE("SALOME_Launcher destructor end"); } -//============================================================================= -/*! 
CORBA method:
- * shutdown all the containers, then the ContainerManager servant
- */
-//=============================================================================
-void SALOME_Launcher::Shutdown()
+//=============================================================================
+/*! CORBA Method:
+ *  Build a job from job_parameters and register it in the launcher.
+ *  \param job_parameters : description of the job; job_type must be one of
+ *         "command", "yacs_file" or "python_salome"
+ *  \return the number of the new job (new_job->getNumber())
+ *
+ *  A LauncherException raised while the job is being configured is logged and
+ *  converted into a SALOME CORBA exception with INTERNAL_ERROR; one raised by
+ *  the registration itself is converted with BAD_PARAM.
+ */
+//=============================================================================
+CORBA::Long
+SALOME_Launcher::createJob(const Engines::JobParameters & job_parameters)
 {
-  MESSAGE("Shutdown");
-  _NS->Destroy_Name(_LauncherNameInNS);
-  _ContManager->Shutdown();
-  _ResManager->Shutdown();
-  PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
-  _poa->deactivate_object(oid);
-  if(!CORBA::is_nil(_orb))
-    _orb->shutdown(0);
+  std::string job_type = job_parameters.job_type.in();
+
+  // Once registered, it is Launcher_cpp that is going to destroy the job.
+  // Until then this function owns it and must delete it on every error path.
+  // NOTE(review): deleting through Launcher::Job* assumes a virtual
+  // destructor in the base class (Job_YACSFile declares its own as virtual).
+  Launcher::Job * new_job = 0;
+
+  // Validation and creation share one chain so the list of accepted job
+  // types cannot get out of sync.
+  if (job_type == "command")
+    new_job = new Launcher::Job_Command();
+  else if (job_type == "yacs_file")
+    new_job = new Launcher::Job_YACSFile();
+  else if (job_type == "python_salome")
+    new_job = new Launcher::Job_PythonSALOME();
+  else
+  {
+    std::string message("SALOME_Launcher::createJob: bad job type: ");
+    message += job_type;
+    THROW_SALOME_CORBA_EXCEPTION(message.c_str(), SALOME::INTERNAL_ERROR);
+  }
+
+  // Directories
+  std::string work_directory = job_parameters.work_directory.in();
+  std::string local_directory = job_parameters.local_directory.in();
+  std::string result_directory = job_parameters.result_directory.in();
+  new_job->setWorkDirectory(work_directory);
+  new_job->setLocalDirectory(local_directory);
+  new_job->setResultDirectory(result_directory);
+
+  // Job File
+  std::string job_file = job_parameters.job_file.in();
+  try
+  {
+    new_job->setJobFile(job_file);
+  }
+  catch(const LauncherException &ex)
+  {
+    INFOS(ex.msg.c_str());
+    delete new_job; // not handed over to Launcher_cpp yet
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::INTERNAL_ERROR);
+  }
+
+  // Files
+  std::string env_file = job_parameters.env_file.in();
+  new_job->setEnvFile(env_file);
+  for (CORBA::ULong i = 0; i < job_parameters.in_files.length(); i++)
+    new_job->add_in_file(job_parameters.in_files[i].in());
+  for (CORBA::ULong i = 0; i < job_parameters.out_files.length(); i++)
+    new_job->add_out_file(job_parameters.out_files[i].in());
+
+  // Maximum duration
+  try
+  {
+    std::string maximum_duration = job_parameters.maximum_duration.in();
+    new_job->setMaximumDuration(maximum_duration);
+  }
+  catch(const LauncherException &ex){
+    INFOS(ex.msg.c_str());
+    delete new_job; // not handed over to Launcher_cpp yet
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::INTERNAL_ERROR);
+  }
+
+  // Queue (left untouched when the caller gave none)
+  std::string queue = job_parameters.queue.in();
+  if (queue != "")
+    new_job->setQueue(queue);
+
+  // Resources requirements
+  try
+  {
+    resourceParams p;
+    p.name = job_parameters.resource_required.name;
+    p.hostname = job_parameters.resource_required.hostname;
+    p.OS = job_parameters.resource_required.OS;
+    p.nb_proc = job_parameters.resource_required.nb_proc;
+    p.nb_node = job_parameters.resource_required.nb_node;
+    p.nb_proc_per_node = job_parameters.resource_required.nb_proc_per_node;
+    p.cpu_clock = job_parameters.resource_required.cpu_clock;
+    p.mem_mb = job_parameters.resource_required.mem_mb;
+    new_job->setResourceRequiredParams(p);
+  }
+  catch(const LauncherException &ex){
+    INFOS(ex.msg.c_str());
+    delete new_job; // not handed over to Launcher_cpp yet
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::INTERNAL_ERROR);
+  }
+
+  // Hand the job over to the launcher.
+  // NOTE(review): whether _l.createJob keeps or frees the job when it throws
+  // is not visible from here, so the pointer is deliberately not deleted on
+  // this path - confirm against Launcher_cpp::createJob.
+  try
+  {
+    _l.createJob(new_job);
+  }
+  catch(const LauncherException &ex)
+  {
+    INFOS(ex.msg.c_str());
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
+  }
+  return new_job->getNumber();
 }
-//=============================================================================
-/*!
CORBA Method:
- * Returns the PID of the process
- */
-//=============================================================================
-CORBA::Long SALOME_Launcher::getPID()
+// CORBA method: forwards the launch request for job job_id to the launcher
+// (_l.launchJob). A LauncherException is logged with INFOS and converted into
+// a SALOME CORBA exception with BAD_PARAM.
+void
+SALOME_Launcher::launchJob(CORBA::Long job_id)
 {
-  return
-#ifndef WIN32
-    (CORBA::Long)getpid();
-#else
-    (CORBA::Long)_getpid();
-#endif
+  try
+  {
+    _l.launchJob(job_id);
+  }
+  catch(const LauncherException &ex)
+  {
+    INFOS(ex.msg.c_str());
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
+  }
+}
+// CORBA method: returns the state reported by _l.getJobState(job_id) as a
+// copy made with CORBA::string_dup. A LauncherException is logged and
+// converted into a SALOME CORBA exception with BAD_PARAM.
+char *
+SALOME_Launcher::getJobState(CORBA::Long job_id)
+{
+  std::string result;
+  try
+  {
+    result = _l.getJobState(job_id);
+  }
+  catch(const LauncherException &ex)
+  {
+    INFOS(ex.msg.c_str());
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
+  }
+  return CORBA::string_dup(result.c_str());
 }
-//=============================================================================
-/*! CORBA Method:
- * Submit a batch job on a cluster and returns the JobId
- * \param xmlExecuteFile : .xml to parse to execute on the batch cluster
- * \param clusterName : cluster name
- */
-//=============================================================================
-CORBA::Long SALOME_Launcher::submitJob(const char * xmlExecuteFile,
-                                       const char * clusterName)
+// CORBA method: asks the launcher (_l.getJobResults) for the results of job
+// job_id, passing `directory` through unchanged. A LauncherException is
+// logged and converted into a SALOME CORBA exception with BAD_PARAM.
+void
+SALOME_Launcher::getJobResults(CORBA::Long job_id, const char * directory)
 {
-  CORBA::Long jobId;
+  try
+  {
+    _l.getJobResults(job_id, directory);
+  }
+  catch(const LauncherException &ex)
+  {
+    INFOS(ex.msg.c_str());
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
+  }
+}
-  try{
-    jobId = _l.submitJob(xmlExecuteFile,clusterName);
+// CORBA method: forwards the removal of job job_id to the launcher
+// (_l.removeJob). A LauncherException is logged and converted into a SALOME
+// CORBA exception with BAD_PARAM.
+void
+SALOME_Launcher::removeJob(CORBA::Long job_id)
+{
+  try
+  {
+    _l.removeJob(job_id);
   }
-  catch(const LauncherException &ex){
+  catch(const LauncherException &ex)
+  {
     INFOS(ex.msg.c_str());
-    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::INTERNAL_ERROR);
+    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
   }
-  return jobId;
 }
//============================================================================= /*! CORBA Method: - * Submit a batch job on a cluster and returns the JobId - * \param fileToExecute : .py/.exe/.sh/... to execute on the batch cluster - * \param filesToExport : to export on the batch cluster - * \param NumberOfProcessors : Number of processors needed on the batch cluster - * \param params : Constraints for the choice of the batch cluster + * Create a job in the launcher with a file + * \param xmlExecuteFile : .xml to parse that contains job description + * \param clusterName : machine choosed */ //============================================================================= -CORBA::Long SALOME_Launcher::submitSalomeJob( const char * fileToExecute , - const Engines::FilesList& filesToExport , - const Engines::FilesList& filesToImport , - const Engines::BatchParameters& batch_params, - const Engines::MachineParameters& params) +CORBA::Long +SALOME_Launcher::createJobWithFile(const char * xmlExecuteFile, + const char * clusterName) { - MESSAGE("BEGIN OF SALOME_Launcher::submitSalomeJob"); CORBA::Long jobId; - - machineParams p; - p.hostname = params.hostname; - p.OS = params.OS; - p.nb_node = params.nb_node; - p.nb_proc_per_node = params.nb_proc_per_node; - p.cpu_clock = params.cpu_clock; - p.mem_mb = params.mem_mb; - - batchParams bp; - bp.batch_directory = batch_params.batch_directory; - bp.expected_during_time = batch_params.expected_during_time; - bp.mem = batch_params.mem; - bp.nb_proc = batch_params.nb_proc; - - vector efl; - for(int i=0;i ifl; - for(int i=0;iGetFittingResources(params); + Engines::ResourceList *aMachineList = _ResManager->GetFittingResources(params); if (aMachineList->length() == 0) throw SALOME_Exception("No resources have been found with your parameters"); - const Engines::MachineDefinition* p = _ResManager->GetMachineParameters((*aMachineList)[0]); - string clustername(p->alias); - INFOS("Choose cluster" << clustername); + const 
Engines::ResourceDefinition* p = _ResManager->GetResourceDefinition((*aMachineList)[0]); + string resource_name(p->name); + INFOS("Choose resource for test: " << resource_name); BatchTest t(*p); if (t.test()) @@ -220,86 +294,33 @@ SALOME_Launcher::testBatch(const Engines::MachineParameters& params) } //============================================================================= -/*! CORBA Method: - * Query a batch job on a cluster and returns the status of job - * \param jobId : identification of Salome job - * \param params : Constraints for the choice of the batch cluster - */ -//============================================================================= -char* SALOME_Launcher::queryJob( const CORBA::Long jobId, - const Engines::MachineParameters& params) -{ - string status; - machineParams p; - p.hostname = params.hostname; - p.OS = params.OS; - p.nb_node = params.nb_node; - p.nb_proc_per_node = params.nb_proc_per_node; - p.cpu_clock = params.cpu_clock; - p.mem_mb = params.mem_mb; - - try{ - status = _l.queryJob(jobId,p); - } - catch(const LauncherException &ex){ - INFOS(ex.msg.c_str()); - THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM); - } - return CORBA::string_dup(status.c_str()); -} - -//============================================================================= -/*! CORBA Method: - * Delete a batch job on a cluster - * \param jobId : identification of Salome job - * \param params : Constraints for the choice of the batch cluster +/*! 
CORBA method:
+ * Shut the launcher server down: remove its entry from the naming service,
+ * shut down the container manager and the resources manager, deactivate this
+ * servant in its POA and, when an ORB is set, shut the ORB down.
  */
 //=============================================================================
-void SALOME_Launcher::deleteJob( const CORBA::Long jobId,
-                                 const Engines::MachineParameters& params)
+void SALOME_Launcher::Shutdown()
 {
-  machineParams p;
-  p.hostname = params.hostname;
-  p.OS = params.OS;
-  p.nb_node = params.nb_node;
-  p.nb_proc_per_node = params.nb_proc_per_node;
-  p.cpu_clock = params.cpu_clock;
-  p.mem_mb = params.mem_mb;
-
-  try{
-    _l.deleteJob(jobId,p);
-  }
-  catch(const LauncherException &ex){
-    INFOS("Caught exception.");
-    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
-  }
+  MESSAGE("Shutdown");
+  // Unpublish first, then stop the two managers.
+  _NS->Destroy_Name(_LauncherNameInNS);
+  _ContManager->Shutdown();
+  _ResManager->Shutdown();
+  // Deactivate this servant before the ORB itself is stopped.
+  PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
+  _poa->deactivate_object(oid);
+  // shutdown(0): the argument is the wait_for_completion flag, here false.
+  if(!CORBA::is_nil(_orb))
+    _orb->shutdown(0);
 }
 //=============================================================================
 /*!
CORBA Method:
- * Get result files of job on a cluster
- * \param jobId : identification of Salome job
- * \param params : Constraints for the choice of the batch cluster
+ * Returns the PID of the process executing this call, i.e. the launcher
+ * server process, converted to CORBA::Long.
  */
 //=============================================================================
-void SALOME_Launcher::getResultsJob( const char *directory,
-                                     const CORBA::Long jobId,
-                                     const Engines::MachineParameters& params)
+CORBA::Long SALOME_Launcher::getPID()
 {
-  machineParams p;
-  p.hostname = params.hostname;
-  p.OS = params.OS;
-  p.nb_node = params.nb_node;
-  p.nb_proc_per_node = params.nb_proc_per_node;
-  p.cpu_clock = params.cpu_clock;
-  p.mem_mb = params.mem_mb;
-
-  try{
-    _l.getResultsJob( directory, jobId, p );
-  }
-  catch(const LauncherException &ex){
-    INFOS("Caught exception.");
-    THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
-  }
+  // One return statement, two spellings: getpid() everywhere except on WIN32
+  // builds, where _getpid() is used instead.
+  return
+#ifndef WIN32
+    (CORBA::Long)getpid();
+#else
+    (CORBA::Long)_getpid();
+#endif
 }
-
diff --git a/src/Launcher/SALOME_Launcher.hxx b/src/Launcher/SALOME_Launcher.hxx
index 9263d5c68..489b782dd 100644
--- a/src/Launcher/SALOME_Launcher.hxx
+++ b/src/Launcher/SALOME_Launcher.hxx
@@ -43,26 +43,19 @@ public:
   SALOME_Launcher(CORBA::ORB_ptr orb, PortableServer::POA_var poa);
   ~SALOME_Launcher();
-  CORBA::Long submitJob(const char * xmlExecuteFile,
-                        const char * clusterName);
-
-  CORBA::Long submitSalomeJob(const char * fileToExecute ,
-                              const Engines::FilesList& filesToExport ,
-                              const Engines::FilesList& filesToImport ,
-                              const Engines::BatchParameters& batch_params,
-                              const Engines::MachineParameters& params);
-
-  char* querySalomeJob( CORBA::Long jobId, const Engines::MachineParameters& params);
-  void deleteSalomeJob( CORBA::Long jobId, const Engines::MachineParameters& params);
-  void getResultSalomeJob( const char * directory, CORBA::Long jobId, const Engines::MachineParameters& params );
-  char* queryJob( CORBA::Long jobId, const Engines::MachineParameters& params);
-  void deleteJob( CORBA::Long jobId,
const Engines::MachineParameters& params); - void getResultsJob( const char * directory, CORBA::Long jobId, const Engines::MachineParameters& params ); - - CORBA::Boolean testBatch(const Engines::MachineParameters& params); - + // Main methods + CORBA::Long createJob (const Engines::JobParameters & job_parameters); + void launchJob (CORBA::Long job_id); + char * getJobState (CORBA::Long job_id); + void getJobResults(CORBA::Long job_id, const char * directory); + void removeJob (CORBA::Long job_id); + + // Useful methods + CORBA::Long createJobWithFile(const char * xmlExecuteFile, const char * clusterName); + CORBA::Boolean testBatch (const Engines::ResourceParameters& params); + + // SALOME Kernel service methods void Shutdown(); - CORBA::Long getPID(); static const char *_LauncherNameInNS; diff --git a/src/LifeCycleCORBA/LifeCycleCORBA.py b/src/LifeCycleCORBA/LifeCycleCORBA.py deleted file mode 100644 index 133ab610c..000000000 --- a/src/LifeCycleCORBA/LifeCycleCORBA.py +++ /dev/null @@ -1,275 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2007-2008 CEA/DEN, EDF R&D, OPEN CASCADE -# -# Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN, -# CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com -# -# SALOME LifeCycleC RBA : implementation of containers and engines life cycle both in Python and C++ -# File : LifeCycleCORBA.py -# Author : Paul RASCLE, EDF -# Module : SALOME -# $Header$ -# -import os -import sys -import time -import string -from omniORB import CORBA -import CosNaming -import Engines -reload(Engines) -import SALOME_ModuleCatalog - -from SALOME_utilities import * -from Utils_Identity import getShortHostName -import Utils_Identity -import Launchers - -class LifeCycleCORBA: - _orb = None - _rootcontext = None - _containerRootContext = None - _catalog = None - - #------------------------------------------------------------------------- - - def __init__(self, orb): - MESSAGE( "LifeCycleCORBA::__init__" ) - self._orb = orb - - obj = self._orb.resolve_initial_references("NameService") - self._rootContext = obj._narrow(CosNaming.NamingContext) - - if self._rootContext is None: - MESSAGE( "Name Service Reference is invalid" ) - - name = [CosNaming.NameComponent("Containers","dir")] - try: - self._containerRootContext = self._rootContext.bind_new_context(name) - - except CosNaming.NamingContext.AlreadyBound, ex: - MESSAGE( "/Containers.dir Context already exists" ) - obj = self._rootContext.resolve(name) - self._containerRootContext = obj._narrow(CosNaming.NamingContext) - if self._containerRootContext is None: - MESSAGE( "Containers.dir exists but it is not a NamingContext" ) - - name = [CosNaming.NameComponent("Kernel","dir"), - CosNaming.NameComponent("ModulCatalog","object")] - try: - obj = self._rootContext.resolve(name) - except CosNaming.NamingContext.NotFound, ex: - MESSAGE( "/Kernel.dir/ModulCatalog.object not found in Naming Service" ) - - 
self._catalog = obj._narrow(SALOME_ModuleCatalog.ModuleCatalog) - if self._catalog is None: - MESSAGE( "/Kernel.dir/ModulCatalog.object exists but is not a ModulCatalog" ) - - name = [CosNaming.NameComponent("ContainerManager","object")] - try: - obj = self._rootContext.resolve(name) - except CosNaming.NamingContext.NotFound, ex: - MESSAGE( "ContainerManager.object not found in Naming Service" ) - self._contManager = obj._narrow(Engines.ContainerManager) - if self._contManager is None: - MESSAGE( "ContainerManager.object exists but is not a ContainerManager") - - #------------------------------------------------------------------------- - - def ContainerName(self, containerName): - theComputer = "" - try: - theComputer , theContainer = containerName.split('/') - except: - theComputer = "" - theContainer = containerName - - if theComputer in ("","localhost") : - theComputer = getShortHostName() - - MESSAGE( theComputer + theContainer ) - return theComputer,theContainer - - #------------------------------------------------------------------------- - - def ComputerPath(self, ComputerName ): - try: - #path = self._catalog.GetPathPrefix( ComputerName ) - path = os.getenv("KERNEL_ROOT_DIR") + "/bin/salome/" - except SALOME_ModuleCatalog.NotFound, ex: - path = "" - return path - - #------------------------------------------------------------------------- - - def FindContainer(self, containerName): - theComputer,theContainer = self.ContainerName( containerName ) - name = [CosNaming.NameComponent(theComputer,"dir"), - CosNaming.NameComponent(theContainer,"object")] - obj = None - try: - obj = self._containerRootContext.resolve(name) - MESSAGE( containerName + ".object found in Naming Service" ) - - except CosNaming.NamingContext.NotFound, ex: - MESSAGE( containerName + ".object not found in Naming Service" ) - - if obj is None: - container = None - else: - container = obj._narrow(Engines.Container) - if container is None: - MESSAGE( containerName + ".object exists but is 
not a Container" ) - return container - - #------------------------------------------------------------------------- - - def FindComponent(self,containerName,componentName,listOfMachines): - if containerName!="": - machinesOK=[] - for i in range(len(listOfMachines)): - currentMachine=listOfMachines[i] - componentNameForNS= [CosNaming.NameComponent(currentMachine,"dir"), - CosNaming.NameComponent(containerName,"dir"), - CosNaming.NameComponent(componentName,"object")] - obj=None - try: - obj = self._containerRootContext.resolve(componentNameForNS) - except CosNaming.NamingContext.NotFound, ex: - MESSAGE( "component " + componentName + " not found on machine " + currentMachine + " , trying to load" ) - pass - if obj is not None: - machinesOK.append(currentMachine) - pass - pass - if len(machinesOK)!=0: - bestMachine=self._contManager.FindFirst(machinesOK) - componentNameForNS= [CosNaming.NameComponent(bestMachine,"dir"), - CosNaming.NameComponent(containerName,"dir"), - CosNaming.NameComponent(componentName,"object")] - obj=None - try: - obj = self._containerRootContext.resolve(componentNameForNS) - except: - pass - if obj is not None: - return obj._narrow(Engines.Component) - else: - MESSAGE( "Big problem !!!") - return None - else: - return None - else: - bestMachine=self._contManager.FindFirst(listOfMachines) - MESSAGE("Not implemented yet ...") - return None - pass - - #------------------------------------------------------------------------- - - def setLauncher(self,name): - """Change default launcher to the launcher identified by name - - See module Launchers.py - """ - Launchers.setLauncher(name) - - #------------------------------------------------------------------------- - - def StartContainer(self, theComputer , theContainer ): - """Start a container on theComputer machine with theContainer name - """ - # Get the Naming Service address - # - addr=self._orb.object_to_string(self._rootContext) - # - # If container name contains "Py" launch a Python Container 
- # - if theContainer.find('Py') == -1 : - CMD=['SALOME_Container',theContainer,'-ORBInitRef','NameService='+addr] - else: - CMD=['SALOME_ContainerPy.py',theContainer,'-ORBInitRef','NameService='+addr] - if theComputer in ("","localhost"): - theComputer=getShortHostName() - # - # Get the appropriate launcher and ask to launch - # - Launchers.getLauncher(theComputer).launch(theComputer,CMD) - # - # Wait until the container is registered in Naming Service - # - count =5 - aContainer=None - while aContainer is None and count > 0: - time.sleep(1) - count = count - 1 - MESSAGE( str(count) + ". Waiting for " + theComputer + "/" + theContainer ) - aContainer = self.FindContainer( theComputer + "/" + theContainer ) - return aContainer - - #------------------------------------------------------------------------- - - def FindOrStartContainer(self, theComputer , theContainer ): - """Find or Start a container on theComputer machine with theContainer name - """ - if theComputer in ("","localhost"): - theComputer=getShortHostName() - MESSAGE( "FindOrStartContainer: " + theComputer + theContainer ) - aContainer = self.FindContainer( theComputer + "/" + theContainer ) - if aContainer is None : - aContainer= self.StartContainer(theComputer , theContainer ) - return aContainer - - #------------------------------------------------------------------------- - - def LoadComponent(self,containerName,componentName,listOfMachine): - container=self._contManager.FindOrStartContainer(containerName,listOfMachine) - implementation="lib"+componentName+"Engine.so" - try: - component = container.load_impl(componentName, implementation) - MESSAGE( "component " + component._get_instanceName() + " launched !" ) - return component - except: - MESSAGE( "component " + componentName + " NOT launched !" 
) - return None - - #------------------------------------------------------------------------- - - - def FindOrLoadComponent(self, containerName, componentName): - sp=containerName.split("/") - if len(sp)==1: - listOfMachine=[] - listOfMachine.append(getShortHostName()) - comp=self.FindComponent(containerName,componentName,listOfMachine) - if comp is None: - return self.LoadComponent(containerName,componentName,listOfMachine) - else: - return comp - pass - else: - params= Engines.MachineParameters(sp[1],sp[0],"LINUX",0,0,0,0) - listOfMachine=self._contManager.GetFittingResources(params,componentName) - ret=self.FindComponent(sp[1],componentName,listOfMachine); - if ret is None: - return self.LoadComponent(sp[1],componentName,listOfMachine) - else: - return ret - pass - diff --git a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx index 4601dce6f..6a6f1335b 100644 --- a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx +++ b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx @@ -131,7 +131,10 @@ string SALOME_FileTransferCORBA::getLocalFile(string localFile) params.container_name = _containerName.c_str(); params.hostname = _refMachine.c_str(); - container = contManager->FindOrStartContainer(params); + Engines::ContainerParameters new_params; + LCC.convert(params, new_params); + new_params.mode = CORBA::string_dup("findorstart"); + container = contManager->GiveContainer(new_params); if (CORBA::is_nil(container)) { INFOS("machine " << _refMachine << " unreachable"); diff --git a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx index 223a71511..20ba9dcc4 100644 --- a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx +++ b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx @@ -34,6 +34,7 @@ #include CORBA_CLIENT_HEADER(SALOME_Component) #include +#include #ifdef WIN32 # if defined LIFECYCLECORBA_EXPORTS || defined SalomeLifeCycleCORBA_EXPORTS diff --git 
a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx index 534553609..c3e25f02e 100644 --- a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx +++ b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx @@ -137,15 +137,16 @@ SALOME_LifeCycleCORBA::FindComponent(const Engines::MachineParameters& params, if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; - Engines::MachineList_var listOfMachines = _ResManager->GetFittingResources(parms); + Engines::ContainerParameters new_params; + convert(params, new_params); + new_params.resource_params.componentList.length(1); + new_params.resource_params.componentList[0] = componentName; + Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params); - Engines::Component_var compo = _FindComponent(parms, + Engines::Component_var compo = _FindComponent(new_params, componentName, studyId, - listOfMachines); + listOfResources); return compo._retn(); } @@ -170,14 +171,15 @@ SALOME_LifeCycleCORBA::LoadComponent(const Engines::MachineParameters& params, if (! 
isKnownComponentClass(componentName)) return Engines::Component::_nil(); - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; + Engines::ContainerParameters new_params; + convert(params, new_params); + new_params.resource_params.componentList.length(1); + new_params.resource_params.componentList[0] = componentName; - Engines::MachineList_var listOfMachines = _ResManager->GetFittingResources(parms); - parms.computerList=listOfMachines; + Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params); + new_params.resource_params.resList = listOfResources; - Engines::Component_var compo = _LoadComponent(parms, + Engines::Component_var compo = _LoadComponent(new_params, componentName, studyId); @@ -206,23 +208,65 @@ FindOrLoad_Component(const Engines::MachineParameters& params, if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; - Engines::MachineList_var listOfMachines = _ResManager->GetFittingResources(parms); + Engines::ContainerParameters new_params; + convert(params, new_params); + new_params.resource_params.componentList.length(1); + new_params.resource_params.componentList[0] = componentName; - Engines::Component_var compo = _FindComponent(parms, + // For Compatibility -> if hostname == localhost put name == hostname + if (std::string(new_params.resource_params.hostname.in()) == "localhost") + { + new_params.resource_params.hostname = CORBA::string_dup(Kernel_Utils::GetHostname().c_str()); + new_params.resource_params.name = CORBA::string_dup(Kernel_Utils::GetHostname().c_str()); + } + + Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params); + + Engines::Component_var compo = _FindComponent(new_params, componentName, studyId, - listOfMachines); + 
listOfResources);
   if(CORBA::is_nil(compo))
-    {
-      parms.computerList=listOfMachines;
-      compo = _LoadComponent(parms,
+  {
+    new_params.resource_params.resList = listOfResources;
+    compo = _LoadComponent(new_params,
                              componentName,
                              studyId);
-    }
+  }
+
+  return compo._retn();
+}
+
+//=============================================================================
+/*! Find a component instance matching the container parameters, or load one.
+ *  Same sequence as the MachineParameters overload above, but the caller
+ *  gives Engines::ContainerParameters directly, so no conversion and no
+ *  "localhost" substitution is done here.
+ *  \param params : container parameters; a copy is completed with the
+ *         component name before the resources manager is queried
+ *  \param componentName : name of the component class
+ *  \param studyId : passed unchanged to _FindComponent / _LoadComponent
+ *  \return the component found or loaded, or a nil reference when
+ *          isKnownComponentClass(componentName) is false
+ */
+//=============================================================================
+Engines::Component_ptr
+SALOME_LifeCycleCORBA::
+FindOrLoad_Component(const Engines::ContainerParameters& params,
+                     const char *componentName,
+                     int studyId)
+{
+  // --- Check if Component Name is known in ModuleCatalog
+
+  if (! isKnownComponentClass(componentName))
+    return Engines::Component::_nil();
+
+  // Work on a copy: the requested component becomes the only entry of the
+  // component list used to select resources.
+  Engines::ContainerParameters new_params(params);
+  new_params.resource_params.componentList.length(1);
+  new_params.resource_params.componentList[0] = componentName;
+
+  Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params);
+
+  // First try to find the component on the fitting resources ...
+  Engines::Component_var compo = _FindComponent(new_params,
+                                                componentName,
+                                                studyId,
+                                                listOfResources);
+
+  // ... and only load it, restricted to those resources, when none was found.
+  if(CORBA::is_nil(compo))
+  {
+    new_params.resource_params.resList = listOfResources;
+    compo = _LoadComponent(new_params,
+                           componentName,
+                           studyId);
+  }
   return compo._retn();
 }
@@ -243,30 +287,13 @@ Engines::Component_ptr
 SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName,
                                             const char *componentName)
 {
-  char *valenv=getenv("SALOME_BATCH");
-  if(valenv)
-    if (strcmp(valenv,"1")==0)
-      {
-        MESSAGE("SALOME_LifeCycleCORBA::FindOrLoad_Component BATCH " << containerName << " " << componentName ) ;
-        _NS->Change_Directory("/Containers");
-        CORBA::Object_ptr obj=_NS->Resolve(containerName);
-        Engines::Container_var cont=Engines::Container::_narrow(obj);
-        bool isLoadable = cont->load_component_Library(componentName);
-        if (!isLoadable) return Engines::Component::_nil();
-
-        Engines::Component_ptr myInstance =
-          cont->create_component_instance(componentName, 0);
-        return myInstance;
-      }
   MESSAGE("SALOME_LifeCycleCORBA::FindOrLoad_Component INTERACTIF " << containerName << " " << componentName ) ;
- //#if 0 - // --- Check if Component Name is known in ModuleCatalog + // --- Check if Component Name is known in ModuleCatalog if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); // --- Check if containerName contains machine name (if yes: rg>0) - char *stContainer=strdup(containerName); string st2Container(stContainer); int rg=st2Container.find("/"); @@ -274,29 +301,21 @@ SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName, Engines::MachineParameters_var params=new Engines::MachineParameters; preSet(params); if (rg<0) - { - // containerName doesn't contain "/" => Local container - params->container_name=CORBA::string_dup(stContainer); - params->hostname=""; - } + { + // containerName doesn't contain "/" => Local container + params->container_name=CORBA::string_dup(stContainer); + params->hostname=""; + } else - { - stContainer[rg]='\0'; - params->container_name=CORBA::string_dup(stContainer+rg+1); - params->hostname=CORBA::string_dup(stContainer); - } + { + stContainer[rg]='\0'; + params->container_name=CORBA::string_dup(stContainer+rg+1); + params->hostname=CORBA::string_dup(stContainer); + } params->isMPI = false; SCRUTE(params->container_name); -// SCRUTE(params->hostname); -// SCRUTE(params->OS); -// SCRUTE(params->mem_mb); -// SCRUTE(params->cpu_clock); -// SCRUTE(params->nb_proc_per_node); -// SCRUTE(params->nb_node); -// SCRUTE(params->isMPI); free(stContainer); - return FindOrLoad_Component(params,componentName); - //#endif + return FindOrLoad_Component(params, componentName); } //============================================================================= @@ -309,31 +328,30 @@ SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName, bool SALOME_LifeCycleCORBA::isKnownComponentClass(const char *componentName) { - try + { + CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); + SALOME_ModuleCatalog::ModuleCatalog_var Catalog = + SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; + 
ASSERT(! CORBA::is_nil(Catalog)); + SALOME_ModuleCatalog::Acomponent_var compoInfo = + Catalog->GetComponent(componentName); + if (CORBA::is_nil (compoInfo)) { - CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); - SALOME_ModuleCatalog::ModuleCatalog_var Catalog = - SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; - ASSERT(! CORBA::is_nil(Catalog)); - SALOME_ModuleCatalog::Acomponent_var compoInfo = - Catalog->GetComponent(componentName); - if (CORBA::is_nil (compoInfo)) - { - INFOS("Catalog Error: Component not found in the catalog" ); - INFOS( componentName ); - return false; - } - else return true; + INFOS("Catalog Error: Component not found in the catalog" ); + INFOS( componentName ); + return false; } + else return true; + } catch (ServiceUnreachable&) - { - INFOS("Caught exception: Naming Service Unreachable"); - } + { + INFOS("Caught exception: Naming Service Unreachable"); + } catch (...) - { - INFOS("Caught unknown exception."); - } + { + INFOS("Caught unknown exception."); + } return false; } @@ -344,7 +362,7 @@ bool SALOME_LifeCycleCORBA::isKnownComponentClass(const char *componentName) //============================================================================= bool -SALOME_LifeCycleCORBA::isMpiContainer(const Engines::MachineParameters& params) +SALOME_LifeCycleCORBA::isMpiContainer(const Engines::ContainerParameters& params) throw(IncompatibleComponent) { if( params.isMPI ) @@ -360,6 +378,7 @@ SALOME_LifeCycleCORBA::isMpiContainer(const Engines::MachineParameters& params) * - container_name = "" : not relevant * - hostname = "" : not relevant * - OS = "" : not relevant + * - nb_proc = 0 : not relevant * - mem_mb = 0 : not relevant * - cpu_clock = 0 : not relevant * - nb_proc_per_node = 0 : not relevant @@ -368,12 +387,10 @@ SALOME_LifeCycleCORBA::isMpiContainer(const Engines::MachineParameters& params) */ //============================================================================= -void SALOME_LifeCycleCORBA::preSet( 
Engines::MachineParameters& params) +void SALOME_LifeCycleCORBA::preSet(Engines::MachineParameters& params) { params.container_name = ""; params.hostname = ""; - //param.componentList = 0; - //param.computerList = 0; params.OS = ""; params.mem_mb = 0; params.cpu_clock = 0; @@ -387,24 +404,77 @@ void SALOME_LifeCycleCORBA::preSet( Engines::MachineParameters& params) params.nb_component_nodes = 0; } +void +SALOME_LifeCycleCORBA::preSet(Engines::ResourceParameters& params) +{ + params.name = ""; + params.hostname = ""; + params.OS = ""; + params.nb_proc = 0; + params.mem_mb = 0; + params.cpu_clock = 0; + params.nb_node = 0; + params.nb_proc_per_node = 0; + params.policy = ""; +} + +void SALOME_LifeCycleCORBA::preSet( Engines::ContainerParameters& params) +{ + params.container_name = ""; + params.mode = ""; + params.workingdir = ""; + params.nb_proc = 0; + params.isMPI = false; + params.parallelLib = ""; + SALOME_LifeCycleCORBA::preSet(params.resource_params); +} + +void +SALOME_LifeCycleCORBA::convert(const Engines::MachineParameters& params_in, + Engines::ContainerParameters& params_out) +{ + SALOME_LifeCycleCORBA::preSet(params_out); + + // Container part + params_out.container_name = params_in.container_name; + params_out.mode = params_in.mode; + params_out.workingdir = params_in.workingdir; + params_out.isMPI = params_in.isMPI; + params_out.parallelLib = params_in.parallelLib; + + // Resource part + params_out.resource_params.hostname = params_in.hostname; + params_out.resource_params.OS = params_in.OS; + params_out.resource_params.mem_mb = params_in.mem_mb; + params_out.resource_params.cpu_clock = params_in.cpu_clock; + params_out.resource_params.nb_node = params_in.nb_node; + params_out.resource_params.nb_proc_per_node = params_in.nb_proc_per_node; + params_out.resource_params.policy = params_in.policy; + params_out.resource_params.componentList = params_in.componentList; + + params_out.resource_params.resList.length(params_in.computerList.length()); + for 
(CORBA::ULong i = 0; i < params_in.computerList.length(); i++) + params_out.resource_params.resList[i] = params_in.computerList[i]; +} + //============================================================================= /*! * \return a number of processors not 0, only for MPI containers */ //============================================================================= -int SALOME_LifeCycleCORBA::NbProc(const Engines::MachineParameters& params) +int SALOME_LifeCycleCORBA::NbProc(const Engines::ContainerParameters& params) { if( !isMpiContainer(params) ) return 0; - else if( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) + else if( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) return 1; - else if( params.nb_node == 0 ) - return params.nb_proc_per_node; - else if( params.nb_proc_per_node == 0 ) - return params.nb_node; + else if( params.resource_params.nb_node == 0 ) + return params.resource_params.nb_proc_per_node; + else if( params.resource_params.nb_proc_per_node == 0 ) + return params.resource_params.nb_node; else - return params.nb_node * params.nb_proc_per_node; + return params.resource_params.nb_node * params.resource_params.nb_proc_per_node; } //============================================================================= @@ -449,14 +519,14 @@ void SALOME_LifeCycleCORBA::shutdownServers() CORBA::Long pid = 0; CORBA::Object_var objS = _NS->Resolve("/Kernel/Session"); if (!CORBA::is_nil(objS)) + { + session = SALOME::Session::_narrow(objS); + if (!CORBA::is_nil(session)) { - session = SALOME::Session::_narrow(objS); - if (!CORBA::is_nil(session)) - { - pid = session->getPID(); - session->ping(); - } + pid = session->getPID(); + session->ping(); } + } string hostname = Kernel_Utils::GetHostname(); @@ -504,26 +574,27 @@ void SALOME_LifeCycleCORBA::shutdownServers() name.length(1); name[0].id = CORBA::string_dup(stdname.c_str()); try - { - if(!CORBA::is_nil(orb)) - theObj = 
orb->resolve_initial_references("NameService"); - if (!CORBA::is_nil(theObj)) - inc = CosNaming::NamingContext::_narrow(theObj); - } + { + if(!CORBA::is_nil(orb)) + theObj = orb->resolve_initial_references("NameService"); + if (!CORBA::is_nil(theObj)) + inc = CosNaming::NamingContext::_narrow(theObj); + } catch(...) + { + } + if(!CORBA::is_nil(inc)) + { + try { + objLog = inc->resolve(name); + SALOME_Logger::Logger_var logger = SALOME_Logger::Logger::_narrow(objLog); + if ( !CORBA::is_nil(logger) ) + logger->shutdown(); } - if(!CORBA::is_nil(inc)) { - try - { - objLog = inc->resolve(name); - SALOME_Logger::Logger_var logger = SALOME_Logger::Logger::_narrow(objLog); - if ( !CORBA::is_nil(logger) ) - logger->shutdown(); - } catch(...) - { - } + { + } } } @@ -536,31 +607,31 @@ void SALOME_LifeCycleCORBA::killOmniNames() { string portNumber (::getenv ("NSPORT") ); if ( !portNumber.empty() ) - { + { #ifdef WNT #else - string cmd ; - cmd = string( "ps -eo pid,command | grep -v grep | grep -E \"omniNames.*") - + portNumber - + string("\" | awk '{cmd=sprintf(\"kill -9 %s\",$1); system(cmd)}'" ); - MESSAGE(cmd); - try { - system ( cmd.c_str() ); - } - catch ( ... ) { - } -#endif + string cmd ; + cmd = string( "ps -eo pid,command | grep -v grep | grep -E \"omniNames.*") + + portNumber + + string("\" | awk '{cmd=sprintf(\"kill -9 %s\",$1); system(cmd)}'" ); + MESSAGE(cmd); + try { + system ( cmd.c_str() ); + } + catch ( ... 
) { } +#endif + } // NPAL 18309 (Kill Notifd) if ( !portNumber.empty() ) - { - string cmd = ("from killSalomeWithPort import killNotifdAndClean; "); - cmd += string("killNotifdAndClean(") + portNumber + "); "; - cmd = string("python -c \"") + cmd +"\" >& /dev/null"; - MESSAGE(cmd); - system( cmd.c_str() ); - } + { + string cmd = ("from killSalomeWithPort import killNotifdAndClean; "); + cmd += string("killNotifdAndClean(") + portNumber + "); "; + cmd = string("python -c \"") + cmd +"\" >& /dev/null"; + MESSAGE(cmd); + system( cmd.c_str() ); + } } //============================================================================= @@ -579,47 +650,42 @@ void SALOME_LifeCycleCORBA::killOmniNames() Engines::Component_ptr SALOME_LifeCycleCORBA:: -_FindComponent(const Engines::MachineParameters& params, +_FindComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId, - const Engines::MachineList& listOfMachines) + const Engines::ResourceList& listOfResources) { // --- build the list of machines on which the component is already running - const char *containerName = params.container_name; int nbproc = NbProc(params); -// MESSAGE("_FindComponent, required " << containerName << -// " " << componentName << " " << nbproc); - Engines::MachineList_var machinesOK = new Engines::MachineList; + Engines::ResourceList_var resourcesOK = new Engines::ResourceList; - unsigned int lghtOfmachinesOK = 0; - machinesOK->length(listOfMachines.length()); + unsigned int lghtOfresourcesOK = 0; + resourcesOK->length(listOfResources.length()); - for(unsigned int i=0; iResolveComponent(currentMachine, - containerName, - componentName, - nbproc); - if (!CORBA::is_nil(obj)) - machinesOK[lghtOfmachinesOK++] = CORBA::string_dup(currentMachine); - } + for(unsigned int i=0; i < listOfResources.length(); i++) + { + const char * currentResource = listOfResources[i]; + CORBA::Object_var obj = _NS->ResolveComponent(currentResource, + containerName, + componentName, + nbproc); + 
if (!CORBA::is_nil(obj)) + resourcesOK[lghtOfresourcesOK++] = CORBA::string_dup(currentResource); + } // --- find the best machine among the list - - if(lghtOfmachinesOK != 0) - { - machinesOK->length(lghtOfmachinesOK); - CORBA::String_var bestMachine = _ResManager->FindFirst(machinesOK); - CORBA::Object_var obj = _NS->ResolveComponent(bestMachine, - containerName, - componentName, - nbproc); - return Engines::Component::_narrow(obj); - } + if(lghtOfresourcesOK != 0) + { + resourcesOK->length(lghtOfresourcesOK); + CORBA::String_var bestResource = _ResManager->FindFirst(resourcesOK); + CORBA::Object_var obj = _NS->ResolveComponent(bestResource, + containerName, + componentName, + nbproc); + return Engines::Component::_narrow(obj); + } else return Engines::Component::_nil(); } @@ -640,14 +706,16 @@ _FindComponent(const Engines::MachineParameters& params, Engines::Component_ptr SALOME_LifeCycleCORBA:: -_LoadComponent(const Engines::MachineParameters& params, +_LoadComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId) { MESSAGE("_LoadComponent, required " << params.container_name << " " << componentName << " " << NbProc(params)); - Engines::Container_var cont = _ContManager->FindOrStartContainer(params); + Engines::ContainerParameters local_params(params); + local_params.mode = CORBA::string_dup("findorstart"); + Engines::Container_var cont = _ContManager->GiveContainer(local_params); if (CORBA::is_nil(cont)) return Engines::Component::_nil(); bool isLoadable = cont->load_component_Library(componentName); @@ -668,7 +736,7 @@ _LoadComponent(const Engines::MachineParameters& params, */ //============================================================================= Engines::Component_ptr -SALOME_LifeCycleCORBA::Load_ParallelComponent(const Engines::MachineParameters& params, +SALOME_LifeCycleCORBA::Load_ParallelComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId) { @@ -679,12 +747,13 @@ 
SALOME_LifeCycleCORBA::Load_ParallelComponent(const Engines::MachineParameters& MESSAGE("Number of component nodes : " << params.nb_component_nodes); MESSAGE("Component Name : " << componentName);*/ - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; + Engines::ContainerParameters parms(params); + parms.resource_params.componentList.length(1); + parms.resource_params.componentList[0] = componentName; + parms.mode = CORBA::string_dup("findorstart"); MESSAGE("Starting Parallel Container"); - Engines::Container_var cont = _ContManager->StartParallelContainer(parms); + Engines::Container_var cont = _ContManager->GiveContainer(parms); if (CORBA::is_nil(cont)) { INFOS("FindOrStartParallelContainer() returns a NULL container !"); return Engines::Component::_nil(); @@ -725,14 +794,15 @@ void SALOME_LifeCycleCORBA::copyFile(const char* hostSrc, const char* fileSrc, c Engines::ContainerManager_var contManager = getContainerManager(); - Engines::MachineParameters params; + Engines::ContainerParameters params; preSet(params); - params.hostname = hostDest; - Engines::Container_var containerDest = contManager->FindOrStartContainer(params); + params.resource_params.hostname = hostDest; + params.mode = CORBA::string_dup("findorstart"); + Engines::Container_var containerDest = contManager->GiveContainer(params); - params.hostname = hostSrc; - Engines::Container_var containerSrc = contManager->FindOrStartContainer(params); + params.resource_params.hostname = hostSrc; + Engines::Container_var containerSrc = contManager->GiveContainer(params); containerDest->copyFile(containerSrc,fileSrc,fileDest); } diff --git a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx index c22385796..8287139f1 100644 --- a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx +++ b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx @@ -81,24 +81,32 @@ public: const char *componentName, int studyId =0); + // SALOME 
6 - Interface + Engines::Component_ptr + FindOrLoad_Component(const Engines::ContainerParameters& params, + const char *componentName, + int studyId =0); + Engines::Component_ptr FindOrLoad_Component(const char *containerName, const char *componentName); // for compatibility // Parallel extension Engines::Component_ptr - Load_ParallelComponent(const Engines::MachineParameters& params, + Load_ParallelComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId); bool isKnownComponentClass(const char *componentName); - bool isMpiContainer(const Engines::MachineParameters& params) + bool isMpiContainer(const Engines::ContainerParameters& params) throw(IncompatibleComponent); - int NbProc(const Engines::MachineParameters& params); + int NbProc(const Engines::ContainerParameters& params); static void preSet(Engines::MachineParameters& outparams); + static void preSet(Engines::ResourceParameters& outparams); + static void preSet(Engines::ContainerParameters& outparams); Engines::ContainerManager_ptr getContainerManager(); Engines::ResourcesManager_ptr getResourcesManager(); @@ -109,6 +117,10 @@ public: void shutdownServers(); static void killOmniNames(); + // For SALOME 5.1.x + // Will be deleted on SALOME 6 + void convert(const Engines::MachineParameters& params_in, + Engines::ContainerParameters& params_out); protected: /*! Establish if a component called "componentName" in a container called @@ -117,16 +129,16 @@ protected: * This method uses Naming Service to find the component. 
*/ Engines::Component_ptr - _FindComponent(const Engines::MachineParameters& params, + _FindComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId, - const Engines::MachineList& listOfMachines); + const Engines::ResourceList& listOfResources); Engines::Component_ptr - _LoadComponent(const Engines::MachineParameters& params, + _LoadComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId); - + SALOME_NamingService *_NS; SALOME_NamingService *_NSnew; Engines::ContainerManager_var _ContManager; diff --git a/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx b/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx index fa4fc4777..7bf70e8c7 100644 --- a/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx +++ b/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx @@ -679,12 +679,12 @@ string LifeCycleCORBATest::GetRemoteHost() Engines::ResourcesManager::_narrow(obj); CPPUNIT_ASSERT(!CORBA::is_nil(resourcesManager)); - Engines::MachineParameters params; + Engines::ContainerParameters params; _LCC.preSet(params); // empty params to get all the machines - params.componentList.length(1); - params.componentList[0]="SalomeTestComponent"; + params.resource_params.componentList.length(1); + params.resource_params.componentList[0]="SalomeTestComponent"; - Engines::MachineList_var hostList = resourcesManager->GetFittingResources(params); + Engines::ResourceList_var hostList = resourcesManager->GetFittingResources(params.resource_params); CPPUNIT_ASSERT(hostList->length() > 1); string localHost = Kernel_Utils::GetHostname(); @@ -692,7 +692,8 @@ string LifeCycleCORBATest::GetRemoteHost() for (unsigned int i=0; i < hostList->length(); i++) { const char* aMachine = hostList[i]; - string machine(aMachine); + Engines::ResourceDefinition_var resource_definition = resourcesManager->GetResourceDefinition(aMachine); + string machine(resource_definition->hostname.in()); if (machine != localHost) { remoteHost = machine; diff --git 
a/src/LifeCycleCORBA/TestContainerManager.cxx b/src/LifeCycleCORBA/TestContainerManager.cxx index d9a3a82e3..8ffa25872 100644 --- a/src/LifeCycleCORBA/TestContainerManager.cxx +++ b/src/LifeCycleCORBA/TestContainerManager.cxx @@ -65,24 +65,24 @@ int main (int argc, char * argv[]) ASSERT( !CORBA::is_nil(obj)); Engines::ResourcesManager_var _ResManager=Engines::ResourcesManager::_narrow(obj); - Engines::MachineParameters p; - p.componentList.length(2); - p.componentList[0] = "MED"; - p.componentList[1] = "GEOM"; + Engines::ContainerParameters p; + p.resource_params.componentList.length(2); + p.resource_params.componentList[0] = "MED"; + p.resource_params.componentList[1] = "GEOM"; - p.hostname = ""; - p.OS = "LINUX"; - p.mem_mb = 1000; - p.cpu_clock = 1000; - p.nb_proc_per_node = 1; - p.nb_node = 1; + p.resource_params.hostname = ""; + p.resource_params.OS = "LINUX"; + p.resource_params.mem_mb = 1000; + p.resource_params.cpu_clock = 1000; + p.resource_params.nb_proc_per_node = 1; + p.resource_params.nb_node = 1; p.isMPI = false; char st[10]; for(int i=0;i<10;i++){ sprintf(st,"cycl_%d",i); p.container_name = CORBA::string_dup(st); - p.policy="cycl"; + p.resource_params.policy="cycl"; cont = _ContManager->GiveContainer(p); if(CORBA::is_nil(cont)) error = true; } @@ -90,13 +90,13 @@ int main (int argc, char * argv[]) for(int i=0;i<10;i++){ sprintf(st,"first_%d",i); p.container_name = CORBA::string_dup(st); - p.policy="first"; + p.resource_params.policy="first"; cont = _ContManager->GiveContainer(p); if(CORBA::is_nil(cont)) error = true; } p.container_name = CORBA::string_dup("best"); - p.policy="best"; + p.resource_params.policy="best"; cont = _ContManager->GiveContainer(p); if(CORBA::is_nil(cont)) bestImplemented = false; else bestImplemented = true; @@ -141,7 +141,7 @@ int main (int argc, char * argv[]) int nbpmax; for(std::map::iterator iter=cycle.begin();iter!=cycle.end();iter++){ if(strcmp((*iter).first.c_str(),"localhost")!=0){ - Engines::MachineDefinition *p = 
_ResManager->GetMachineParameters((*iter).first.c_str()); + Engines::ResourceDefinition *p = _ResManager->GetResourceDefinition((*iter).first.c_str()); int nbproc = p->nb_node * p->nb_proc_per_node; if(cycle[(*iter).first]/nbproccmax) cmax=cycle[(*iter).first]/nbproc; diff --git a/src/NamingService/SALOME_NamingService.cxx b/src/NamingService/SALOME_NamingService.cxx index ece4ca55a..8cf3c9268 100644 --- a/src/NamingService/SALOME_NamingService.cxx +++ b/src/NamingService/SALOME_NamingService.cxx @@ -626,6 +626,34 @@ SALOME_NamingService::ContainerName(const Engines::MachineParameters& params) return ret; } +string +SALOME_NamingService::ContainerName(const Engines::ContainerParameters& params) +{ + int nbproc; + + if ( !params.isMPI ) + nbproc = 0; + else if ( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) + nbproc = 1; + else if ( params.resource_params.nb_node == 0 ) + nbproc = params.resource_params.nb_proc_per_node; + else if ( params.resource_params.nb_proc_per_node == 0 ) + nbproc = params.resource_params.nb_node; + else + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; + + string ret = ContainerName(params.container_name); + + if ( nbproc >= 1 ) + { + char *suffix = new char[8]; + sprintf(suffix, "_%d", nbproc); + ret += suffix; + } + + return ret; +} + // ============================================================================ /*! \brief build a string representing a container in Naming Service. * @@ -676,6 +704,19 @@ BuildContainerNameForNS(const Engines::MachineParameters& params, return ret; } +string +SALOME_NamingService:: +BuildContainerNameForNS(const Engines::ContainerParameters& params, + const char *hostname) +{ + string ret = "/Containers/"; + ret += hostname; + ret += "/"; + ret += ContainerName(params); + + return ret; +} + // ============================================================================ /*! \brief search a name in current directory. 
* diff --git a/src/NamingService/SALOME_NamingService.hxx b/src/NamingService/SALOME_NamingService.hxx index df06b2e1c..cadc0530b 100644 --- a/src/NamingService/SALOME_NamingService.hxx +++ b/src/NamingService/SALOME_NamingService.hxx @@ -62,10 +62,16 @@ public: const int nbproc=0) throw(ServiceUnreachable); std::string ContainerName(const char *ContainerName); - std::string ContainerName(const Engines::MachineParameters& params); + std::string ContainerName(const Engines::ContainerParameters& params); std::string BuildContainerNameForNS(const char *ContainerName, const char *hostname); std::string + BuildContainerNameForNS(const Engines::ContainerParameters& params, + const char *hostname); + + // Will Be deleted on SALOME 6 + std::string ContainerName(const Engines::MachineParameters& params); + std::string BuildContainerNameForNS(const Engines::MachineParameters& params, const char *hostname); int Find(const char* name) diff --git a/src/ResourcesManager/ResourcesManager.cxx b/src/ResourcesManager/ResourcesManager.cxx index c874c7781..dc3edc2df 100644 --- a/src/ResourcesManager/ResourcesManager.cxx +++ b/src/ResourcesManager/ResourcesManager.cxx @@ -77,17 +77,24 @@ ResourcesManager_cpp(const char *xmlFilePath) ResourcesManager_cpp::ResourcesManager_cpp() throw(ResourcesException) { -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp constructor" << endl; -#endif + RES_MESSAGE("ResourcesManager_cpp constructor"); + _resourceManagerMap["first"]=&first; _resourceManagerMap["cycl"]=&cycl; _resourceManagerMap["altcycl"]=&altcycl; _resourceManagerMap["best"]=&altcycl; _resourceManagerMap[""]=&altcycl; - std::string default_file(""); - if (getenv("APPLI") != 0) + if (getenv("USER_CATALOG_RESOURCES_FILE") != 0) + { + std::string user_file(""); + user_file = getenv("USER_CATALOG_RESOURCES_FILE"); + _path_resources.push_back(user_file); + } + else + { + std::string default_file(""); + if (getenv("APPLI") != 0) { default_file += getenv("HOME"); 
default_file += "/"; @@ -95,7 +102,7 @@ ResourcesManager_cpp::ResourcesManager_cpp() throw(ResourcesException) default_file += "/CatalogResources.xml"; _path_resources.push_back(default_file); } - else + else { if(!getenv("KERNEL_ROOT_DIR")) throw ResourcesException("you must define KERNEL_ROOT_DIR environment variable!! -> cannot load a CatalogResources.xml"); @@ -103,20 +110,12 @@ ResourcesManager_cpp::ResourcesManager_cpp() throw(ResourcesException) default_file += "/share/salome/resources/kernel/CatalogResources.xml"; _path_resources.push_back(default_file); } - - if (getenv("USER_CATALOG_RESOURCES_FILE") != 0) - { - std::string user_file(""); - user_file = getenv("USER_CATALOG_RESOURCES_FILE"); - _path_resources.push_back(user_file); } _lasttime=0; ParseXmlFiles(); -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp constructor end"; -#endif + RES_MESSAGE("ResourcesManager_cpp constructor end"); } //============================================================================= @@ -127,159 +126,134 @@ ResourcesManager_cpp::ResourcesManager_cpp() throw(ResourcesException) ResourcesManager_cpp::~ResourcesManager_cpp() { -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp destructor" << endl; -#endif + RES_MESSAGE("ResourcesManager_cpp destructor"); } //============================================================================= //! get the list of resource names fitting constraints given by params /*! - * If hostname is specified, check if it is local or known in resources catalog. 
- * - * Else - * - select first machines with corresponding OS (all machines if - * parameter OS empty), - * - then select the sublist of machines on which the component is known - * (if the result is empty, that probably means that the inventory of - * components is probably not done, so give complete list from previous step) + * Steps: + * 1: Restrict list with resourceList if defined + * 2: If name is defined -> check resource list + * 3: If not 2:, if hostname is defined -> check resource list + * 4: If not 3:, sort resource with nb_proc, etc... + * 5: In all cases remove resource that does not correspond with OS + * 6: And remove resource with componentList - if list is empty ignored it... */ //============================================================================= std::vector -ResourcesManager_cpp::GetFittingResources(const machineParams& params) throw(ResourcesException) +ResourcesManager_cpp::GetFittingResources(const resourceParams& params) throw(ResourcesException) { - vector vec; + RES_MESSAGE("[GetFittingResources] on computer " << Kernel_Utils::GetHostname().c_str()); + RES_MESSAGE("[GetFittingResources] with resource name: " << params.name); + RES_MESSAGE("[GetFittingResources] with hostname: "<< params.hostname); - ParseXmlFiles(); + // Result + std::vector vec; - const char *hostname = params.hostname.c_str(); -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "GetFittingResources " << hostname << " " << Kernel_Utils::GetHostname().c_str() << endl; -#endif + // Parse Again CalatogResource File + ParseXmlFiles(); - // PaCO++ parallel container case - std::string parallelLib(params.parallelLib); - if (params.nb_component_nodes > 0 && parallelLib != "") + // Steps: + // 1: Restrict list with resourceList if defined + // 2: If name is defined -> check resource list + // 3: If not 2:, if hostname is defined -> check resource list + // 4: If not 3:, sort resource with nb_proc, etc... 
+ // 5: In all cases remove resource that does not correspond with OS + // 6: And remove resource with componentList - if list is empty ignored it... + + + MapOfParserResourcesType local_resourcesList = _resourcesList; + // Step 1 + if (params.resourceList.size() > 0) { -#if defined(_DEBUG_) || defined(_DEBUG) - std::cerr << "[GetFittingResources] ParallelContainer case" << std::endl; - std::cerr << "[GetFittingResources] parallelLib is " << parallelLib << std::endl; - std::cerr << "[GetFittingResources] nb_component_nodes is " << params.nb_component_nodes << std::endl; -#endif + RES_MESSAGE("[GetFittingResources] Restricted resource list found !"); + local_resourcesList.clear(); + std::vector::size_type sz = params.resourceList.size(); - // Currently we only support parallel containers that define a hostname target - if (hostname[0] != '\0') + for (unsigned int i=0; i < sz; i++) { - // Special case of localhost -> put containers into the real computer name - if (strcmp(hostname, "localhost") == 0) - vec.push_back(Kernel_Utils::GetHostname().c_str()); - else - { - // Try find the resource into the map - if (_resourcesList.find(hostname) != _resourcesList.end()) - vec.push_back(hostname); - else - std::cerr << "[GetFittingResources] ParallelContainer hostname does not exist into the resource list !" << std::endl; - } + if (_resourcesList.find(params.resourceList[i]) != _resourcesList.end()) + local_resourcesList[params.resourceList[i]] = _resourcesList[params.resourceList[i]]; + } + } + + // Step 2 + if (params.name != "") + { + RES_MESSAGE("[GetFittingResources] name parameter found !"); + if (_resourcesList.find(params.name) != _resourcesList.end()) + { + vec.push_back(params.name); } else - std::cerr << "[GetFittingResources] ParallelContainer hostname is empty -> cannot find a possible resource" << std::endl; - return vec; + RES_MESSAGE("[GetFittingResources] name was not found on resource list ! 
name was " << params.name); } - if (hostname[0] != '\0'){ + // Step 3 + else if (params.hostname != "") + { + RES_MESSAGE("[GetFittingResources] Entering in hostname case !"); - if ( strcmp(hostname, "localhost") == 0 || - strcmp(hostname, Kernel_Utils::GetHostname().c_str()) == 0 ) - { -//#if defined(_DEBUG_) || defined(_DEBUG) -// cerr << "ResourcesManager_cpp::GetFittingResources : localhost" << endl; -//#endif - vec.push_back(Kernel_Utils::GetHostname().c_str()); -//#if defined(_DEBUG_) || defined(_DEBUG) -// cerr << "ResourcesManager_cpp::GetFittingResources : " << vec.size() << endl; -//#endif - } - - else if (_resourcesList.find(hostname) != _resourcesList.end()) - { - // --- params.hostname is in the list of resources so return it. - vec.push_back(hostname); - } - - else if (_resourcesBatchList.find(hostname) != _resourcesBatchList.end()) + std::string hostname = params.hostname; + if (hostname == "localhost") + hostname = Kernel_Utils::GetHostname().c_str(); + + std::map::const_iterator iter = _resourcesList.begin(); + for (; iter != _resourcesList.end(); iter++) { - // --- params.hostname is in the list of resources so return it. - vec.push_back(hostname); + if ((*iter).second.HostName == hostname) + vec.push_back((*iter).first); } - - else - { - // Cas d'un cluster: nombre de noeuds > 1 - int cpt=0; - for (map::const_iterator iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++){ - if( (*iter).second.DataForSort._nbOfNodes > 1 ){ - if( strncmp(hostname,(*iter).first.c_str(),strlen(hostname)) == 0 ){ - vec.push_back((*iter).first.c_str()); - cpt++; - } - } - } - if(cpt==0){ - // --- user specified an unknown hostame so notify him. 
-#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp::GetFittingResources : SALOME_Exception" << endl; -#endif - std::string error("GetFittinResouces : ResourcesManager doesn't find the host requested : "); - error += hostname; - throw ResourcesException(error); - } - } } - - else{ - // --- Search for available resources sorted by priority - vec=params.computerList; - SelectOnlyResourcesWithOS(vec, params.OS.c_str()); - - KeepOnlyResourcesWithComponent(vec, params.componentList); + // Step 4 + else + { + // --- Search for available resources sorted by priority + MapOfParserResourcesType_it i = local_resourcesList.begin(); + for (; i != local_resourcesList.end(); ++i) + vec.push_back(i->first); - //if hosts list (vec) is empty, ignore componentList constraint and use only OS constraint - if (vec.size() == 0) - SelectOnlyResourcesWithOS(vec, params.OS.c_str()); - // --- set wanted parameters + ResourceDataToSort::_nbOfProcWanted = params.nb_proc; ResourceDataToSort::_nbOfNodesWanted = params.nb_node; - ResourceDataToSort::_nbOfProcPerNodeWanted = params.nb_proc_per_node; - ResourceDataToSort::_CPUFreqMHzWanted = params.cpu_clock; - ResourceDataToSort::_memInMBWanted = params.mem_mb; - // --- end of set - list li; - - for (vector::iterator iter = vec.begin(); - iter != vec.end(); - iter++) - li.push_back(_resourcesList[(*iter)].DataForSort); - + // Sort + std::list li; + std::vector::iterator iter = vec.begin(); + for (; iter != vec.end(); iter++) + li.push_back(local_resourcesList[(*iter)].DataForSort); li.sort(); - - unsigned int i = 0; - - for (list::iterator iter2 = li.begin(); - iter2 != li.end(); - iter2++) - vec[i++] = (*iter2)._hostName; + + vec.clear(); + for (list::iterator iter2 = li.begin(); iter2 != li.end(); iter2++) + vec.push_back((*iter2)._Name); } + + // Step 5 + SelectOnlyResourcesWithOS(vec, params.OS.c_str()); - return vec; + // Step 6 + std::vector vec_save(vec); + KeepOnlyResourcesWithComponent(vec, params.componentList); + 
if (vec.size() == 0) + vec = vec_save; + + // End + // Send an exception if return list is empty... + if (vec.size() == 0) + { + std::string error("[GetFittingResources] ResourcesManager doesn't find any resource that feets to your parameters"); + throw ResourcesException(error); + } + return vec; } //============================================================================= @@ -290,14 +264,12 @@ ResourcesManager_cpp::GetFittingResources(const machineParams& params) throw(Res //============================================================================= int -ResourcesManager_cpp:: -AddResourceInCatalog(const machineParams& paramsOfNewResources, - const vector& componentsOnNewResources, - const char *alias, - const char *userName, - AccessModeType mode, - AccessProtocolType prot) -throw(ResourcesException) +ResourcesManager_cpp::AddResourceInCatalog(const resourceParams& paramsOfNewResources, + const vector& componentsOnNewResources, + const char *userName, + AccessModeType mode, + AccessProtocolType prot, + AccessProtocolType iprot) throw(ResourcesException) { vector::const_iterator iter = find(componentsOnNewResources.begin(), componentsOnNewResources.end(), @@ -306,9 +278,10 @@ throw(ResourcesException) if (iter != componentsOnNewResources.end()) { ParserResourcesType newElt; - newElt.DataForSort._hostName = paramsOfNewResources.hostname; - newElt.Alias = alias; + newElt.DataForSort._Name = paramsOfNewResources.name; + newElt.HostName = paramsOfNewResources.hostname; newElt.Protocol = prot; + newElt.ClusterInternalProtocol = iprot; newElt.Mode = mode; newElt.UserName = userName; newElt.ComponentsList = componentsOnNewResources; @@ -318,10 +291,9 @@ throw(ResourcesException) newElt.DataForSort._nbOfNodes = paramsOfNewResources.nb_node; newElt.DataForSort._nbOfProcPerNode = paramsOfNewResources.nb_proc_per_node; - _resourcesList[newElt.DataForSort._hostName] = newElt; + _resourcesList[newElt.DataForSort._Name] = newElt; return 0; } - else throw 
ResourcesException("KERNEL is not present in this resource"); } @@ -332,9 +304,9 @@ throw(ResourcesException) */ //============================================================================= -void ResourcesManager_cpp::DeleteResourceInCatalog(const char *hostname) +void ResourcesManager_cpp::DeleteResourceInCatalog(const char * name) { - _resourcesList.erase(hostname); + _resourcesList.erase(name); } //============================================================================= @@ -345,9 +317,8 @@ void ResourcesManager_cpp::DeleteResourceInCatalog(const char *hostname) void ResourcesManager_cpp::WriteInXmlFile(std::string & xml_file) { -#if defined(_DEBUG_) || defined(_DEBUG) - std::cerr << "WriteInXmlFile : start" << std::endl; -#endif + RES_MESSAGE("WriteInXmlFile : start"); + const char* aFilePath = xml_file.c_str(); FILE* aFile = fopen(aFilePath, "w"); @@ -361,7 +332,7 @@ void ResourcesManager_cpp::WriteInXmlFile(std::string & xml_file) xmlNewDocComment(aDoc, BAD_CAST "ResourcesCatalog"); SALOME_ResourcesCatalog_Handler* handler = - new SALOME_ResourcesCatalog_Handler(_resourcesList, _resourcesBatchList); + new SALOME_ResourcesCatalog_Handler(_resourcesList); handler->PrepareDocToXmlFile(aDoc); delete handler; @@ -372,9 +343,7 @@ void ResourcesManager_cpp::WriteInXmlFile(std::string & xml_file) // Free the document xmlFreeDoc(aDoc); fclose(aFile); -#if defined(_DEBUG_) || defined(_DEBUG) - std::cerr << "WriteInXmlFile : WRITING DONE!" 
<< std::endl; -#endif + RES_MESSAGE("WriteInXmlFile : WRITING DONE!"); } //============================================================================= @@ -407,14 +376,13 @@ const MapOfParserResourcesType& ResourcesManager_cpp::ParseXmlFiles() if (to_parse) { _resourcesList.clear(); - _resourcesBatchList.clear(); // On parse tous les fichiers for(_path_resources_it = _path_resources.begin(); _path_resources_it != _path_resources.end(); ++_path_resources_it) { MapOfParserResourcesType _resourcesList_tmp; MapOfParserResourcesType _resourcesBatchList_tmp; SALOME_ResourcesCatalog_Handler* handler = - new SALOME_ResourcesCatalog_Handler(_resourcesList_tmp, _resourcesBatchList_tmp); + new SALOME_ResourcesCatalog_Handler(_resourcesList_tmp); const char* aFilePath = (*_path_resources_it).c_str(); FILE* aFile = fopen(aFilePath, "r"); @@ -438,18 +406,6 @@ const MapOfParserResourcesType& ResourcesManager_cpp::ParseXmlFiles() std::cerr << "ParseXmlFiles Warning, to resource with the same name was found, taking the first declaration : " << i->first << std::endl; } } - for (MapOfParserResourcesType_it i = _resourcesBatchList_tmp.begin(); i != _resourcesBatchList_tmp.end(); ++i) - { - MapOfParserResourcesType_it j = _resourcesBatchList.find(i->first); - if (j == _resourcesBatchList.end()) - { - _resourcesBatchList[i->first] = i->second; - } - else - { - std::cerr << "ParseXmlFiles Warning, to resource with the same name was found, taking the first declaration : " << i->first << std::endl; - } - } } else std::cerr << "ResourcesManager_cpp: could not parse file " << aFilePath << std::endl; @@ -477,50 +433,36 @@ const MapOfParserResourcesType& ResourcesManager_cpp::GetList() const return _resourcesList; } -string ResourcesManager_cpp::Find(const std::string& policy, const std::vector& listOfMachines) +string ResourcesManager_cpp::Find(const std::string& policy, const std::vector& listOfResources) { if(_resourceManagerMap.count(policy)==0) - return 
_resourceManagerMap[""]->Find(listOfMachines,_resourcesList); - return _resourceManagerMap[policy]->Find(listOfMachines,_resourcesList); + return _resourceManagerMap[""]->Find(listOfResources, _resourcesList); + return _resourceManagerMap[policy]->Find(listOfResources, _resourcesList); } //============================================================================= /*! - * Gives a sublist of machines with matching OS. - * If parameter OS is empty, gives the complete list of machines + * Gives a sublist of resources with matching OS. + * If parameter OS is empty, gives the complete list of resources */ //============================================================================= - -// Warning need an updated parsed list : _resourcesList -void ResourcesManager_cpp::SelectOnlyResourcesWithOS( vector& hosts, const char *OS) const -throw(ResourcesException) +void +ResourcesManager_cpp::SelectOnlyResourcesWithOS(std::vector& resources, std::string OS) { - string base(OS); - - if(hosts.size()==0) - { - //No constraint on computer list : take all known resources with OS - map::const_iterator iter; - for (iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++) - { - if ( (*iter).second.OS == base || base.size() == 0) - hosts.push_back((*iter).first); - } - } - else + if (OS != "") + { + // a computer list is given : take only resources with OS on those computers + std::vector vec_tmp = resources; + resources.clear(); + vector::iterator iter = vec_tmp.begin(); + for (; iter != vec_tmp.end(); iter++) { - //a computer list is given : take only resources with OS on those computers - vector vec=hosts; - hosts.clear(); - vector::iterator iter; - for (iter = vec.begin(); iter != vec.end(); iter++) - { - MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); - if(it != _resourcesList.end()) - if ( (*it).second.OS == base || base.size() == 0 ) - hosts.push_back(*iter); - } + MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); 
+ if(it != _resourcesList.end()) + if ( (*it).second.OS == OS) + resources.push_back(*iter); } + } } @@ -529,41 +471,47 @@ throw(ResourcesException) * Gives a sublist of machines on which the component is known. */ //============================================================================= - -//Warning need an updated parsed list : _resourcesList -void ResourcesManager_cpp::KeepOnlyResourcesWithComponent( vector& hosts, const vector& componentList) const -throw(ResourcesException) +void +ResourcesManager_cpp::KeepOnlyResourcesWithComponent(std::vector& resources, + const vector& componentList) { - for (vector::iterator iter = hosts.begin(); iter != hosts.end();) + std::vector::iterator iter = resources.begin(); + for (; iter != resources.end(); iter++) + { + MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); + const vector& mapOfComponentsOfCurrentHost = (*it).second.ComponentsList; + + bool erasedHost = false; + if( mapOfComponentsOfCurrentHost.size() > 0 ) { - MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); - const vector& mapOfComponentsOfCurrentHost = (((*it).second).ComponentsList); - - bool erasedHost = false; - if( mapOfComponentsOfCurrentHost.size() > 0 ){ - for(unsigned int i=0;i::const_iterator itt = find(mapOfComponentsOfCurrentHost.begin(), - mapOfComponentsOfCurrentHost.end(), - compoi); - if (itt == mapOfComponentsOfCurrentHost.end()){ - erasedHost = true; - break; - } + for(unsigned int i=0; i::const_iterator itt = find(mapOfComponentsOfCurrentHost.begin(), + mapOfComponentsOfCurrentHost.end(), + compoi); + if (itt == mapOfComponentsOfCurrentHost.end()) + { + erasedHost = true; + break; } } - if(erasedHost) - hosts.erase(iter); - else - iter++; } + if(erasedHost) + resources.erase(iter); + } } -ParserResourcesType ResourcesManager_cpp::GetResourcesList(const std::string& machine) +ParserResourcesType +ResourcesManager_cpp::GetResourcesDescr(const std::string & name) { - if 
(_resourcesList.find(machine) != _resourcesList.end()) - return _resourcesList[machine]; + if (_resourcesList.find(name) != _resourcesList.end()) + return _resourcesList[name]; else - return _resourcesBatchList[machine]; + { + std::string error("[GetResourcesDescr] Resource does not exist: "); + error += name; + throw ResourcesException(error); + } } diff --git a/src/ResourcesManager/ResourcesManager.hxx b/src/ResourcesManager/ResourcesManager.hxx index 9d95ee3f7..c5ce0f2c8 100644 --- a/src/ResourcesManager/ResourcesManager.hxx +++ b/src/ResourcesManager/ResourcesManager.hxx @@ -43,18 +43,18 @@ // in a critical section to be sure to be clean. // Only one thread should use the SALOME_ResourcesManager class in a SALOME // session. - -struct machineParams{ +struct resourceParams +{ + std::string name; std::string hostname; std::string OS; - std::string parallelLib; + unsigned int nb_proc; unsigned int nb_node; unsigned int nb_proc_per_node; unsigned int cpu_clock; unsigned int mem_mb; - unsigned int nb_component_nodes; std::vector componentList; - std::vector computerList; + std::vector resourceList; }; class RESOURCESMANAGER_EXPORT ResourcesException @@ -76,19 +76,19 @@ class RESOURCESMANAGER_EXPORT ResourcesManager_cpp ~ResourcesManager_cpp(); std::vector - GetFittingResources(const machineParams& params) throw(ResourcesException); + GetFittingResources(const resourceParams& params) throw(ResourcesException); - std::string Find(const std::string& policy, const std::vector& listOfMachines); + std::string Find(const std::string& policy, + const std::vector& listOfResources); - int AddResourceInCatalog - (const machineParams& paramsOfNewResources, - const std::vector& componentsOnNewResources, - const char *alias, - const char *userName, - AccessModeType mode, - AccessProtocolType prot) throw(ResourcesException); + int AddResourceInCatalog (const resourceParams& paramsOfNewResources, + const std::vector& componentsOnNewResources, + const char *userName, + 
AccessModeType mode, + AccessProtocolType prot, + AccessProtocolType iprot) throw(ResourcesException); - void DeleteResourceInCatalog(const char *hostname); + void DeleteResourceInCatalog(const char * name); void WriteInXmlFile(std::string & xml_file); @@ -96,17 +96,14 @@ class RESOURCESMANAGER_EXPORT ResourcesManager_cpp const MapOfParserResourcesType& GetList() const; - ParserResourcesType GetResourcesList(const std::string& machine); + ParserResourcesType GetResourcesDescr(const std::string & name); protected: - void SelectOnlyResourcesWithOS(std::vector& hosts, - const char *OS) const - throw(ResourcesException); + void SelectOnlyResourcesWithOS(std::vector& resources, std::string OS); - void KeepOnlyResourcesWithComponent(std::vector& hosts, - const std::vector& componentList) const - throw(ResourcesException); + void KeepOnlyResourcesWithComponent(std::vector& resources, + const std::vector& componentList); //! will contain the path to the ressources catalog std::list _path_resources; @@ -115,9 +112,6 @@ class RESOURCESMANAGER_EXPORT ResourcesManager_cpp //! will contain the informations on the data type catalog(after parsing) MapOfParserResourcesType _resourcesList; - //! will contain the informations on the data type catalog(after parsing) - MapOfParserResourcesType _resourcesBatchList; - //! 
a map that contains all the available load rate managers (the key is the name) std::map _resourceManagerMap; diff --git a/src/ResourcesManager/ResourcesManager_Defs.hxx b/src/ResourcesManager/ResourcesManager_Defs.hxx index c6d0c25e9..43936cdfa 100755 --- a/src/ResourcesManager/ResourcesManager_Defs.hxx +++ b/src/ResourcesManager/ResourcesManager_Defs.hxx @@ -32,4 +32,16 @@ # define RESOURCESMANAGER_EXPORT #endif +// MESSAGES +#define RES_MESS_INIT(deb) std::cerr << deb +#define RES_MESS_BEGIN(deb) RES_MESS_INIT(deb)<<__FILE__ <<" ["<<__LINE__<<"] : " +#define RES_MESS_END std::endl; +#define RES_INFOS(msg) {RES_MESS_BEGIN("- Trace ") << msg << RES_MESS_END} + +#if defined(_DEBUG_) || defined(_DEBUG) +#define RES_MESSAGE(msg) {RES_MESS_BEGIN("- Trace ") << msg << RES_MESS_END} +#else /* ifdef _DEBUG_*/ +#define RES_MESSAGE(msg) {} +#endif /* ifdef _DEBUG_*/ + #endif // __RESOURCESMANAGER_DEFS_HXX__ diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx index d4f5d269b..b41a5aa56 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx @@ -40,17 +40,15 @@ using namespace std; //============================================================================= SALOME_ResourcesCatalog_Handler:: -SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list, - MapOfParserResourcesType& resources_batch_list): - _resources_list(resources_list), - _resources_batch_list(resources_batch_list) +SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list): _resources_list(resources_list) { //XML tags initialisation test_machine = "machine"; test_cluster = "cluster"; + test_name = "name"; test_hostname = "hostname"; - test_alias = "alias"; test_protocol = "protocol"; + test_cluster_internal_protocol = "iprotocol"; test_mode = "mode"; test_batch = "batch"; test_mpi = "mpi"; @@ -115,56 +113,33 @@ void 
SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) while(aCurNode != NULL) { // Cas d'une machine ou d'une machine batch - if ( !xmlStrcmp(aCurNode->name,(const xmlChar*)test_machine) ) + if (!xmlStrcmp(aCurNode->name,(const xmlChar*)test_machine)) { _resource.Clear(); bool Ok = ProcessMachine(aCurNode, _resource); if (Ok) { - // There is two lists - // _resources_list for interactive resources - // _resources_batch_list for batch resources - // This choice is done with Mode parameter - if (_resource.Mode == interactive) + // Adding a resource + if(_resource.HostName == "localhost") { - // Adding a generic cluster - int aNbNodes = _resource.DataForSort._nbOfNodes; - if( aNbNodes > 1 ){ - string clusterNode = _resource.DataForSort._hostName ; - for( int i=0; i < aNbNodes; i++ ){ - char inode[64]; - inode[0] = '\0' ; - sprintf(inode,"%s%d",clusterNode.c_str(),i+1); - std::string nodeName(inode); - _resource.DataForSort._hostName = nodeName ; - _resource.HostName = nodeName ; - _resources_list[nodeName] = _resource; - } - } - else + _resource.HostName = Kernel_Utils::GetHostname(); + if (_resource.Name == "localhost") { - // Adding a machine - if(_resource.HostName == "localhost") - { - _resource.HostName = Kernel_Utils::GetHostname(); - _resource.DataForSort._hostName = Kernel_Utils::GetHostname(); - _resources_list[Kernel_Utils::GetHostname()] = _resource; - } - else - _resources_list[_resource.HostName] = _resource; + _resource.Name = Kernel_Utils::GetHostname(); + _resource.DataForSort._Name = Kernel_Utils::GetHostname(); } } - else - // Adding a batch machine/cluster - _resources_batch_list[_resource.HostName] = _resource; + _resources_list[_resource.Name] = _resource; } } - if ( !xmlStrcmp(aCurNode->name,(const xmlChar*)test_cluster) ) + // Cas de la déclaration d'un cluster + if (!xmlStrcmp(aCurNode->name,(const xmlChar*)test_cluster)) { - // Cas de la déclaration d'un cluster _resource.Clear(); if(ProcessCluster(aCurNode, _resource)) - 
_resources_list[_resource.HostName] = _resource; + { + _resources_list[_resource.Name] = _resource; + } } aCurNode = aCurNode->next; } @@ -174,17 +149,21 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) iter != _resources_list.end(); iter++) { - std::cerr << (*iter).first << std::endl; - std::cerr << (*iter).second.HostName << std::endl; - std::cerr << (*iter).second.Alias << std::endl; - std::cerr << (*iter).second.UserName << std::endl; - std::cerr << (*iter).second.AppliPath << std::endl; - std::cerr << (*iter).second.OS << std::endl; - std::cerr << (*iter).second.Protocol << std::endl; - std::cerr << (*iter).second.Mode << std::endl; + std::cerr << "************************************************" << std::endl; + std::cerr << "Resource " << (*iter).first << " found:" << std::endl; + std::cerr << " Name: " << (*iter).second.Name << std::endl; + std::cerr << " Hostname: " << (*iter).second.HostName << std::endl; + std::cerr << " Username: " << (*iter).second.UserName << std::endl; + std::cerr << " Appli path: " <<(*iter).second.AppliPath << std::endl; + std::cerr << " OS: " << (*iter).second.OS << std::endl; + std::cerr << " Protocol: " << (*iter).second.PrintAccessProtocolType() << std::endl; + std::cerr << " Internal Protocol: " <<(*iter).second.PrintClusterInternalProtocol() << std::endl; + std::cerr << " Mode: " << (*iter).second.PrintAccessModeType() << std::endl; + std::cerr << " Batch Type: " << (*iter).second.PrintBatchType() << std::endl; + std::cerr << " MPI Impl: " << (*iter).second.PrintMpiImplType() << std::endl; + std::cerr << "************************************************" << std::endl; } #endif - } bool @@ -195,7 +174,6 @@ SALOME_ResourcesCatalog_Handler::ProcessCluster(xmlNodePtr cluster_descr, Parser if (xmlHasProp(cluster_descr, (const xmlChar*)test_hostname)) { xmlChar* hostname = xmlGetProp(cluster_descr, (const xmlChar*)test_hostname); - resource.DataForSort._hostName = (const char*)hostname; 
resource.HostName = (const char*)hostname; xmlFree(hostname); } @@ -206,6 +184,20 @@ SALOME_ResourcesCatalog_Handler::ProcessCluster(xmlNodePtr cluster_descr, Parser return false; } + if (xmlHasProp(cluster_descr, (const xmlChar*)test_name)) + { + xmlChar* name = xmlGetProp(cluster_descr, (const xmlChar*)test_name); + resource.Name = (const char*)name; + resource.DataForSort._Name = (const char*)name; + xmlFree(name); + } + else + { + resource.Name = resource.HostName; + resource.DataForSort._Name = resource.HostName; + std::cerr << "SALOME_ResourcesCatalog_Handler::ProcessCluster : !!! Warning !!! No Name found use Hostname for resource: " << _resource.Name << std::endl; + } + if (xmlHasProp(cluster_descr, (const xmlChar*)test_use)) { xmlChar* use = xmlGetProp(cluster_descr, (const xmlChar*)test_use); @@ -291,7 +283,6 @@ SALOME_ResourcesCatalog_Handler::ProcessMember(xmlNodePtr member_descr, ParserRe if (xmlHasProp(member_descr, (const xmlChar*)test_hostname)) { xmlChar* hostname = xmlGetProp(member_descr, (const xmlChar*)test_hostname); - resource.DataForSort._hostName = (const char*)hostname; resource.HostName = (const char*)hostname; xmlFree(hostname); } @@ -327,6 +318,31 @@ SALOME_ResourcesCatalog_Handler::ProcessMember(xmlNodePtr member_descr, ParserRe return false; } + if (xmlHasProp(member_descr, (const xmlChar*)test_cluster_internal_protocol)) + { + xmlChar* iprotocol= xmlGetProp(member_descr, (const xmlChar*)test_cluster_internal_protocol); + switch (iprotocol[0]) + { + case 'r': + resource.ClusterInternalProtocol = rsh; + break; + case 's': + resource.ClusterInternalProtocol = ssh; + break; + default: + std::cerr << "SALOME_ResourcesCatalog_Handler::ProcessMember : Warning found a machine with a bad protocol" << std::endl; + std::cerr << "SALOME_ResourcesCatalog_Handler::ProcessMember : Warning this machine will not be added" << std::endl; + return false; + } + xmlFree(iprotocol); + } + else + { + std::cerr << 
"SALOME_ResourcesCatalog_Handler::ProcessMember : Warning found a machine without a protocol" << std::endl; + std::cerr << "SALOME_ResourcesCatalog_Handler::ProcessMember : Warning this machine will not be added" << std::endl; + return false; + } + if (xmlHasProp(member_descr, (const xmlChar*)test_user_name)) { xmlChar* user_name= xmlGetProp(member_descr, (const xmlChar*)test_user_name); @@ -387,7 +403,6 @@ SALOME_ResourcesCatalog_Handler::ProcessMachine(xmlNodePtr machine_descr, Parser if (xmlHasProp(machine_descr, (const xmlChar*)test_hostname)) { xmlChar* hostname = xmlGetProp(machine_descr, (const xmlChar*)test_hostname); - resource.DataForSort._hostName = (const char*)hostname; resource.HostName = (const char*)hostname; xmlFree(hostname); } @@ -398,14 +413,19 @@ SALOME_ResourcesCatalog_Handler::ProcessMachine(xmlNodePtr machine_descr, Parser return false; } - if (xmlHasProp(machine_descr, (const xmlChar*)test_alias)) + if (xmlHasProp(machine_descr, (const xmlChar*)test_name)) { - xmlChar* alias = xmlGetProp(machine_descr, (const xmlChar*)test_alias); - resource.Alias = (const char*)alias; - xmlFree(alias); + xmlChar* name = xmlGetProp(machine_descr, (const xmlChar*)test_name); + resource.Name = (const char*)name; + resource.DataForSort._Name = (const char*)name; + xmlFree(name); } else - resource.Alias = ""; + { + resource.Name = resource.HostName; + resource.DataForSort._Name = resource.HostName; + std::cerr << "SALOME_ResourcesCatalog_Handler::ProcessMachine : !!! Warning !!! 
No Name found use Hostname for resource: " << _resource.Name << std::endl; + } if (xmlHasProp(machine_descr, (const xmlChar*)test_batch_queue)) { @@ -446,6 +466,27 @@ SALOME_ResourcesCatalog_Handler::ProcessMachine(xmlNodePtr machine_descr, Parser else resource.Protocol = rsh; + if (xmlHasProp(machine_descr, (const xmlChar*)test_cluster_internal_protocol)) + { + xmlChar* iprotocol= xmlGetProp(machine_descr, (const xmlChar*)test_cluster_internal_protocol); + switch ( iprotocol[0]) + { + case 'r': + resource.ClusterInternalProtocol = rsh; + break; + case 's': + resource.ClusterInternalProtocol = ssh; + break; + default: + // If it'not in all theses cases, the protocol is affected to rsh + resource.ClusterInternalProtocol = rsh; + break; + } + xmlFree(iprotocol); + } + else + resource.ClusterInternalProtocol = resource.Protocol; + if (xmlHasProp(machine_descr, (const xmlChar*)test_mode)) { xmlChar* mode=xmlGetProp(machine_descr, (const xmlChar*)test_mode); @@ -478,6 +519,8 @@ SALOME_ResourcesCatalog_Handler::ProcessMachine(xmlNodePtr machine_descr, Parser resource.Batch = lsf; else if (aBatch == "sge") resource.Batch = sge; + else if (aBatch == "ssh_batch") + resource.Batch = ssh_batch; else resource.Batch = none; } @@ -613,184 +656,106 @@ void SALOME_ResourcesCatalog_Handler::PrepareDocToXmlFile(xmlDocPtr theDoc) root_node = xmlNewNode(NULL, BAD_CAST "resources"); xmlDocSetRootElement(theDoc, root_node); - for (map::iterator iter = - _resources_list.begin(); - iter != _resources_list.end(); - iter++) + std::map::iterator iter = _resources_list.begin(); + for (; iter != _resources_list.end(); iter++) + { + node = xmlNewChild(root_node, NULL, BAD_CAST test_machine, NULL); + xmlNewProp(node, BAD_CAST test_name, BAD_CAST (*iter).second.Name.c_str()); + xmlNewProp(node, BAD_CAST test_hostname, BAD_CAST (*iter).second.HostName.c_str()); + xmlNewProp(node, BAD_CAST test_batch_queue, BAD_CAST (*iter).second.batchQueue.c_str()); + xmlNewProp(node, BAD_CAST test_user_commands, 
BAD_CAST (*iter).second.userCommands.c_str()); + + switch ((*iter).second.Protocol) { - node = xmlNewChild(root_node, NULL, BAD_CAST test_machine, NULL); - xmlNewProp(node, BAD_CAST test_hostname, BAD_CAST (*iter).second.HostName.c_str()); - xmlNewProp(node, BAD_CAST test_alias, BAD_CAST (*iter).second.Alias.c_str()); - xmlNewProp(node, BAD_CAST test_batch_queue, BAD_CAST (*iter).second.batchQueue.c_str()); - xmlNewProp(node, BAD_CAST test_user_commands, BAD_CAST (*iter).second.userCommands.c_str()); - - switch ((*iter).second.Protocol) - { - case rsh: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); - break; - case ssh: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "ssh"); - break; - default: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); - } + case rsh: + xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); + break; + case ssh: + xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "ssh"); + break; + default: + xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); + } - switch ((*iter).second.Mode) - { - case interactive: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - break; - case batch: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "batch"); - break; - default: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - } - - switch ((*iter).second.Batch) - { - case pbs: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "pbs"); - break; - case lsf: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); - break; - case sge: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); - break; - default: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); - } - - switch ((*iter).second.mpi) - { - case lam: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "lam"); - break; - case mpich1: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich1"); - break; - case mpich2: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich2"); - break; - case openmpi: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST 
"openmpi"); - break; - case slurm: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "slurm"); - break; - case prun: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "prun"); - break; - default: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST ""); - } - - xmlNewProp(node, BAD_CAST test_user_name, BAD_CAST (*iter).second.UserName.c_str()); - - for (vector::const_iterator iter2 = - (*iter).second.ComponentsList.begin(); - iter2 != (*iter).second.ComponentsList.end(); - iter2++) - { - node1 = xmlNewChild(node, NULL, BAD_CAST test_components, NULL); - xmlNewProp(node1, BAD_CAST test_component_name, BAD_CAST (*iter2).c_str()); - } - - xmlNewProp(node, BAD_CAST test_os, BAD_CAST (*iter).second.OS.c_str()); - xmlNewProp(node, BAD_CAST test_mem_in_mb, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._memInMB)); - xmlNewProp(node, BAD_CAST test_cpu_freq_mhz, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._CPUFreqMHz)); - xmlNewProp(node, BAD_CAST test_nb_of_nodes, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfNodes)); - xmlNewProp(node, BAD_CAST test_nb_of_proc_per_node, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfProcPerNode)); + switch ((*iter).second.ClusterInternalProtocol) + { + case rsh: + xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); + break; + case ssh: + xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "ssh"); + break; + default: + xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); } - for (map::iterator iter = - _resources_batch_list.begin(); - iter != _resources_batch_list.end(); - iter++) + + switch ((*iter).second.Mode) { - node = xmlNewChild(root_node, NULL, BAD_CAST test_machine, NULL); - xmlNewProp(node, BAD_CAST test_hostname, BAD_CAST (*iter).second.HostName.c_str()); - xmlNewProp(node, BAD_CAST test_alias, BAD_CAST (*iter).second.Alias.c_str()); - - switch ((*iter).second.Protocol) - { - case rsh: - xmlNewProp(node, 
BAD_CAST test_protocol, BAD_CAST "rsh"); - break; - case ssh: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "ssh"); - break; - default: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); - } + case interactive: + xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); + break; + case batch: + xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "batch"); + break; + default: + xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); + } - switch ((*iter).second.Mode) - { - case interactive: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - break; - case batch: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "batch"); - break; - default: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - } - - switch ((*iter).second.Batch) - { - case pbs: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "pbs"); - break; - case lsf: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); - break; - case sge: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); - break; - default: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); - } - - switch ((*iter).second.mpi) - { - case lam: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "lam"); - break; - case mpich1: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich1"); - break; - case mpich2: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich2"); - break; - case openmpi: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "openmpi"); - break; - case slurm: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "slurm"); - break; - case prun: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "prun"); - break; - default: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST ""); - } - - xmlNewProp(node, BAD_CAST test_user_name, BAD_CAST (*iter).second.UserName.c_str()); - - for (vector::const_iterator iter2 = - (*iter).second.ComponentsList.begin(); - iter2 != (*iter).second.ComponentsList.end(); - iter2++) - { - node1 = xmlNewChild(node, NULL, BAD_CAST test_components, NULL); - xmlNewProp(node1, 
BAD_CAST test_component_name, BAD_CAST (*iter2).c_str()); - } - - xmlNewProp(node, BAD_CAST test_os, BAD_CAST (*iter).second.OS.c_str()); - xmlNewProp(node, BAD_CAST test_mem_in_mb, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._memInMB)); - xmlNewProp(node, BAD_CAST test_cpu_freq_mhz, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._CPUFreqMHz)); - xmlNewProp(node, BAD_CAST test_nb_of_nodes, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfNodes)); - xmlNewProp(node, BAD_CAST test_nb_of_proc_per_node, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfProcPerNode)); + switch ((*iter).second.Batch) + { + case pbs: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "pbs"); + break; + case lsf: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); + break; + case sge: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); + break; + case ssh_batch: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "ssh_batch"); + break; + default: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); } + + switch ((*iter).second.mpi) + { + case lam: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "lam"); + break; + case mpich1: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich1"); + break; + case mpich2: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich2"); + break; + case openmpi: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "openmpi"); + break; + case slurm: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "slurm"); + break; + case prun: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "prun"); + break; + default: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST ""); + } + + xmlNewProp(node, BAD_CAST test_user_name, BAD_CAST (*iter).second.UserName.c_str()); + + std::vector::const_iterator iter2 = (*iter).second.ComponentsList.begin(); + for(;iter2 != (*iter).second.ComponentsList.end(); iter2++) + { + node1 = xmlNewChild(node, NULL, BAD_CAST test_components, NULL); + xmlNewProp(node1, BAD_CAST 
test_component_name, BAD_CAST (*iter2).c_str()); + } + + xmlNewProp(node, BAD_CAST test_os, BAD_CAST (*iter).second.OS.c_str()); + xmlNewProp(node, BAD_CAST test_mem_in_mb, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._memInMB)); + xmlNewProp(node, BAD_CAST test_cpu_freq_mhz, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._CPUFreqMHz)); + xmlNewProp(node, BAD_CAST test_nb_of_nodes, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfNodes)); + xmlNewProp(node, BAD_CAST test_nb_of_proc_per_node, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfProcPerNode)); + } } diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx index c0445989f..0be33ec67 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx @@ -42,8 +42,7 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesCatalog_Handler { public : - SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list, - MapOfParserResourcesType& resources_batch_list); + SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list); const MapOfParserResourcesType& GetResourcesAfterParsing() const; @@ -62,13 +61,13 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesCatalog_Handler ParserResourcesType _resource; MapOfParserResourcesType& _resources_list; - MapOfParserResourcesType& _resources_batch_list; const char *test_machine; const char *test_cluster; + const char *test_name; const char *test_hostname; - const char *test_alias; const char *test_protocol; + const char *test_cluster_internal_protocol; const char *test_mode; const char *test_batch; const char *test_mpi; diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx index 40870f49e..aedbcbe72 100644 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx +++ 
b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx @@ -27,6 +27,7 @@ using namespace std; +unsigned int ResourceDataToSort::_nbOfProcWanted = NULL_VALUE; unsigned int ResourceDataToSort::_nbOfNodesWanted = NULL_VALUE; unsigned int ResourceDataToSort::_nbOfProcPerNodeWanted = NULL_VALUE; unsigned int ResourceDataToSort::_CPUFreqMHzWanted = NULL_VALUE; @@ -35,12 +36,12 @@ unsigned int ResourceDataToSort::_memInMBWanted = NULL_VALUE; ResourceDataToSort::ResourceDataToSort() {} -ResourceDataToSort::ResourceDataToSort(const string& hostname, +ResourceDataToSort::ResourceDataToSort(const string& name, unsigned int nbOfNodes, unsigned int nbOfProcPerNode, unsigned int CPUFreqMHz, unsigned int memInMB): - _hostName(hostname), + _Name(name), _nbOfNodes(nbOfNodes), _nbOfProcPerNode(nbOfProcPerNode), _CPUFreqMHz(CPUFreqMHz), @@ -57,6 +58,19 @@ bool ResourceDataToSort::operator< (const ResourceDataToSort& other) const unsigned int ResourceDataToSort::GetNumberOfPoints() const { unsigned int ret = 0; + //priority 0 : Nb of proc + + if (_nbOfProcWanted != NULL_VALUE) + { + unsigned int nb_proc = _nbOfNodes * _nbOfProcPerNode; + if (nb_proc == _nbOfProcWanted) + ret += 30000; + else if (nb_proc > _nbOfProcWanted) + ret += 20000; + else + ret += 10000; + } + //priority 1 : Nb of nodes if (_nbOfNodesWanted != NULL_VALUE) @@ -102,6 +116,7 @@ unsigned int ResourceDataToSort::GetNumberOfPoints() const ret += 1; } + //RES_MESSAGE("[GetNumberOfPoints] points number for resource: " << _Name << " " << ret); return ret; } @@ -118,13 +133,14 @@ void ParserResourcesType::Print() { ostringstream oss; oss << endl << + "Name : " << Name << endl << "HostName : " << HostName << endl << - "Alias : " << Alias << endl << "NbOfNodes : " << DataForSort._nbOfNodes << endl << "NbOfProcPerNode : " << DataForSort._nbOfProcPerNode << endl << "CPUFreqMHz : " << DataForSort._CPUFreqMHz << endl << "MemInMB : " << DataForSort._memInMB << endl << "Protocol : " << Protocol << endl << + 
"ClusterInternalProtocol : " << ClusterInternalProtocol << endl << "Mode : " << Mode << endl << "Batch : " << Batch << endl << "mpi : " << mpi << endl << @@ -150,19 +166,75 @@ void ParserResourcesType::Print() oss << "Cluster member called : " << (*it).HostName << endl; } cout << oss.str() << endl; +} + +std::string +ParserResourcesType::PrintAccessProtocolType() const +{ + if (Protocol == rsh) + return "rsh"; + else + return "ssh"; +} + +std::string +ParserResourcesType::PrintClusterInternalProtocol() const +{ + if (ClusterInternalProtocol == rsh) + return "rsh"; + else + return "ssh"; +} + +std::string +ParserResourcesType::PrintAccessModeType() const +{ + if (Mode == interactive) + return "interactive"; + else + return "batch"; +} +std::string +ParserResourcesType::PrintBatchType() const +{ + if (Batch == none) + return "none"; + else if (Batch == pbs) + return "pbs"; + else if (Batch == lsf) + return "lsf"; + else if (Batch == sge) + return "sge"; + else + return "ssh"; +} + +std::string +ParserResourcesType::PrintMpiImplType() const +{ + if (mpi == nompi) + return "no mpi"; + else if (mpi == lam) + return "lam"; + else if (mpi == mpich1) + return "mpich1"; + else if (mpi == mpich2) + return "mpich2"; + else if (mpi == openmpi) + return "openmpi"; + else if (mpi == slurm) + return "slurm"; + else + return "prun"; } void ParserResourcesType::Clear() { - DataForSort._hostName = ""; - DataForSort._nbOfNodes = 1; - DataForSort._nbOfProcPerNode = 1; - DataForSort._CPUFreqMHz = 0; - DataForSort._memInMB = 0; + Name = ""; HostName = ""; - Alias = ""; Protocol = rsh; + ClusterInternalProtocol = rsh; Mode = interactive; Batch = none; mpi = nompi; @@ -175,4 +247,10 @@ void ParserResourcesType::Clear() use = ""; ClusterMembersList.clear(); nbOfProc = 1; + + DataForSort._Name = ""; + DataForSort._nbOfNodes = 1; + DataForSort._nbOfProcPerNode = 1; + DataForSort._CPUFreqMHz = 0; + DataForSort._memInMB = 0; } diff --git 
a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx index e4ad2d55f..908505483 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx @@ -40,7 +40,7 @@ enum AccessProtocolType {rsh, ssh}; enum AccessModeType {interactive, batch}; -enum BatchType {none, pbs, lsf, sge}; +enum BatchType {none, pbs, lsf, sge, ssh_batch}; enum MpiImplType {nompi, lam, mpich1, mpich2, openmpi, slurm, prun}; @@ -48,11 +48,12 @@ class RESOURCESMANAGER_EXPORT ResourceDataToSort { public: - std::string _hostName; + std::string _Name; unsigned int _nbOfNodes; unsigned int _nbOfProcPerNode; unsigned int _CPUFreqMHz; unsigned int _memInMB; + static unsigned int _nbOfProcWanted; static unsigned int _nbOfNodesWanted; static unsigned int _nbOfProcPerNodeWanted; static unsigned int _CPUFreqMHzWanted; @@ -60,7 +61,7 @@ class RESOURCESMANAGER_EXPORT ResourceDataToSort public: ResourceDataToSort(); - ResourceDataToSort(const std::string& hostname, + ResourceDataToSort(const std::string& name, unsigned int nbOfNodes, unsigned int nbOfProcPerNode, unsigned int CPUFreqMHz, @@ -76,6 +77,7 @@ struct RESOURCESMANAGER_EXPORT ParserResourcesClusterMembersType { std::string HostName; AccessProtocolType Protocol; + AccessProtocolType ClusterInternalProtocol; std::string UserName; std::string AppliPath; ResourceDataToSort DataForSort; @@ -84,9 +86,10 @@ struct RESOURCESMANAGER_EXPORT ParserResourcesClusterMembersType struct RESOURCESMANAGER_EXPORT ParserResourcesType { ResourceDataToSort DataForSort; + std::string Name; std::string HostName; - std::string Alias; AccessProtocolType Protocol; + AccessProtocolType ClusterInternalProtocol; AccessModeType Mode; BatchType Batch; MpiImplType mpi; @@ -103,6 +106,12 @@ struct RESOURCESMANAGER_EXPORT ParserResourcesType void Print(); void Clear(); + + std::string PrintAccessProtocolType() const; + std::string PrintAccessModeType() 
const; + std::string PrintBatchType() const; + std::string PrintMpiImplType() const; + std::string PrintClusterInternalProtocol() const; }; typedef std::map MapOfParserResourcesType; diff --git a/src/ResourcesManager/SALOME_ResourcesManager.cxx b/src/ResourcesManager/SALOME_ResourcesManager.cxx index 91d2de5c9..294993120 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.cxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.cxx @@ -139,35 +139,40 @@ void SALOME_ResourcesManager::Shutdown() */ //============================================================================= -Engines::MachineList * -SALOME_ResourcesManager::GetFittingResources(const Engines::MachineParameters& params) +Engines::ResourceList * +SALOME_ResourcesManager::GetFittingResources(const Engines::ResourceParameters& params) { -// MESSAGE("ResourcesManager::GetFittingResources"); - machineParams p; + MESSAGE("ResourcesManager::GetFittingResources"); + Engines::ResourceList * ret = new Engines::ResourceList; + + // CORBA -> C++ + resourceParams p; + p.name = params.name; p.hostname = params.hostname; p.OS = params.OS; + p.nb_proc = params.nb_proc; p.nb_node = params.nb_node; p.nb_proc_per_node = params.nb_proc_per_node; p.cpu_clock = params.cpu_clock; p.mem_mb = params.mem_mb; - p.parallelLib = params.parallelLib; - p.nb_component_nodes = params.nb_component_nodes; - - for(unsigned int i=0;i vec = _rm.GetFittingResources(p); - ret->length(vec.size()); - for(unsigned int i=0;i vec = _rm.GetFittingResources(p); + + // C++ -> CORBA + ret->length(vec.size()); + for(unsigned int i=0;i ml; - for(unsigned int i=0;i C++ + vector rl; + for(unsigned int i=0; i ml; - for(unsigned int i=0;i C++ + vector rl; + for(unsigned int i=0; iname = CORBA::string_dup(resource.Name.c_str()); p_ptr->hostname = CORBA::string_dup(resource.HostName.c_str()); - p_ptr->alias = CORBA::string_dup(resource.Alias.c_str()); if( resource.Protocol == rsh ) p_ptr->protocol = "rsh"; else if( resource.Protocol == ssh ) 
p_ptr->protocol = "ssh"; + if( resource.ClusterInternalProtocol == rsh ) + p_ptr->iprotocol = "rsh"; + else if( resource.ClusterInternalProtocol == ssh ) + p_ptr->iprotocol = "ssh"; p_ptr->username = CORBA::string_dup(resource.UserName.c_str()); p_ptr->applipath = CORBA::string_dup(resource.AppliPath.c_str()); p_ptr->componentList.length(resource.ComponentsList.size()); @@ -240,13 +254,12 @@ Engines::MachineDefinition* SALOME_ResourcesManager::GetMachineParameters(const else if( resource.Batch == sge ) p_ptr->batch = "sge"; - p_ptr->nb_component_nodes=1; - return p_ptr; } std::string -SALOME_ResourcesManager::getMachineFile(std::string hostname, CORBA::Long nb_procs, +SALOME_ResourcesManager::getMachineFile(std::string hostname, + CORBA::Long nb_procs, std::string parallelLib) { std::string machine_file_name(""); @@ -266,6 +279,7 @@ SALOME_ResourcesManager::getMachineFile(std::string hostname, CORBA::Long nb_pro ParserResourcesClusterMembersType fake_node; fake_node.HostName = resource.HostName; fake_node.Protocol = resource.Protocol; + fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol; fake_node.UserName = resource.UserName; fake_node.AppliPath = resource.AppliPath; fake_node.DataForSort = resource.DataForSort; @@ -323,6 +337,7 @@ SALOME_ResourcesManager::getMachineFile(std::string hostname, CORBA::Long nb_pro ParserResourcesClusterMembersType fake_node; fake_node.HostName = resource.HostName; fake_node.Protocol = resource.Protocol; + fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol; fake_node.UserName = resource.UserName; fake_node.AppliPath = resource.AppliPath; fake_node.DataForSort = resource.DataForSort; diff --git a/src/ResourcesManager/SALOME_ResourcesManager.hxx b/src/ResourcesManager/SALOME_ResourcesManager.hxx index be58300ac..f186718c6 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.hxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.hxx @@ -64,16 +64,16 @@ class SALOMERESOURCESMANAGER_EXPORT 
SALOME_ResourcesManager: ~SALOME_ResourcesManager(); // CORBA Methods - Engines::MachineList * - GetFittingResources(const Engines::MachineParameters& params); - char* FindFirst(const Engines::MachineList& listOfMachines); - char* Find(const char *policy, const Engines::MachineList& listOfMachines); - Engines::MachineDefinition* GetMachineParameters(const char *hostname); + Engines::ResourceList * GetFittingResources(const Engines::ResourceParameters& params); + char* FindFirst(const Engines::ResourceList& listOfResources); + char* Find(const char *policy, const Engines::ResourceList& listOfResources); + Engines::ResourceDefinition * GetResourceDefinition(const char * name); // Cpp Methods void Shutdown(); ResourcesManager_cpp *GetImpl() { return &_rm; } - std::string getMachineFile(std::string hostname, CORBA::Long nb_procs, + std::string getMachineFile(std::string hostname, + CORBA::Long nb_procs, std::string parallelLib); @@ -89,7 +89,6 @@ class SALOMERESOURCESMANAGER_EXPORT SALOME_ResourcesManager: MapOfParserResourcesType _resourcesBatchList; ResourcesManager_cpp _rm; - }; #endif // RESSOURCESCATALOG_IMPL_H -- 2.39.2