From: ribes Date: Tue, 15 Dec 2009 08:22:31 +0000 (+0000) Subject: - Resource Management refactoring: X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=b98a6eabf9683b5327af9e7efac17b0edd32b7eb;p=modules%2Fkernel.git - Resource Management refactoring: - Remove "alias" to a resource - Add "name" to a resource - The resource manager now works with resources and not machines - Add nbproc for resource selection - Container Manager refactoring: - Split MachineParameters into two parts: ContainerParameters and ResourceParameters - Remove FindOrStartContainer, StartContainer and FindOrStartParallelContainer - Remove SALOME_BATCH - Add two new modes for GiveContainer: "find" and "findorstart" - LifeCycleCORBA: - For Salome 5.1.4: Same interface (use of MachineParameters) - For Salome 6.x.x: Use of ContainerParameters --- diff --git a/idl/SALOME_ContainerManager.idl b/idl/SALOME_ContainerManager.idl index ceef9c138..1b48aa5af 100644 --- a/idl/SALOME_ContainerManager.idl +++ b/idl/SALOME_ContainerManager.idl @@ -31,88 +31,109 @@ module Engines { -//! Type to transmit list of machines. - typedef sequence MachineList; +//! Type to transmit list of resources. +typedef sequence ResourceList; //! components list - typedef sequence CompoList; +typedef sequence CompoList; //! files list - typedef sequence FilesList; +typedef sequence FilesList; //! modules list - typedef sequence ModulesList; +typedef sequence ModulesList; -//! Type to describe required properties of a container. -struct MachineParameters +//! Type to describe required properties of a resource +struct ResourceParameters { - //! container name if given else automatic - string container_name; - //! host name if given else automatic + //! resource name - manual selection + string name; + //! host name string hostname; - //! if given list of components that could be loaded on the container - CompoList componentList; - //! 
if given restricted list of machines to search in - MachineList computerList; - //! required operating system + //! if given required operating system string OS; + //! if given list of components that could be loaded on a container + //! Optional if no resource are found with this constraint + CompoList componentList; + + // Permits to order resources //! required number of proc long nb_proc; //! required memory size long mem_mb; //! required frequency long cpu_clock; - //! required number of proc per node - long nb_proc_per_node; //! required number of node long nb_node; - //! if true start a MPI container - boolean isMPI; - //! container working directory - string workingdir; - //! creation mode for GiveContainer. + //! required number of proc per node + long nb_proc_per_node; + + // Permits to configure SALOME resource management + //! resource management policy : first, cycl, altcycl or best (can be extended) + string policy; + //! restricted list of resources to search in + ResourceList resList; +}; + +//! Type to describe required properties of a container +struct ContainerParameters +{ + //! container name if given else automatic + string container_name; + + //! creation mode for GiveContainer if given else automatic /*!start creates a new container * get try to find an existing container * getorstart use an existing container if it exists or creates a new one */ string mode; - //! resource management policy : first, cycl, altcycl or best (can be extended) - string policy; + //! container working directory if given else automatic + string workingdir; + + // Parallel part + //! Number of proc of a parallel container + long nb_proc; + //! if true start a MPI container + boolean isMPI; //! PaCO specific informations string parallelLib; - long nb_component_nodes; + + //! Parameters to choose a resource + ResourceParameters resource_params; }; -//! Type to describe properties of a resource. -struct MachineDefinition +//! 
Type to describe a resource +struct ResourceDefinition { - //! host name + //! name + string name; + //! hostname string hostname; - //! alias name - string alias; - //! protocol to use to start a remote container (ssh or rsh) + //! protocol to connect to the resource + //! protocol used to start a remote container (ssh or rsh) string protocol; - //! cluster internal protocol to use to start a remote container (ssh or rsh) on the cluster - string iprotocol; //! login name to use to start a remote container string username; //! salome application to use to start a remote container string applipath; //! list of available components CompoList componentList; + //! operating system string OS; - //! memory size + //! memory size per node long mem_mb; //! frequency long cpu_clock; - //! number of proc per node - long nb_proc_per_node; //! number of node long nb_node; - //! MPI implementation - string mpiImpl; + //! number of proc per node + long nb_proc_per_node; //! batch system string batch; - long nb_component_nodes; + //! MPI implementation + string mpiImpl; + //! if the resource is a cluster: + //! internal protocol to use to start a remote container (ssh or rsh) on the cluster + string iprotocol; }; //! exception thrown if a computer is not found in the catalog @@ -138,8 +159,8 @@ struct JobParameters string maximum_duration; // Memory is expressed in megabytes -> mem_mb - // Number of Processors -> nb_node - MachineParameters resource_required; + // Number of Processors -> nb_proc + ResourceParameters resource_required; /*! 
Name of the batch queue choosed - optional @@ -162,7 +183,7 @@ interface SalomeLauncher // Useful methods long createJobWithFile(in string xmlJobFile, in string clusterName) raises (SALOME::SALOME_Exception); - boolean testBatch (in MachineParameters params) raises (SALOME::SALOME_Exception); + boolean testBatch (in ResourceParameters params) raises (SALOME::SALOME_Exception); // SALOME kernel service methods void Shutdown(); @@ -173,59 +194,80 @@ interface SalomeLauncher This interface is used for interaction with the unique instance of ContainerManager */ - interface ContainerManager - { - //! Find an existing container satisfying the constraints given by input parameters or start a new one. - Container FindOrStartContainer( in MachineParameters params); - - //! This operation launches a PaCO++ container. - /*! - \param Description of the container resquested. - \param List of computers ressources. - - \return Container's CORBA reference. - */ - Container StartParallelContainer( in MachineParameters params); - - //! Start a new container satisfying the constraints given by input parameters. - Container StartContainer( in MachineParameters params); - - //! Same as StartContainer except that in batch all containers have already been launched - /*! - We are in batch if environment variable SALOME_BATCH is 1. - In this case, containers have been launched at the beginning of the Salome session and - the container manager picks one in the pool of existing containers. - */ - Container GiveContainer( in MachineParameters params); - - //! Shutdown all containers that have been launched by the container manager - void ShutdownContainers(); - - } ; +interface ContainerManager +{ + //! GiveContainer - use mode parameter of ContainerParameters to configure + //! how this method works + //! Currently: get, start, getorstart, findorstart, find + Container GiveContainer(in ContainerParameters params); + + //! 
Shutdown all containers that have been launched by the container manager + void ShutdownContainers(); +} ; /*! \brief Interface of the %resourcesManager This interface is used for interaction with the unique instance of ResourcesManager */ - interface ResourcesManager - { - //! Find first available computer in a computers list - string FindFirst(in MachineList possibleComputers); +interface ResourcesManager +{ + //! Find first available resource in a resources list + string FindFirst(in ResourceList possibleResources); - //! Find best available computer according to policy in a computers list - string Find(in string policy, in MachineList possibleComputers); + //! Find best available computer according to policy in a computers list + string Find(in string policy, in ResourceList possibleResources); - //! Get a list of computers that are best suited to launch a container given constraints - /*! - The constraints are resource constraints (params) and components constraints (componentList) - */ - MachineList GetFittingResources( in MachineParameters params) - raises (SALOME::SALOME_Exception); + //! Get a list of resources that are best suited to launch a container given constraints + /*! + The constraints are resource constraints (params) and components constraints (componentList) + */ + ResourceList GetFittingResources(in ResourceParameters params) raises (SALOME::SALOME_Exception); + + //! Get definition of a resource + ResourceDefinition GetResourceDefinition(in string name); +}; - //! Get the current machine parameters of a computer - MachineDefinition GetMachineParameters( in string hostname ); - } ; +// For compatibility - will be erased on SALOME 6 +typedef sequence MachineList; +//! Type to describe required properties of a container. +struct MachineParameters +{ + //! container name if given else automatic + string container_name; + //! host name if given else automatic + string hostname; + //! 
if given list of components that could be loaded on the container + CompoList componentList; + //! if given restricted list of machines to search in + MachineList computerList; + //! required operating system + string OS; + //! required memory size + long mem_mb; + //! required frequency + long cpu_clock; + //! required number of proc per node + long nb_proc_per_node; + //! required number of node + long nb_node; + //! if true start a MPI container + boolean isMPI; + //! container working directory + string workingdir; + //! creation mode for GiveContainer. + /*!start creates a new container + * get try to find an existing container + * getorstart use an existing container if it exists or creates a new one + */ + string mode; + //! resource management policy : first, cycl, altcycl or best (can be extended) + string policy; + + //! PaCO specific informations + string parallelLib; + long nb_component_nodes; +}; }; #endif diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx index ba8246b26..d68d7aef7 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -41,10 +41,6 @@ using namespace std; -vector SALOME_ContainerManager::_batchLaunchedContainers; - -vector::iterator SALOME_ContainerManager::_batchLaunchedContainersIter; - const char *SALOME_ContainerManager::_ContainerManagerNameInNS = "/ContainerManager"; @@ -57,7 +53,8 @@ const char *SALOME_ContainerManager::_ContainerManagerNameInNS = */ //============================================================================= -SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns) +SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, + SALOME_ResourcesManager *rm, SALOME_NamingService *ns) { MESSAGE("constructor"); _NS = ns; @@ -197,144 +194,187 @@ void 
SALOME_ContainerManager::ShutdownContainers() //============================================================================= //! Give a suitable Container given constraints /*! CORBA Method: - * \param params Machine Parameters required for the container + * \param params Container Parameters required for the container * \return the container or nil */ //============================================================================= - Engines::Container_ptr -SALOME_ContainerManager::GiveContainer(const Engines::MachineParameters& params) +SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& params) { - char *valenv=getenv("SALOME_BATCH"); - if(valenv) - if (strcmp(valenv,"1")==0) - { - if(_batchLaunchedContainers.empty()) - fillBatchLaunchedContainers(); - - if (_batchLaunchedContainersIter == _batchLaunchedContainers.end()) - _batchLaunchedContainersIter = _batchLaunchedContainers.begin(); - - Engines::Container_ptr rtn = Engines::Container::_duplicate(*_batchLaunchedContainersIter); - _batchLaunchedContainersIter++; - return rtn; - } - return StartContainer(params); -} + Engines::Container_ptr ret = Engines::Container::_nil(); -//============================================================================= -//! Start a suitable Container in a list of machines with constraints -/*! 
C++ Method: - * Constraints are given by a machine parameters struct - * \param params Machine Parameters required for the container - * \param possibleComputers list of machines usable for start - * \param container_exe specific container executable (default=SALOME_Container) - */ -//============================================================================= + // Step 0: Default mode is start + Engines::ContainerParameters local_params(params); + if (std::string(local_params.mode.in()) == "") + local_params.mode = CORBA::string_dup("start"); + std::string mode = local_params.mode.in(); + MESSAGE("[GiveContainer] starting with mode: " << mode); -Engines::Container_ptr -SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params, - const Engines::MachineList& possibleComputers, - const std::string& container_exe) -{ -#ifdef WITH_PACO_PARALLEL - std::string parallelLib(params.parallelLib); - if (parallelLib != "") + // Step 1: Find Container for find and findorstart mode + if (mode == "find" or mode == "findorstart") { - Engines::MachineParameters myparams(params); - myparams.computerList=possibleComputers; - return StartParallelContainer(myparams); + ret = FindContainer(params, params.resource_params.resList); + if(!CORBA::is_nil(ret)) + return ret; + else + { + if (mode == "find") + { + MESSAGE("[GiveContainer] no container found"); + return ret; + } + else + { + mode = "start"; + } + } } -#endif - string containerNameInNS; - Engines::Container_ptr ret = Engines::Container::_nil(); - MESSAGE("SALOME_ContainerManager::StartContainer " << possibleComputers.length()); + // Step 2: Get all possibleResources from the parameters + Engines::ResourceList_var possibleResources = _ResManager->GetFittingResources(local_params.resource_params); + MESSAGE("[GiveContainer] - length of possible resources " << possibleResources->length()); + std::vector local_resources; - vector lm; -// if mode is "get" keep only machines with existing containers - 
if(std::string(params.mode.in())=="get") - { - for(unsigned int i=0;i_non_existent()) - lm.push_back(string(possibleComputers[i])); - } - catch(CORBA::Exception&) - { - // CORBA::Exception ignored. - } - } - } - else + // Step 3: if mode is "get" keep only machines with existing containers + if(mode == "get") + { + for(unsigned int i=0; i < possibleResources->length(); i++) { - for(unsigned int i=0;i_non_existent()) + local_resources.push_back(string(possibleResources[i])); + } + catch(CORBA::Exception&) {} } - string theMachine; - try - { - theMachine=_ResManager->GetImpl()->Find(params.policy.in(),lm); - } - catch( const SALOME_Exception &ex ) + // if local_resources is empty, we cannot give a container + if (local_resources.size() == 0) { - MESSAGE(ex.what()); - return Engines::Container::_nil(); + MESSAGE("[GiveContainer] cannot find a container for mode get"); + return ret; } + } + else + for(unsigned int i=0; i < possibleResources->length(); i++) + local_resources.push_back(string(possibleResources[i])); - //If the machine name is localhost use the real name - if(theMachine == "localhost") - theMachine=Kernel_Utils::GetHostname(); + // Step 4: select the resource where to get/start the container + std::string resource_selected; + try + { + resource_selected = _ResManager->GetImpl()->Find(params.resource_params.policy.in(), local_resources); + } + catch(const SALOME_Exception &ex) + { + MESSAGE("[GiveContainer] Exception in ResourceManager find !: " << ex.what()); + return ret; + } + MESSAGE("[GiveContainer] Resource selected is: " << resource_selected); - //check if an entry exists in Naming service - //if params.mode == "start" or "" shutdown the existing container before launching a new one with that name - //if params.mode == "getorstart" or "get" use the existing container + // Step 5: get container in the naming service + Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str()); + std::string 
hostname(resource_definition->name.in()); + std::string containerNameInNS; if(params.isMPI) - // A parallel container register on zero node in NS - containerNameInNS = _NS->BuildContainerNameForNS(params,GetMPIZeroNode(theMachine).c_str()); + // A mpi parallel container register on zero node in NS + containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname).c_str()); else - containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str()); + containerNameInNS = _NS->BuildContainerNameForNS(params, hostname.c_str()); + MESSAGE("[GiveContainer] Container name in the naming service: " << containerNameInNS); - SCRUTE(containerNameInNS); + // Step 6: check if the name exists in naming service + //if params.mode == "getorstart" or "get" use the existing container + //if params.mode == "start" shutdown the existing container before launching a new one with that name CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); - if ( !CORBA::is_nil(obj) ) + if (!CORBA::is_nil(obj)) + { + try { - try - { - Engines::Container_var cont=Engines::Container::_narrow(obj); - if(!cont->_non_existent()) - { - if(std::string(params.mode.in())=="getorstart"||std::string(params.mode.in())=="get") - return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/ - else - { - INFOS("A container is already registered with the name: " << containerNameInNS << ", shutdown the existing container"); - cont->Shutdown(); // shutdown the registered container if it exists - } - } - } - catch(CORBA::Exception&) - { - INFOS("CORBA::Exception ignored."); - } + Engines::Container_var cont=Engines::Container::_narrow(obj); + if(!cont->_non_existent()) + { + if(std::string(params.mode.in())=="getorstart" or std::string(params.mode.in())=="get") + return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/ + else + { + INFOS("[GiveContainer] A container is already registered with the name: " << 
containerNameInNS << ", shutdown the existing container"); + cont->Shutdown(); // shutdown the registered container if it exists + } + } } + catch(CORBA::Exception&) + { + INFOS("[GiveContainer] CORBA::Exception ignored when trying to get the container - we start a new one"); + } + } - //try to launch a new container - MESSAGE("try to launch it on " << theMachine); - - string command; - if(theMachine==""){ - MESSAGE("SALOME_ContainerManager::StartContainer : no possible computer"); - return Engines::Container::_nil(); + // Step 7: type of container: PaCO, Exe, Mpi or Classic + // Mpi already tested in step 5, specific code on BuildCommandToLaunch Local/Remote Container methods + // TODO -> separates Mpi from Classic/Exe + // PaCO++ + std::string parallelLib(params.parallelLib); + if (std::string(local_params.parallelLib.in()) != "") + { + INFOS("[GiveContainer] PaCO++ container are not currently available"); + return ret; + } + // Classic or Exe ? + std::string container_exe = "SALOME_Container"; // Classic container + int found=0; + try + { + CORBA::String_var container_exe_tmp; + CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); + SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; + if (CORBA::is_nil (Catalog)) + { + INFOS("[GiveContainer] Module Catalog is not found -> cannot launch a container"); + return ret; + } + // Loop through component list + for(unsigned int i=0; i < local_params.resource_params.componentList.length(); i++) + { + const char* compoi = local_params.resource_params.componentList[i]; + SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi); + if (CORBA::is_nil (compoInfo)) + { + continue; + } + SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type(); + container_exe_tmp=compoInfo->implementation_name(); + if(impl==SALOME_ModuleCatalog::CEXE) + { + if(found) + { + INFOS("ContainerManager Error: you can't have 2 CEXE component in the same 
container" ); + return Engines::Container::_nil(); + } + MESSAGE("[GiveContainer] Exe container found !: " << container_exe_tmp); + container_exe = container_exe_tmp.in(); + found=1; + } + } + } + catch (ServiceUnreachable&) + { + INFOS("Caught exception: Naming Service Unreachable"); + return ret; } - else if(theMachine==Kernel_Utils::GetHostname()) - command = BuildCommandToLaunchLocalContainer(params,container_exe); + catch (...) + { + INFOS("Caught unknown exception."); + return ret; + } + + // Step 8: start a new container + MESSAGE("[GiveContainer] Try to launch a new container on " << resource_selected); + std::string command; + if(hostname == Kernel_Utils::GetHostname()) + command = BuildCommandToLaunchLocalContainer(params, container_exe); else - command = BuildCommandToLaunchRemoteContainer(theMachine,params,container_exe); + command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe); //redirect stdout and stderr in a file #ifdef WNT @@ -344,16 +384,16 @@ SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params string logFilename="/tmp"; char* val = getenv("SALOME_TMP_DIR"); if(val) - { - struct stat file_info; - stat(val, &file_info); - bool is_dir = S_ISDIR(file_info.st_mode); - if (is_dir)logFilename=val; - else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl; - } + { + struct stat file_info; + stat(val, &file_info); + bool is_dir = S_ISDIR(file_info.st_mode); + if (is_dir)logFilename=val; + else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl; + } logFilename += "/"; #endif - logFilename += _NS->ContainerName(params)+"_"+ theMachine +"_"+getenv( "USER" )+".log" ; + logFilename += _NS->ContainerName(params)+"_"+ resource_selected +"_"+getenv( "USER" )+".log" ; command += " > " + logFilename + " 2>&1"; #ifdef WNT command = "%PYTHONBIN% -c \"import win32pm ; win32pm.spawnpid(r'" + command + "', 
'')\""; @@ -374,117 +414,37 @@ SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params RmTmpFile(_TmpFileName); // command file can be removed here return Engines::Container::_nil(); } - else{ - int count=TIME_OUT_TO_LAUNCH_CONT; - MESSAGE("count = "<Resolve(containerNameInNS.c_str()); ret=Engines::Container::_narrow(obj); } - - if ( CORBA::is_nil(ret) ) - { - MESSAGE("SALOME_ContainerManager::StartContainer rsh failed"); - } - else - { - logFilename=":"+logFilename; - logFilename="@"+Kernel_Utils::GetHostname()+logFilename; - logFilename=getenv( "USER" )+logFilename; - ret->logfilename(logFilename.c_str()); - } - - RmTmpFile(_TmpFileName); // command file can be removed here - return ret; - } -} - -//============================================================================= -//! Start a suitable Container given constraints -/*! CORBA Method: - * \param params Machine Parameters required for the container - */ -//============================================================================= - -Engines::Container_ptr -SALOME_ContainerManager::StartContainer(const Engines::MachineParameters& params) -{ - Engines::MachineList_var possibleComputers = _ResManager->GetFittingResources(params); - - // Look into ModulCatalog if a specific container must be launched - CORBA::String_var container_exe; - int found=0; - try - { - CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); - SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; - if (CORBA::is_nil (Catalog)) - return Engines::Container::_nil(); - // Loop through component list - for(unsigned int i=0;iGetComponent(compoi); - if (CORBA::is_nil (compoInfo)) - { - continue; - } - SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type(); - container_exe=compoInfo->implementation_name(); - if(impl==SALOME_ModuleCatalog::CEXE) - { - if(found) - { - INFOS("ContainerManager Error: you can't have 2 CEXE component in the same 
container" ); - return Engines::Container::_nil(); - } - found=1; - } - } - } - catch (ServiceUnreachable&) + if (CORBA::is_nil(ret)) { - INFOS("Caught exception: Naming Service Unreachable"); - return Engines::Container::_nil(); + INFOS("[GiveContainer] was not able to launch container " << containerNameInNS); } - catch (...) + else { - INFOS("Caught unknown exception."); - return Engines::Container::_nil(); + // Setting log file name + logFilename=":"+logFilename; + logFilename="@"+Kernel_Utils::GetHostname()+logFilename; + logFilename=getenv( "USER" )+logFilename; + ret->logfilename(logFilename.c_str()); + RmTmpFile(_TmpFileName); // command file can be removed here } - - if(found) - return StartContainer(params,possibleComputers,container_exe.in()); - else - return StartContainer(params,possibleComputers); -} - -//============================================================================= -//! Find or start a suitable Container given some constraints -/*! CORBA Method: - * \param params Machine Parameters required for the container - * \return the container or nil - */ -//============================================================================= - -Engines::Container_ptr -SALOME_ContainerManager::FindOrStartContainer(const Engines::MachineParameters& params) -{ - Engines::Container_ptr ret = FindContainer(params,params.computerList); - if(!CORBA::is_nil(ret)) - return ret; - MESSAGE("Container doesn't exist try to launch it ..."); - - return StartContainer(params); + } + return ret; } //============================================================================= @@ -495,18 +455,17 @@ SALOME_ContainerManager::FindOrStartContainer(const Engines::MachineParameters& //============================================================================= Engines::Container_ptr -SALOME_ContainerManager::FindContainer(const Engines::MachineParameters& params, - const Engines::MachineList& possibleComputers) +SALOME_ContainerManager::FindContainer(const 
Engines::ContainerParameters& params, + const Engines::ResourceList& possibleResources) { - MESSAGE("FindContainer "<BuildContainerNameForNS(params,theMachine)); + std::string containerNameInNS(_NS->BuildContainerNameForNS(params, resource.c_str())); + MESSAGE("[FindContainer] Try to find a container " << containerNameInNS << " on resource " << resource); CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str()); try - { - if(obj->_non_existent()) - return Engines::Container::_nil(); - else - return Engines::Container::_narrow(obj); - } - catch(const CORBA::Exception& e) - { + { + if(obj->_non_existent()) return Engines::Container::_nil(); - } + else + return Engines::Container::_narrow(obj); + } + catch(const CORBA::Exception& e) + { + return Engines::Container::_nil(); + } } -#ifdef WITH_PACO_PARALLEL //============================================================================= -/*! CORBA Method: - * Find or Start a suitable PaCO++ Parallel Container in a list of machines. - * \param params Machine Parameters required for the container - * \return CORBA container reference. - */ +/*! + * This is no longer valid (C++ container are also python containers) + */ //============================================================================= -Engines::Container_ptr -SALOME_ContainerManager::StartParallelContainer(const Engines::MachineParameters& params_const) +bool isPythonContainer(const char* ContainerName) { - CORBA::Object_var obj; - PaCO::InterfaceManager_var container_proxy; - Engines::Container_ptr ret = Engines::Container::_nil(); - Engines::MachineParameters params(params_const); + bool ret = false; + int len = strlen(ContainerName); - // Step 1 : Try to find a suitable container - // Currently not as good as could be since - // we have to verified the number of nodes of the container - // if a user tell that. - ret = FindContainer(params, params.computerList); - if(CORBA::is_nil(ret)) { - // Step 2 : Starting a new parallel container ! 
- INFOS("[StartParallelContainer] Starting a PaCO++ parallel container"); + if (len >= 2) + if (strcmp(ContainerName + len - 2, "Py") == 0) + ret = true; - // Step 3 : Choose a computer - std::string theMachine = _ResManager->FindFirst(params.computerList); - //If the machine name is localhost use the real name - if(theMachine == "localhost") - theMachine=Kernel_Utils::GetHostname(); + return ret; +} - if(theMachine == "") { - INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); - INFOS("[StartParallelContainer] No possible computer found"); - INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); - return ret; - } - INFOS("[StartParallelContainer] on machine : " << theMachine); - params.hostname = CORBA::string_dup(theMachine.c_str()); +//============================================================================= +/*! + * Builds the script to be launched + * + * If SALOME Application not defined ($APPLI), + * see BuildTempFileToLaunchRemoteContainer() + * + * Else rely on distant configuration. Command is under the form (example): + * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ + * SALOME_Container containerName &" - // Step 4 : starting parallel container proxy - Engines::MachineParameters params_proxy(params); - std::string command_proxy; - SALOME_ContainerManager::actual_launch_machine_t proxy_machine; - try + * - where user is ommited if not specified in CatalogResources, + * - where distant path is always relative to user@machine $HOME, and + * equal to $APPLI if not specified in CatalogResources, + * - where hostNS is the hostname of CORBA naming server (set by scripts to + * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) + * - where portNS is the port used by CORBA naming server (set by scripts to + * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) + * - where workingdir is the requested working directory for the container. 
+ * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME + */ +//============================================================================= + +string +SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer +(const string& resource_name, + const Engines::ContainerParameters& params, const std::string& container_exe) +{ + + string command; + if (!_isAppliSalomeDefined) + command = BuildTempFileToLaunchRemoteContainer(resource_name, params); + else + { + int nbproc; + Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_name.c_str()); + std::string hostname(resource_definition->name.in()); + const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name); + + if (params.isMPI) { - command_proxy = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerProxy", params_proxy, proxy_machine); - } - catch(const SALOME_Exception & ex) - { - INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); - INFOS(ex.what()); - return ret; - } - params_proxy.nb_component_nodes = 0; // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes - obj = LaunchParallelContainer(command_proxy, params_proxy, _NS->ContainerName(params_proxy), proxy_machine); - if (CORBA::is_nil(obj)) - { - INFOS("[StartParallelContainer] LaunchParallelContainer for proxy returns NIL !"); - return ret; - } - try - { - container_proxy = PaCO::InterfaceManager::_narrow(obj); - } - catch(CORBA::SystemException& e) - { - INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); - INFOS("CORBA::SystemException : " << e); - return ret; - } - catch(CORBA::Exception& e) - { - INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); - INFOS("CORBA::Exception" << e); - return ret; - } - catch(...) 
- { - INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); - INFOS("Unknown exception !"); - return ret; - } - if (CORBA::is_nil(container_proxy)) - { - INFOS("[StartParallelContainer] PaCO::InterfaceManager::_narrow returns NIL !"); - return ret; + if ((params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0)) + nbproc = 1; + else if (params.resource_params.nb_node == 0) + nbproc = params.resource_params.nb_proc_per_node; + else if (params.resource_params.nb_proc_per_node == 0) + nbproc = params.resource_params.nb_node; + else + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; } - // Step 5 : starting parallel container nodes - std::string command_nodes; - Engines::MachineParameters params_nodes(params); - SALOME_ContainerManager::actual_launch_machine_t nodes_machines; - try - { - command_nodes = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerNode", params_nodes, nodes_machines, proxy_machine[0]); - } - catch(const SALOME_Exception & ex) + // "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ + // SALOME_Container containerName &" + if (resInfo.Protocol == rsh) + command = "rsh "; + else if (resInfo.Protocol == ssh) + command = "ssh "; + else + throw SALOME_Exception("Unknown protocol"); + + if (resInfo.UserName != "") { - INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); - INFOS(ex.what()); - return ret; + command += "-l "; + command += resInfo.UserName; + command += " "; } - std::string container_generic_node_name = _NS->ContainerName(params) + "Node"; - obj = LaunchParallelContainer(command_nodes, params_nodes, container_generic_node_name, nodes_machines); - if (CORBA::is_nil(obj)) + + command += resInfo.HostName; + command += " "; + + if (resInfo.AppliPath != "") + command += resInfo.AppliPath; // path relative to user@machine $HOME + else { - 
INFOS("[StartParallelContainer] LaunchParallelContainer for nodes returns NIL !"); - // Il faut tuer le proxy - try - { - Engines::Container_var proxy = Engines::Container::_narrow(container_proxy); - proxy->Shutdown(); - } - catch (...) - { - INFOS("[StartParallelContainer] Exception catched from proxy Shutdown..."); - } - return ret; + ASSERT(getenv("APPLI")); + command += getenv("APPLI"); // path relative to user@machine $HOME } - // Step 6 : connecting nodes and the proxy to actually create a parallel container - for (int i = 0; i < params.nb_component_nodes; i++) - { - std::ostringstream tmp; - tmp << i; - std::string proc_number = tmp.str(); - std::string container_node_name = container_generic_node_name + proc_number; + command += "/runRemote.sh "; - std::string theNodeMachine(nodes_machines[i]); - std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str()); - obj = _NS->Resolve(containerNameInNS.c_str()); - if (CORBA::is_nil(obj)) - { - INFOS("[StartParallelContainer] CONNECTION FAILED From Naming Service !"); - INFOS("[StartParallelContainer] Container name is " << containerNameInNS); - return ret; - } - try - { - MESSAGE("[StartParallelContainer] Deploying node : " << container_node_name); - PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj); - node->deploy(); - MESSAGE("[StartParallelContainer] node " << container_node_name << " is deployed"); - } - catch(CORBA::SystemException& e) - { - INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); - INFOS("CORBA::SystemException : " << e); - return ret; - } - catch(CORBA::Exception& e) - { - INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); - INFOS("CORBA::Exception" << e); - return ret; - } - catch(...) 
- { - INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); - INFOS("Unknown exception !"); - return ret; - } - } + ASSERT(getenv("NSHOST")); + command += getenv("NSHOST"); // hostname of CORBA name server - // Step 7 : starting parallel container - try - { - MESSAGE ("[StartParallelContainer] Starting parallel object"); - container_proxy->start(); - MESSAGE ("[StartParallelContainer] Parallel object is started"); - ret = Engines::Container::_narrow(container_proxy); - } - catch(CORBA::SystemException& e) - { - INFOS("Caught CORBA::SystemException. : " << e); - } - catch(PortableServer::POA::ServantAlreadyActive&) - { - INFOS("Caught CORBA::ServantAlreadyActiveException"); - } - catch(CORBA::Exception&) - { - INFOS("Caught CORBA::Exception."); - } - catch(std::exception& exc) + command += " "; + ASSERT(getenv("NSPORT")); + command += getenv("NSPORT"); // port of CORBA name server + + std::string wdir = params.workingdir.in(); + if(wdir != "") { - INFOS("Caught std::exception - "<ContainerName(params); + command += " -"; + AddOmninamesParams(command); + + MESSAGE("command =" << command); } - return ret; -} -#else -//============================================================================= -/*! CORBA Method: - * Find or Start a suitable PaCO++ Parallel Container in a list of machines. - * \param params Machine Parameters required for the container - * \return CORBA container reference. - */ -//============================================================================= -Engines::Container_ptr -SALOME_ContainerManager::StartParallelContainer(const Engines::MachineParameters& params) -{ - Engines::Container_ptr ret = Engines::Container::_nil(); - INFOS("[StartParallelContainer] is disabled !"); - INFOS("[StartParallelContainer] recompile SALOME Kernel to enable parallel extension"); - return ret; + + return command; } -#endif //============================================================================= -/*! 
This method launches the parallel container. - * It will may be placed on the ressources manager. - * - * \param command to launch - * \param container's parameters - * \param name of the container - * - * \return CORBA container reference - */ +/*! + * builds the command to be launched. + */ //============================================================================= -CORBA::Object_ptr -SALOME_ContainerManager::LaunchParallelContainer(const std::string& command, - const Engines::MachineParameters& params, - const std::string& name, - SALOME_ContainerManager::actual_launch_machine_t & vect_machine) +string +SALOME_ContainerManager::BuildCommandToLaunchLocalContainer +(const Engines::ContainerParameters& params, const std::string& container_exe) { - CORBA::Object_ptr obj = CORBA::Object::_nil(); - std::string containerNameInNS; - int count = TIME_OUT_TO_LAUNCH_CONT; + _TmpFileName = BuildTemporaryFileName(); + string command; + int nbproc = 0; - INFOS("[LaunchParallelContainer] Begin"); - int status = system(command.c_str()); - if (status == -1) { - INFOS("[LaunchParallelContainer] failed : system command status -1"); - return obj; - } - else if (status == 217) { - INFOS("[LaunchParallelContainer] failed : system command status 217"); - return obj; - } + ostringstream o; - if (params.nb_component_nodes == 0) - { - std::string theMachine(vect_machine[0]); - // Proxy We have launch a proxy - containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(), theMachine.c_str()); - INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy " << containerNameInNS << " on " << theMachine); - while (CORBA::is_nil(obj) && count) + if (params.isMPI) { -#ifndef WIN32 - sleep(1) ; -#else - Sleep(1000); + o << "mpirun -np "; + + if ( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) + nbproc = 1; + else if ( params.resource_params.nb_node == 0 ) + nbproc = params.resource_params.nb_proc_per_node; + else if ( 
params.resource_params.nb_proc_per_node == 0 ) + nbproc = params.resource_params.nb_node; + else + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; + + o << nbproc << " "; + +#ifdef WITHLAM + o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; +#elif defined(WITHOPENMPI) + if( getenv("OMPI_URI_FILE") == NULL ) + o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + else + { + o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + o << getenv("OMPI_URI_FILE"); + } #endif - count-- ; - obj = _NS->Resolve(containerNameInNS.c_str()); + + if (isPythonContainer(params.container_name)) + o << " pyMPI SALOME_ContainerPy.py "; + else + o << " SALOME_MPIContainer "; } - } - else - { - INFOS("[LaunchParallelContainer] launching the nodes of the parallel container"); - // We are waiting all the nodes - for (int i = 0; i < params.nb_component_nodes; i++) - { - obj = CORBA::Object::_nil(); - std::string theMachine(vect_machine[i]); - // Name of the node - std::ostringstream tmp; - tmp << i; - std::string proc_number = tmp.str(); - std::string container_node_name = name + proc_number; - containerNameInNS = _NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str()); - INFOS("[LaunchParallelContainer] Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine); - while (CORBA::is_nil(obj) && count) { -#ifndef WIN32 - sleep(1) ; -#else - Sleep(1000); -#endif - count-- ; - obj = _NS->Resolve(containerNameInNS.c_str()); - } - if (CORBA::is_nil(obj)) - { - INFOS("[LaunchParallelContainer] Launch of node failed (or not found) !"); - return obj; - } - } - } - if (CORBA::is_nil(obj)) - INFOS("[LaunchParallelContainer] failed"); - - return obj; -} - -void SALOME_ContainerManager::fillBatchLaunchedContainers() -{ - _batchLaunchedContainers.clear(); - _NS->Change_Directory("/Containers"); - vector vec = _NS->list_directory_recurs(); - 
for(vector::iterator iter = vec.begin();iter!=vec.end();iter++){ - CORBA::Object_var obj=_NS->Resolve((*iter).c_str()); - Engines::Container_ptr cont=Engines::Container::_narrow(obj); - if(!CORBA::is_nil(cont)){ - _batchLaunchedContainers.push_back(cont); - } - } - _batchLaunchedContainersIter=_batchLaunchedContainers.begin(); -} - -//============================================================================= -/*! - * This is no longer valid (C++ container are also python containers) - */ -//============================================================================= - -bool isPythonContainer(const char* ContainerName) -{ - bool ret = false; - int len = strlen(ContainerName); - - if (len >= 2) - if (strcmp(ContainerName + len - 2, "Py") == 0) - ret = true; - - return ret; -} - -//============================================================================= -/*! - * Builds the script to be launched - * - * If SALOME Application not defined ($APPLI), - * see BuildTempFileToLaunchRemoteContainer() - * - * Else rely on distant configuration. Command is under the form (example): - * ssh user@machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ - * SALOME_Container containerName &" - - * - where user is ommited if not specified in CatalogResources, - * - where distant path is always relative to user@machine $HOME, and - * equal to $APPLI if not specified in CatalogResources, - * - where hostNS is the hostname of CORBA naming server (set by scripts to - * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) - * - where portNS is the port used by CORBA naming server (set by scripts to - * use to launch SALOME and servers in $APPLI: runAppli.sh, runRemote.sh) - * - where workingdir is the requested working directory for the container. 
- * If WORKINGDIR (and workingdir) is not present the working dir will be $HOME - */ -//============================================================================= - -string -SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer -(const string& machine, - const Engines::MachineParameters& params, const std::string& container_exe) -{ - string command; - int nbproc; - - if ( ! _isAppliSalomeDefined ) - command = BuildTempFileToLaunchRemoteContainer(machine, params); - - else - { - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine); - - if (params.isMPI) - { - if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) - nbproc = 1; - else if ( params.nb_node == 0 ) - nbproc = params.nb_proc_per_node; - else if ( params.nb_proc_per_node == 0 ) - nbproc = params.nb_node; - else - nbproc = params.nb_node * params.nb_proc_per_node; - } - - // "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \ - // SALOME_Container containerName &" - - if (resInfo.Protocol == rsh) - command = "rsh "; - else if (resInfo.Protocol == ssh) - command = "ssh "; - else - throw SALOME_Exception("Unknown protocol"); - - if (resInfo.UserName != "") - { - command += "-l "; - command += resInfo.UserName; - command += " "; - } - - command += machine; - command += " "; - - if (resInfo.AppliPath != "") - command += resInfo.AppliPath; // path relative to user@machine $HOME - else - { - ASSERT(getenv("APPLI")); - command += getenv("APPLI"); // path relative to user@machine $HOME - } - - command += "/runRemote.sh "; - - ASSERT(getenv("NSHOST")); - command += getenv("NSHOST"); // hostname of CORBA name server - - command += " "; - ASSERT(getenv("NSPORT")); - command += getenv("NSPORT"); // port of CORBA name server - - std::string wdir=params.workingdir.in(); - if(wdir != "") - { - command += " WORKINGDIR "; - command += " '"; - if(wdir == "$TEMPDIR") - wdir="\\$TEMPDIR"; - command += wdir; // requested working directory - 
command += "'"; - } - - if(params.isMPI) - { - command += " mpirun -np "; - std::ostringstream o; - o << nbproc << " "; - command += o.str(); -#ifdef WITHLAM - command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) - command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; - else{ - command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - command += getenv("OMPI_URI_FILE"); - } -#endif - command += " SALOME_MPIContainer "; - } - else - command += " " +container_exe+ " "; - - command += _NS->ContainerName(params); - command += " -"; - AddOmninamesParams(command); - - MESSAGE("command =" << command); - } - - return command; -} - -//============================================================================= -/*! - * builds the command to be launched. - */ -//============================================================================= - -string -SALOME_ContainerManager::BuildCommandToLaunchLocalContainer -(const Engines::MachineParameters& params, const std::string& container_exe) -{ - _TmpFileName = BuildTemporaryFileName(); - string command; - int nbproc = 0; - - ostringstream o; - - if (params.isMPI) - { - o << "mpirun -np "; - - if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) - nbproc = 1; - else if ( params.nb_node == 0 ) - nbproc = params.nb_proc_per_node; - else if ( params.nb_proc_per_node == 0 ) - nbproc = params.nb_node; - else - nbproc = params.nb_node * params.nb_proc_per_node; - - o << nbproc << " "; - -#ifdef WITHLAM - o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) - o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; - else - { - o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - o << getenv("OMPI_URI_FILE"); - } -#endif - - if 
(isPythonContainer(params.container_name)) - o << " pyMPI SALOME_ContainerPy.py "; - else - o << " SALOME_MPIContainer "; - } - - else + + else { std::string wdir=params.workingdir.in(); if(wdir != "") @@ -1150,6 +793,18 @@ void SALOME_ContainerManager::AddOmninamesParams(string& command) const command += iorstr; } +//============================================================================= +/*! + * add to command all options relative to naming service. + */ +//============================================================================= + +void SALOME_ContainerManager::AddOmninamesParams(ofstream& fileStream) const +{ + CORBA::String_var iorstr = _NS->getIORaddr(); + fileStream << "ORBInitRef NameService="; + fileStream << iorstr; +} //============================================================================= /*! @@ -1163,169 +818,497 @@ void SALOME_ContainerManager::AddOmninamesParams(ostringstream& oss) const oss << "ORBInitRef NameService="; oss << iorstr; } - + +//============================================================================= +/*! + * generate a file name in /tmp directory + */ +//============================================================================= + +string SALOME_ContainerManager::BuildTemporaryFileName() const +{ + //build more complex file name to support multiple salome session + string aFileName = Kernel_Utils::GetTmpFileName(); +#ifndef WIN32 + aFileName += ".sh"; +#else + aFileName += ".bat"; +#endif + return aFileName; +} + +string SALOME_ContainerManager::GetMPIZeroNode(string machine) +{ + int status; + string zeronode; + string cmd; + string tmpFile = BuildTemporaryFileName(); + + cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile; + + status = system(cmd.c_str()); + if( status == 0 ){ + ifstream fp(tmpFile.c_str(),ios::in); + fp >> zeronode; + } + + RmTmpFile(tmpFile); + + return zeronode; +} + +//============================================================================= +/*! 
+ * Builds in a temporary file the script to be launched. + * + * Used if SALOME Application ($APPLI) is not defined. + * The command is build with data from CatalogResources, in which every path + * used on remote computer must be defined. + */ +//============================================================================= + +string +SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer +(const string& resource_name, + const Engines::ContainerParameters& params) throw(SALOME_Exception) +{ + int status; + + _TmpFileName = BuildTemporaryFileName(); + ofstream tempOutputFile; + tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); + const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name); + tempOutputFile << "#! /bin/sh" << endl; + + // --- set env vars + + tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace" + //tempOutputFile << "source " << resInfo.PreReqFilePath << endl; + + // ! 
env vars + + if (params.isMPI) + { + tempOutputFile << "mpirun -np "; + int nbproc; + + if ( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) + nbproc = 1; + else if ( params.resource_params.nb_node == 0 ) + nbproc = params.resource_params.nb_proc_per_node; + else if ( params.resource_params.nb_proc_per_node == 0 ) + nbproc = params.resource_params.nb_node; + else + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; + + std::ostringstream o; + + tempOutputFile << nbproc << " "; +#ifdef WITHLAM + tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; +#elif defined(WITHOPENMPI) + if( getenv("OMPI_URI_FILE") == NULL ) + tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + else{ + tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + tempOutputFile << getenv("OMPI_URI_FILE"); + } +#endif + } + + tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/"; + + if (params.isMPI) + { + if (isPythonContainer(params.container_name)) + tempOutputFile << " pyMPI SALOME_ContainerPy.py "; + else + tempOutputFile << " SALOME_MPIContainer "; + } + + else + { + if (isPythonContainer(params.container_name)) + tempOutputFile << "SALOME_ContainerPy.py "; + else + tempOutputFile << "SALOME_Container "; + } + + tempOutputFile << _NS->ContainerName(params) << " -"; + AddOmninamesParams(tempOutputFile); + tempOutputFile << " &" << endl; + tempOutputFile.flush(); + tempOutputFile.close(); +#ifndef WIN32 + chmod(_TmpFileName.c_str(), 0x1ED); +#endif + + // --- Build command + + string command; + + if (resInfo.Protocol == rsh) + { + command = "rsh "; + string commandRcp = "rcp "; + commandRcp += _TmpFileName; + commandRcp += " "; + commandRcp += resInfo.HostName; + commandRcp += ":"; + commandRcp += _TmpFileName; + status = system(commandRcp.c_str()); + } + + else if (resInfo.Protocol == ssh) + { + command = "ssh "; + 
string commandRcp = "scp "; + commandRcp += _TmpFileName; + commandRcp += " "; + commandRcp += resInfo.HostName; + commandRcp += ":"; + commandRcp += _TmpFileName; + status = system(commandRcp.c_str()); + } + else + throw SALOME_Exception("Unknown protocol"); + + if(status) + throw SALOME_Exception("Error of connection on remote host"); + + command += resInfo.HostName; + _CommandForRemAccess = command; + command += " "; + command += _TmpFileName; + + SCRUTE(command); + + return command; + +} + +#ifdef WITH_PACO_PARALLEL +//============================================================================= +/*! CORBA Method: + * Find or Start a suitable PaCO++ Parallel Container in a list of machines. + * \param params Machine Parameters required for the container + * \return CORBA container reference. + */ +//============================================================================= +Engines::Container_ptr +SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters& params_const) +{ + CORBA::Object_var obj; + PaCO::InterfaceManager_var container_proxy; + Engines::Container_ptr ret = Engines::Container::_nil(); + Engines::MachineParameters params(params_const); + + // Step 1 : Try to find a suitable container + // Currently not as good as could be since + // we have to verified the number of nodes of the container + // if a user tell that. + ret = FindContainer(params, params.computerList); + if(CORBA::is_nil(ret)) { + // Step 2 : Starting a new parallel container ! 
+ INFOS("[StartParallelContainer] Starting a PaCO++ parallel container"); + + // Step 3 : Choose a computer + std::string theMachine = _ResManager->FindFirst(params.computerList); + //If the machine name is localhost use the real name + if(theMachine == "localhost") + theMachine=Kernel_Utils::GetHostname(); + + if(theMachine == "") { + INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); + INFOS("[StartParallelContainer] No possible computer found"); + INFOS("[StartParallelContainer] !!!!!!!!!!!!!!!!!!!!!!!!!!"); + return ret; + } + INFOS("[StartParallelContainer] on machine : " << theMachine); + params.hostname = CORBA::string_dup(theMachine.c_str()); + + // Step 4 : starting parallel container proxy + Engines::MachineParameters params_proxy(params); + std::string command_proxy; + SALOME_ContainerManager::actual_launch_machine_t proxy_machine; + try + { + command_proxy = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerProxy", params_proxy, proxy_machine); + } + catch(const SALOME_Exception & ex) + { + INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); + INFOS(ex.what()); + return ret; + } + params_proxy.nb_proc = 0; // LaunchParallelContainer uses this value to know if it launches the proxy or the nodes + obj = LaunchParallelContainer(command_proxy, params_proxy, _NS->ContainerName(params_proxy), proxy_machine); + if (CORBA::is_nil(obj)) + { + INFOS("[StartParallelContainer] LaunchParallelContainer for proxy returns NIL !"); + return ret; + } + try + { + container_proxy = PaCO::InterfaceManager::_narrow(obj); + } + catch(CORBA::SystemException& e) + { + INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); + INFOS("CORBA::SystemException : " << e); + return ret; + } + catch(CORBA::Exception& e) + { + INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); + INFOS("CORBA::Exception" << e); + return ret; + } + catch(...) 
+ { + INFOS("[StartParallelContainer] Exception in _narrow after LaunchParallelContainer for proxy !"); + INFOS("Unknown exception !"); + return ret; + } + if (CORBA::is_nil(container_proxy)) + { + INFOS("[StartParallelContainer] PaCO::InterfaceManager::_narrow returns NIL !"); + return ret; + } + + // Step 5 : starting parallel container nodes + std::string command_nodes; + Engines::MachineParameters params_nodes(params); + SALOME_ContainerManager::actual_launch_machine_t nodes_machines; + try + { + command_nodes = BuildCommandToLaunchParallelContainer("SALOME_ParallelContainerNode", params_nodes, nodes_machines, proxy_machine[0]); + } + catch(const SALOME_Exception & ex) + { + INFOS("[StartParallelContainer] Exception in BuildCommandToLaunchParallelContainer"); + INFOS(ex.what()); + return ret; + } + std::string container_generic_node_name = _NS->ContainerName(params) + "Node"; + obj = LaunchParallelContainer(command_nodes, params_nodes, container_generic_node_name, nodes_machines); + if (CORBA::is_nil(obj)) + { + INFOS("[StartParallelContainer] LaunchParallelContainer for nodes returns NIL !"); + // Il faut tuer le proxy + try + { + Engines::Container_var proxy = Engines::Container::_narrow(container_proxy); + proxy->Shutdown(); + } + catch (...) 
+ { + INFOS("[StartParallelContainer] Exception catched from proxy Shutdown..."); + } + return ret; + } + + // Step 6 : connecting nodes and the proxy to actually create a parallel container + for (int i = 0; i < params.nb_proc; i++) + { + std::ostringstream tmp; + tmp << i; + std::string proc_number = tmp.str(); + std::string container_node_name = container_generic_node_name + proc_number; + + std::string theNodeMachine(nodes_machines[i]); + std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str()); + obj = _NS->Resolve(containerNameInNS.c_str()); + if (CORBA::is_nil(obj)) + { + INFOS("[StartParallelContainer] CONNECTION FAILED From Naming Service !"); + INFOS("[StartParallelContainer] Container name is " << containerNameInNS); + return ret; + } + try + { + MESSAGE("[StartParallelContainer] Deploying node : " << container_node_name); + PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj); + node->deploy(); + MESSAGE("[StartParallelContainer] node " << container_node_name << " is deployed"); + } + catch(CORBA::SystemException& e) + { + INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); + INFOS("CORBA::SystemException : " << e); + return ret; + } + catch(CORBA::Exception& e) + { + INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); + INFOS("CORBA::Exception" << e); + return ret; + } + catch(...) + { + INFOS("[StartParallelContainer] Exception in deploying node : " << containerNameInNS); + INFOS("Unknown exception !"); + return ret; + } + } + + // Step 7 : starting parallel container + try + { + MESSAGE ("[StartParallelContainer] Starting parallel object"); + container_proxy->start(); + MESSAGE ("[StartParallelContainer] Parallel object is started"); + ret = Engines::Container::_narrow(container_proxy); + } + catch(CORBA::SystemException& e) + { + INFOS("Caught CORBA::SystemException. 
: " << e); + } + catch(PortableServer::POA::ServantAlreadyActive&) + { + INFOS("Caught CORBA::ServantAlreadyActiveException"); + } + catch(CORBA::Exception&) + { + INFOS("Caught CORBA::Exception."); + } + catch(std::exception& exc) + { + INFOS("Caught std::exception - "<getIORaddr(); - fileStream << "ORBInitRef NameService="; - fileStream << iorstr; + Engines::Container_ptr ret = Engines::Container::_nil(); + INFOS("[StartParallelContainer] is disabled !"); + INFOS("[StartParallelContainer] recompile SALOME Kernel to enable parallel extension"); + return ret; } +#endif -//============================================================================= -/*! - * generate a file name in /tmp directory - */ -//============================================================================= - -string SALOME_ContainerManager::BuildTemporaryFileName() const +#ifndef WITH_PACO_PARALLEL +CORBA::Object_ptr +SALOME_ContainerManager::LaunchParallelContainer(const std::string& command, + const Engines::ContainerParameters& params, + const std::string& name, + SALOME_ContainerManager::actual_launch_machine_t & vect_machine) { - //build more complex file name to support multiple salome session - string aFileName = Kernel_Utils::GetTmpFileName(); -#ifndef WIN32 - aFileName += ".sh"; -#else - aFileName += ".bat"; -#endif - return aFileName; + CORBA::Object_ptr obj = CORBA::Object::_nil(); + return obj; } - - +#else //============================================================================= -/*! - * Builds in a temporary file the script to be launched. - * - * Used if SALOME Application ($APPLI) is not defined. - * The command is build with data from CatalogResources, in which every path - * used on remote computer must be defined. - */ +/*! This method launches the parallel container. + * It will may be placed on the ressources manager. 
+ * + * \param command to launch + * \param container's parameters + * \param name of the container + * + * \return CORBA container reference + */ //============================================================================= - -string -SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer -(const string& machine, - const Engines::MachineParameters& params) throw(SALOME_Exception) +CORBA::Object_ptr +SALOME_ContainerManager::LaunchParallelContainer(const std::string& command, + const Engines::ContainerParameters& params, + const std::string& name, + SALOME_ContainerManager::actual_launch_machine_t & vect_machine) { - int status; - - _TmpFileName = BuildTemporaryFileName(); - ofstream tempOutputFile; - tempOutputFile.open(_TmpFileName.c_str(), ofstream::out ); - const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesList(machine); - tempOutputFile << "#! /bin/sh" << endl; - - // --- set env vars - - tempOutputFile << "export SALOME_trace=local" << endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace" - //tempOutputFile << "source " << resInfo.PreReqFilePath << endl; + CORBA::Object_ptr obj = CORBA::Object::_nil(); + std::string containerNameInNS; + int count = TIME_OUT_TO_LAUNCH_CONT; - // ! 
env vars + INFOS("[LaunchParallelContainer] Begin"); + int status = system(command.c_str()); + if (status == -1) { + INFOS("[LaunchParallelContainer] failed : system command status -1"); + return obj; + } + else if (status == 217) { + INFOS("[LaunchParallelContainer] failed : system command status 217"); + return obj; + } - if (params.isMPI) + if (params.nb_proc == 0) + { + std::string theMachine(vect_machine[0]); + // Proxy We have launch a proxy + containerNameInNS = _NS->BuildContainerNameForNS((char*) name.c_str(), theMachine.c_str()); + INFOS("[LaunchParallelContainer] Waiting for Parallel Container proxy " << containerNameInNS << " on " << theMachine); + while (CORBA::is_nil(obj) && count) { - tempOutputFile << "mpirun -np "; - int nbproc; - - if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) - nbproc = 1; - else if ( params.nb_node == 0 ) - nbproc = params.nb_proc_per_node; - else if ( params.nb_proc_per_node == 0 ) - nbproc = params.nb_node; - else - nbproc = params.nb_node * params.nb_proc_per_node; - - std::ostringstream o; - - tempOutputFile << nbproc << " "; -#ifdef WITHLAM - tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; -#elif defined(WITHOPENMPI) - if( getenv("OMPI_URI_FILE") == NULL ) - tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; - else{ - tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; - tempOutputFile << getenv("OMPI_URI_FILE"); - } +#ifndef WIN32 + sleep(1) ; +#else + Sleep(1000); #endif + count-- ; + obj = _NS->Resolve(containerNameInNS.c_str()); } - - tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/"; - - if (params.isMPI) - { - if (isPythonContainer(params.container_name)) - tempOutputFile << " pyMPI SALOME_ContainerPy.py "; - else - tempOutputFile << " SALOME_MPIContainer "; - } - - else + } + else + { + INFOS("[LaunchParallelContainer] launching the nodes of the parallel container"); + // We are 
waiting all the nodes + for (int i = 0; i < params.nb_proc; i++) { - if (isPythonContainer(params.container_name)) - tempOutputFile << "SALOME_ContainerPy.py "; - else - tempOutputFile << "SALOME_Container "; - } - - tempOutputFile << _NS->ContainerName(params) << " -"; - AddOmninamesParams(tempOutputFile); - tempOutputFile << " &" << endl; - tempOutputFile.flush(); - tempOutputFile.close(); + obj = CORBA::Object::_nil(); + std::string theMachine(vect_machine[i]); + // Name of the node + std::ostringstream tmp; + tmp << i; + std::string proc_number = tmp.str(); + std::string container_node_name = name + proc_number; + containerNameInNS = _NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str()); + INFOS("[LaunchParallelContainer] Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine); + while (CORBA::is_nil(obj) && count) { #ifndef WIN32 - chmod(_TmpFileName.c_str(), 0x1ED); + sleep(1) ; +#else + Sleep(1000); #endif - - // --- Build command - - string command; - - if (resInfo.Protocol == rsh) - { - command = "rsh "; - string commandRcp = "rcp "; - commandRcp += _TmpFileName; - commandRcp += " "; - commandRcp += machine; - commandRcp += ":"; - commandRcp += _TmpFileName; - status = system(commandRcp.c_str()); - } - - else if (resInfo.Protocol == ssh) - { - command = "ssh "; - string commandRcp = "scp "; - commandRcp += _TmpFileName; - commandRcp += " "; - commandRcp += machine; - commandRcp += ":"; - commandRcp += _TmpFileName; - status = system(commandRcp.c_str()); + count-- ; + obj = _NS->Resolve(containerNameInNS.c_str()); + } + if (CORBA::is_nil(obj)) + { + INFOS("[LaunchParallelContainer] Launch of node failed (or not found) !"); + return obj; + } } - else - throw SALOME_Exception("Unknown protocol"); - - if(status) - throw SALOME_Exception("Error of connection on remote host"); - - command += machine; - _CommandForRemAccess = command; - command += " "; - command += _TmpFileName; - - SCRUTE(command); - - 
return command; - + } + if (CORBA::is_nil(obj)) + INFOS("[LaunchParallelContainer] failed"); + + return obj; } +#endif +#ifndef WITH_PACO_PARALLEL +string +SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string& exe_name, + const Engines::ContainerParameters& params, + SALOME_ContainerManager::actual_launch_machine_t & vect_machine, + const std::string proxy_hostname) +{ + return ""; +} +#else //============================================================================= /*! Creates a command line that the container manager uses to launch * a parallel container. @@ -1333,12 +1316,12 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer //============================================================================= string SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string& exe_name, - const Engines::MachineParameters& params, + const Engines::ContainerParameters& params, SALOME_ContainerManager::actual_launch_machine_t & vect_machine, const std::string proxy_hostname) { // This method knows the differences between the proxy and the nodes. - // nb_component_nodes is not used in the same way if it is a proxy or + // nb_proc is not used in the same way if it is a proxy or // a node. 
//command = "gdb --args "; @@ -1354,7 +1337,7 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string std::string hostname(CORBA::string_dup(params.hostname)); std::ostringstream tmp_string; - CORBA::Long nb_nodes = params.nb_component_nodes; + CORBA::Long nb_nodes = params.nb_proc; tmp_string << nb_nodes; std::string nbproc = tmp_string.str(); @@ -1395,7 +1378,7 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string else { machine_file_name = _ResManager->getMachineFile(hostname, - params.nb_component_nodes, + params.nb_proc, parallelLib); } if (machine_file_name == "") @@ -1703,23 +1686,5 @@ SALOME_ContainerManager::BuildCommandToLaunchParallelContainer(const std::string MESSAGE("Parallel launch is: " << command); return command; } +#endif -string SALOME_ContainerManager::GetMPIZeroNode(string machine) -{ - int status; - string zeronode; - string cmd; - string tmpFile = BuildTemporaryFileName(); - - cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile; - - status = system(cmd.c_str()); - if( status == 0 ){ - ifstream fp(tmpFile.c_str(),ios::in); - fp >> zeronode; - } - - RmTmpFile(tmpFile); - - return zeronode; -} diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index 0124c78ac..a0e7e19eb 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -42,49 +42,39 @@ public: SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns); ~SALOME_ContainerManager(); - void Shutdown(); - void ShutdownContainers(); - + // Corba Methods Engines::Container_ptr - StartContainer(const Engines::MachineParameters& params, - const Engines::MachineList& possibleComputer, - const std::string& container_exe="SALOME_Container"); + GiveContainer(const Engines::ContainerParameters& params); - Engines::Container_ptr - StartContainer(const 
Engines::MachineParameters& params); - - Engines::Container_ptr - GiveContainer(const Engines::MachineParameters& params); + void ShutdownContainers(); - Engines::Container_ptr - FindOrStartContainer(const Engines::MachineParameters& params); + // C++ Methods + void Shutdown(); static const char *_ContainerManagerNameInNS; - // PaCO++ Parallel extension - Engines::Container_ptr - StartParallelContainer(const Engines::MachineParameters& params); - protected: - Engines::Container_ptr - FindContainer(const Engines::MachineParameters& params, - const Engines::MachineList& possibleComputers); + // C++ methods + Engines::Container_ptr + StartPaCOPPContainer(const Engines::ContainerParameters& params); Engines::Container_ptr - FindContainer(const Engines::MachineParameters& params, - const char *theMachine); + FindContainer(const Engines::ContainerParameters& params, + const Engines::ResourceList& possibleResources); - void fillBatchLaunchedContainers(); + Engines::Container_ptr + FindContainer(const Engines::ContainerParameters& params, + const std::string& resource); - std::string BuildCommandToLaunchRemoteContainer(const std::string& machine, - const Engines::MachineParameters& params, + std::string BuildCommandToLaunchRemoteContainer(const std::string & resource_name, + const Engines::ContainerParameters& params, const std::string& container_exe="SALOME_Container"); - std::string BuildCommandToLaunchLocalContainer(const Engines::MachineParameters& params, + std::string BuildCommandToLaunchLocalContainer(const Engines::ContainerParameters& params, const std::string& container_exe="SALOME_Container"); - std::string BuildTempFileToLaunchRemoteContainer(const std::string& machine, - const Engines::MachineParameters& params) throw(SALOME_Exception); + std::string BuildTempFileToLaunchRemoteContainer(const std::string& resource_name, + const Engines::ContainerParameters& params) throw(SALOME_Exception); void RmTmpFile(std::string& tmpFile); @@ -101,12 +91,12 @@ protected: 
// For PacO++ Parallel extension typedef std::vector actual_launch_machine_t; std::string BuildCommandToLaunchParallelContainer(const std::string& exe_name, - const Engines::MachineParameters& params, + const Engines::ContainerParameters& params, SALOME_ContainerManager::actual_launch_machine_t & vect_machine, const std::string proxy_hostname = ""); CORBA::Object_ptr LaunchParallelContainer(const std::string& command, - const Engines::MachineParameters& params, + const Engines::ContainerParameters& params, const std::string& name, SALOME_ContainerManager::actual_launch_machine_t & vect_machine); CORBA::ORB_var _orb; @@ -114,8 +104,6 @@ protected: SALOME_ResourcesManager *_ResManager; SALOME_NamingService *_NS; - static std::vector _batchLaunchedContainers; - static std::vector::iterator _batchLaunchedContainersIter; //! attribute that contains current tmp files generated std::string _TmpFileName; diff --git a/src/Launcher/BatchTest.cxx b/src/Launcher/BatchTest.cxx index 23ff34abe..591db823f 100644 --- a/src/Launcher/BatchTest.cxx +++ b/src/Launcher/BatchTest.cxx @@ -35,7 +35,7 @@ #ifdef WIN32 # include #endif -BatchTest::BatchTest(const Engines::MachineDefinition& batch_descr) +BatchTest::BatchTest(const Engines::ResourceDefinition& batch_descr) { #ifdef WITH_LIBBATCH _batch_descr = batch_descr; @@ -57,8 +57,8 @@ BatchTest::BatchTest(const Engines::MachineDefinition& batch_descr) // Creating test temporary file _test_filename = "/tmp/"; _test_filename += _date + "_test_cluster_file_"; - _test_filename += _batch_descr.alias.in(); - _base_filename = _date + "_test_cluster_file_" + _batch_descr.alias.in(); + _test_filename += _batch_descr.hostname.in(); + _base_filename = _date + "_test_cluster_file_" + _batch_descr.hostname.in(); #endif } @@ -71,7 +71,7 @@ BatchTest::test() INFOS(std::endl << "--- Testing batch Machine :" << std::endl << "--- Name : " << _batch_descr.hostname << std::endl - << "--- Alias : " << _batch_descr.alias << std::endl + << "--- hostname : " 
<< _batch_descr.hostname << std::endl << "--- Protocol : " << _batch_descr.protocol << std::endl << "--- User Name : " << _batch_descr.username << std::endl << "--- Batch Type : " << _batch_descr.batch << std::endl @@ -114,21 +114,21 @@ BatchTest::test() return rtn; } -// For this test we use : alias, protocol, username +// For this test we use : hostname, protocol, username std::string BatchTest::test_connection() { int status; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); // Basic tests - if(alias == "") + if(hostname == "") { - result += "alias is empty !"; + result += "hostname is empty !"; return result; } if(username == "") @@ -145,7 +145,7 @@ BatchTest::test_connection() // Build command command += protocol + " " - + username + "@" + alias; + + username + "@" + hostname; // Test status = system(command.c_str()); @@ -161,7 +161,7 @@ BatchTest::test_connection() return result; } -// For this test we use : alias, protocol, username +// For this test we use : hostname, protocol, username std::string BatchTest::test_filecopy() { @@ -169,7 +169,7 @@ BatchTest::test_filecopy() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); @@ -196,7 +196,7 @@ BatchTest::test_filecopy() if(protocol == "rsh") command = "rcp"; command += " " + _test_filename + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; // Test status = system(command.c_str()); @@ -212,7 +212,7 @@ BatchTest::test_filecopy() return result; } -// For this test we use : alias, protocol, username +// For this test we use : hostname, protocol, 
username std::string BatchTest::test_getresult() { @@ -220,7 +220,7 @@ BatchTest::test_getresult() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); @@ -235,7 +235,7 @@ BatchTest::test_getresult() command = "scp"; if(protocol == "rsh") command = "rcp"; - command += " " + username + "@" + alias + ":" + home + command += " " + username + "@" + hostname + ":" + home + "/" + _base_filename + " " + _test_filename + "_copy"; // Test @@ -285,7 +285,7 @@ BatchTest::test_jobsubmit_simple() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string batch_type = _batch_descr.batch.in(); @@ -336,7 +336,7 @@ BatchTest::test_jobsubmit_simple() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_simple + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -348,7 +348,7 @@ BatchTest::test_jobsubmit_simple() // Build command for submit job std::string file_job_name = _test_filename + "_jobid"; - command = protocol + " " + username + "@" + alias + " qsub " + _base_filename + "_simple > " + file_job_name; + command = protocol + " " + username + "@" + hostname + " qsub " + _base_filename + "_simple > " + file_job_name; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -368,7 +368,7 @@ BatchTest::test_jobsubmit_simple() file_job.close(); // Wait the end of the job - command = protocol + " " + username + "@" + alias + " qstat -f " + jobid + " > " + file_job_name; + command = protocol + " " + 
username + "@" + hostname + " qstat -f " + jobid + " > " + file_job_name; bool stop = false; while (!stop) { @@ -396,7 +396,7 @@ BatchTest::test_jobsubmit_simple() if(protocol == "rsh") command = "rcp"; command += " " - + username + "@" + alias + ":" + home + "/" + _date + "_simple* /tmp"; + + username + "@" + hostname + ":" + home + "/" + _date + "_simple* /tmp"; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -450,7 +450,7 @@ BatchTest::test_jobsubmit_mpi() std::string command; MpiImpl * mpiImpl; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string batch_type = _batch_descr.batch.in(); @@ -530,7 +530,7 @@ BatchTest::test_jobsubmit_mpi() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_script + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -543,7 +543,7 @@ BatchTest::test_jobsubmit_mpi() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_mpi + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -555,7 +555,7 @@ BatchTest::test_jobsubmit_mpi() // Build command for submit job std::string file_job_name = _test_filename + "_jobid"; - command = protocol + " " + username + "@" + alias + " qsub " + _base_filename + "_mpi > " + file_job_name; + command = protocol + " " + username + "@" + hostname + " qsub " + _base_filename + "_mpi > " + file_job_name; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -575,7 +575,7 @@ BatchTest::test_jobsubmit_mpi() file_job.close(); // Wait the end of the job - command = protocol + " " + username + "@" + alias + " qstat -f " + jobid + " > " 
+ file_job_name; + command = protocol + " " + username + "@" + hostname + " qstat -f " + jobid + " > " + file_job_name; bool stop = false; while (!stop) { @@ -603,7 +603,7 @@ BatchTest::test_jobsubmit_mpi() if(protocol == "rsh") command = "rcp"; command += " " - + username + "@" + alias + ":" + home + "/" + _date + "_mpi* /tmp"; + + username + "@" + hostname + ":" + home + "/" + _date + "_mpi* /tmp"; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -649,7 +649,7 @@ BatchTest::test_appli() std::string home; std::string command; std::string result("Failed : "); - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string applipath = _batch_descr.applipath.in(); @@ -679,7 +679,7 @@ BatchTest::test_appli() if(protocol == "rsh") command = "rcp"; command += " " + _test_file_appli + " " - + username + "@" + alias + ":" + home; + + username + "@" + hostname + ":" + home; status = system(command.c_str()); if(status) { std::ostringstream oss; @@ -690,7 +690,7 @@ BatchTest::test_appli() } // Launch test - command = protocol + " " + username + "@" + alias + command = protocol + " " + username + "@" + hostname + " sh " + home + "/" + _base_filename + "_appli_test > " + _test_filename + "_appli_test_result"; @@ -732,12 +732,12 @@ BatchTest::get_home(std::string * home) int status; std::string result = ""; std::string command; - std::string alias = _batch_descr.alias.in(); + std::string hostname = _batch_descr.hostname.in(); std::string username = _batch_descr.username.in(); std::string protocol = _batch_descr.protocol.in(); std::string file_home_name = _test_filename + "_home"; - command = protocol + " " + username + "@" + alias + " 'echo $HOME' > " + file_home_name; + command = protocol + " " + username + "@" + hostname + " 'echo $HOME' > " + file_home_name; status = system(command.c_str()); if(status) 
{ std::ostringstream oss; diff --git a/src/Launcher/BatchTest.hxx b/src/Launcher/BatchTest.hxx index 8fdf3736c..9a9a18388 100644 --- a/src/Launcher/BatchTest.hxx +++ b/src/Launcher/BatchTest.hxx @@ -32,7 +32,7 @@ class SALOMELAUNCHER_EXPORT BatchTest { public: - BatchTest(const Engines::MachineDefinition& batch_descr); + BatchTest(const Engines::ResourceDefinition& batch_descr); virtual ~BatchTest(); bool test(); @@ -48,7 +48,7 @@ class SALOMELAUNCHER_EXPORT BatchTest std::string get_home(std::string * home); private: - Engines::MachineDefinition _batch_descr; + Engines::ResourceDefinition _batch_descr; std::string _test_filename; std::string _base_filename; std::string _date; diff --git a/src/Launcher/Launcher.cxx b/src/Launcher/Launcher.cxx index 812cfedfc..45f23c56b 100644 --- a/src/Launcher/Launcher.cxx +++ b/src/Launcher/Launcher.cxx @@ -89,11 +89,8 @@ Launcher_cpp::createJob(Launcher::Job * new_job) LAUNCHER_MESSAGE("Creating a new job"); // First step take a resource - // Two cases: hostname is defined -> GetFittingResources will check if resource exists - // hostname is not defined -> Try to find a good resource - // Note: We use Alias parameter to get the real name of the machine -> To change ???? 
std::vector ResourceList; - machineParams params = new_job->getMachineRequiredParams(); + resourceParams params = new_job->getResourceRequiredParams(); try{ ResourceList = _ResManager->GetFittingResources(params); } @@ -108,21 +105,13 @@ Launcher_cpp::createJob(Launcher::Job * new_job) } // Second step configure the job with the resource selected - the first of the list - ParserResourcesType machine_definition = _ResManager->GetResourcesList(ResourceList[0]); - if (machine_definition.Alias == "") - { - LAUNCHER_INFOS("Alias is not defined for the resource selected: " << machine_definition.HostName); - delete new_job; - std::string mess = "Alias is not defined for the resource selected: "; - mess += machine_definition.HostName; - throw LauncherException(mess); - } + ParserResourcesType resource_definition = _ResManager->GetResourcesDescr(ResourceList[0]); - // Set machine definition to the job + // Set resource definition to the job // The job will check if the definitions needed try { - new_job->setMachineDefinition(machine_definition); + new_job->setResourceDefinition(resource_definition); } catch(const LauncherException &ex) { @@ -132,13 +121,13 @@ Launcher_cpp::createJob(Launcher::Job * new_job) } // Third step search batch manager for the resource into the map -> instanciate one if does not exist - std::string machine_name = machine_definition.Alias; - std::map::const_iterator it = _batchmap.find(machine_name); + std::string resource_name = resource_definition.Name; + std::map::const_iterator it = _batchmap.find(resource_name); if(it == _batchmap.end()) { try { - _batchmap[machine_name] = FactoryBatchManager(machine_definition); + _batchmap[resource_name] = FactoryBatchManager(resource_definition); } catch(const LauncherException &ex) { @@ -199,9 +188,9 @@ Launcher_cpp::launchJob(int job_id) throw LauncherException("Bad state of the job: " + job->getState()); } - std::string machine_name = job->getMachineDefinition().Alias; + std::string resource_name = 
job->getResourceDefinition().Name; try { - Batch::JobId batch_manager_job_id = _batchmap[machine_name]->submitJob(*(job->getBatchJob())); + Batch::JobId batch_manager_job_id = _batchmap[resource_name]->submitJob(*(job->getBatchJob())); job->setBatchManagerJobId(batch_manager_job_id); job->setState("QUEUED"); } @@ -256,13 +245,13 @@ Launcher_cpp::getJobResults(int job_id, std::string directory) } Launcher::Job * job = it_job->second; - std::string machine_name = job->getMachineDefinition().Alias; + std::string resource_name = job->getResourceDefinition().Name; try { if (directory != "") - _batchmap[machine_name]->importOutputFiles(*(job->getBatchJob()), directory); + _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), directory); else - _batchmap[machine_name]->importOutputFiles(*(job->getBatchJob()), job->getResultDirectory()); + _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), job->getResultDirectory()); } catch(const Batch::EmulationException &ex) { @@ -321,10 +310,10 @@ Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile, for(int i=0; i < job_params.OutputFile.size();i++) new_job->add_out_file(job_params.OutputFile[i]); - machineParams p; + resourceParams p; p.hostname = clusterName; p.nb_proc = job_params.NbOfProcesses; - new_job->setMachineRequiredParams(p); + new_job->setResourceRequiredParams(p); createJob(new_job); return new_job->getNumber(); @@ -343,7 +332,7 @@ Launcher_cpp::FactoryBatchManager(ParserResourcesType& params) Batch::FactBatchManager_eClient* fact; int nb_proc_per_node = params.DataForSort._nbOfProcPerNode; - std::string hostname = params.Alias; + std::string hostname = params.HostName; switch(params.Protocol) { diff --git a/src/Launcher/Launcher_Job.cxx b/src/Launcher/Launcher_Job.cxx index c63814126..835d752f7 100644 --- a/src/Launcher/Launcher_Job.cxx +++ b/src/Launcher/Launcher_Job.cxx @@ -36,15 +36,14 @@ Launcher::Job::Job() _result_directory = ""; _maximum_duration = ""; 
_maximum_duration_in_second = -1; - _machine_required_params.hostname = ""; - _machine_required_params.OS = ""; - _machine_required_params.nb_proc = -1; - _machine_required_params.nb_node = -1; - _machine_required_params.nb_proc_per_node = -1; - _machine_required_params.cpu_clock = -1; - _machine_required_params.mem_mb = -1; - _machine_required_params.parallelLib = ""; - _machine_required_params.nb_component_nodes = -1; + _resource_required_params.name = ""; + _resource_required_params.hostname = ""; + _resource_required_params.OS = ""; + _resource_required_params.nb_proc = -1; + _resource_required_params.nb_node = -1; + _resource_required_params.nb_proc_per_node = -1; + _resource_required_params.cpu_clock = -1; + _resource_required_params.mem_mb = -1; _queue = ""; #ifdef WITH_LIBBATCH @@ -108,11 +107,11 @@ Launcher::Job::getNumber() } void -Launcher::Job::setMachineDefinition(const ParserResourcesType & machine_definition) +Launcher::Job::setResourceDefinition(const ParserResourcesType & resource_definition) { // Check machine_definition std::string user_name = ""; - if (machine_definition.UserName == "") + if (resource_definition.UserName == "") { user_name = getenv("USER"); if (user_name == "") @@ -122,16 +121,16 @@ Launcher::Job::setMachineDefinition(const ParserResourcesType & machine_definiti } } else - user_name = machine_definition.UserName; + user_name = resource_definition.UserName; - _machine_definition = machine_definition; - _machine_definition.UserName = user_name; + _resource_definition = resource_definition; + _resource_definition.UserName = user_name; } ParserResourcesType -Launcher::Job::getMachineDefinition() +Launcher::Job::getResourceDefinition() { - return _machine_definition; + return _resource_definition; } void @@ -220,10 +219,10 @@ Launcher::Job::setMaximumDuration(const std::string & maximum_duration) } void -Launcher::Job::setMachineRequiredParams(const machineParams & machine_required_params) 
+Launcher::Job::setResourceRequiredParams(const resourceParams & resource_required_params) { - checkMachineRequiredParams(machine_required_params); - _machine_required_params = machine_required_params; + checkResourceRequiredParams(resource_required_params); + _resource_required_params = resource_required_params; } void @@ -268,10 +267,10 @@ Launcher::Job::getMaximumDuration() return _maximum_duration; } -machineParams -Launcher::Job::getMachineRequiredParams() +resourceParams +Launcher::Job::getResourceRequiredParams() { - return _machine_required_params; + return _resource_required_params; } std::string @@ -314,12 +313,12 @@ Launcher::Job::checkMaximumDuration(const std::string & maximum_duration) } void -Launcher::Job::checkMachineRequiredParams(const machineParams & machine_required_params) +Launcher::Job::checkResourceRequiredParams(const resourceParams & resource_required_params) { // nb_proc has be to > 0 - if (machine_required_params.nb_proc <= 0) + if (resource_required_params.nb_proc <= 0) { - std::string message("[Launcher::Job::checkMachineRequiredParams] proc number is not > 0 ! "); + std::string message("[Launcher::Job::checkResourceRequiredParams] proc number is not > 0 ! 
"); throw LauncherException(message); } } @@ -405,14 +404,14 @@ Launcher::Job::common_job_params() { Batch::Parametre params; - params[USER] = _machine_definition.UserName; - params[NBPROC] = _machine_required_params.nb_proc; + params[USER] = _resource_definition.UserName; + params[NBPROC] = _resource_required_params.nb_proc; // Memory - if (_machine_required_params.mem_mb > 0) + if (_resource_required_params.mem_mb > 0) { // Memory is in kilobytes - params[MAXRAMSIZE] = _machine_required_params.mem_mb * 1024; + params[MAXRAMSIZE] = _resource_required_params.mem_mb * 1024; } // We define a default directory based on user time diff --git a/src/Launcher/Launcher_Job.hxx b/src/Launcher/Launcher_Job.hxx index ee949ea9b..853ca7953 100644 --- a/src/Launcher/Launcher_Job.hxx +++ b/src/Launcher/Launcher_Job.hxx @@ -58,8 +58,8 @@ namespace Launcher void setNumber(const int & number); int getNumber(); - virtual void setMachineDefinition(const ParserResourcesType & machine_definition); - ParserResourcesType getMachineDefinition(); + virtual void setResourceDefinition(const ParserResourcesType & resource_definition); + ParserResourcesType getResourceDefinition(); // Common parameters virtual void setJobFile(const std::string & job_file); @@ -69,7 +69,7 @@ namespace Launcher void add_in_file(const std::string & file); void add_out_file(const std::string & file); void setMaximumDuration(const std::string & maximum_duration); - void setMachineRequiredParams(const machineParams & machine_required_params); + void setResourceRequiredParams(const resourceParams & resource_required_params); void setQueue(const std::string & queue); void setEnvFile(const std::string & env_file); @@ -80,7 +80,7 @@ namespace Launcher const std::list & get_in_files(); const std::list & get_out_files(); std::string getMaximumDuration(); - machineParams getMachineRequiredParams(); + resourceParams getResourceRequiredParams(); std::string getQueue(); std::string getEnvFile(); @@ -88,7 +88,7 @@ namespace 
Launcher // Checks void checkMaximumDuration(const std::string & maximum_duration); - void checkMachineRequiredParams(const machineParams & machine_required_params); + void checkResourceRequiredParams(const resourceParams & resource_required_params); // Helps long convertMaximumDuration(const std::string & maximum_duration); @@ -104,7 +104,7 @@ namespace Launcher std::string _launch_date; std::string _env_file; - ParserResourcesType _machine_definition; + ParserResourcesType _resource_definition; std::string _job_file; std::string _job_file_name; @@ -117,7 +117,7 @@ namespace Launcher std::list _out_files; std::string _maximum_duration; long _maximum_duration_in_second; - machineParams _machine_required_params; + resourceParams _resource_required_params; std::string _queue; #ifdef WITH_LIBBATCH diff --git a/src/Launcher/Launcher_Job_PythonSALOME.cxx b/src/Launcher/Launcher_Job_PythonSALOME.cxx index 6fcffb68d..b45a37470 100644 --- a/src/Launcher/Launcher_Job_PythonSALOME.cxx +++ b/src/Launcher/Launcher_Job_PythonSALOME.cxx @@ -35,7 +35,7 @@ Launcher::Job_PythonSALOME::setJobFile(const std::string & job_file) void Launcher::Job_PythonSALOME::addJobTypeSpecificScript(std::ofstream & launch_script_stream) { - launch_script_stream << _machine_definition.AppliPath << "/runSession -p $appli_port python " << _job_file_name_complete << " > logs/python_" << _launch_date << ".log 2>&1" << std::endl; + launch_script_stream << _resource_definition.AppliPath << "/runSession -p $appli_port python " << _job_file_name_complete << " > logs/python_" << _launch_date << ".log 2>&1" << std::endl; } #endif diff --git a/src/Launcher/Launcher_Job_SALOME.cxx b/src/Launcher/Launcher_Job_SALOME.cxx index 1ad608b15..40808ab8c 100644 --- a/src/Launcher/Launcher_Job_SALOME.cxx +++ b/src/Launcher/Launcher_Job_SALOME.cxx @@ -25,15 +25,15 @@ Launcher::Job_SALOME::Job_SALOME() {} Launcher::Job_SALOME::~Job_SALOME() {} void -Launcher::Job_SALOME::setMachineDefinition(const ParserResourcesType & 
machine_definition) +Launcher::Job_SALOME::setResourceDefinition(const ParserResourcesType & resource_definition) { - // Check machine_definition - if (machine_definition.AppliPath == "") + // Check resource_definition + if (resource_definition.AppliPath == "") { - std::string mess = "Machine definition must define an application path !, machine name is: " + machine_definition.HostName; + std::string mess = "Resource definition must define an application path !, resource name is: " + resource_definition.Name; throw LauncherException(mess); } - Launcher::Job::setMachineDefinition(machine_definition); + Launcher::Job::setResourceDefinition(resource_definition); } void @@ -69,9 +69,9 @@ Launcher::Job_SALOME::buildSalomeScript(Batch::Parametre params) launch_script_stream << "export SALOME_TMP_DIR=" << work_directory << "/logs" << std::endl; // -- Generates Catalog Resources - std::string machine_protocol = "ssh"; - if (_machine_definition.ClusterInternalProtocol == rsh) - machine_protocol = "rsh"; + std::string resource_protocol = "ssh"; + if (_resource_definition.ClusterInternalProtocol == rsh) + resource_protocol = "rsh"; launch_script_stream << "if [ \"x$LIBBATCH_NODEFILE\" != \"x\" ]; then " << std::endl; launch_script_stream << "CATALOG_FILE=" << work_directory << "/CatalogResources_" << _launch_date << ".xml" << std::endl; @@ -80,20 +80,20 @@ Launcher::Job_SALOME::buildSalomeScript(Batch::Parametre params) launch_script_stream << "echo '' >> $CATALOG_FILE" << std::endl; launch_script_stream << "cat $LIBBATCH_NODEFILE | sort -u | while read host" << std::endl; launch_script_stream << "do" << std::endl; - launch_script_stream << "echo '> $CATALOG_FILE" << std::endl; - launch_script_stream << "echo ' userName=\"" << _machine_definition.UserName << "\"' >> $CATALOG_FILE" << std::endl; - launch_script_stream << "echo ' appliPath=\"" << _machine_definition.AppliPath << "\"' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo '> $CATALOG_FILE" << std::endl; 
+ launch_script_stream << "echo ' userName=\"" << _resource_definition.UserName << "\"' >> $CATALOG_FILE" << std::endl; + launch_script_stream << "echo ' appliPath=\"" << _resource_definition.AppliPath << "\"' >> $CATALOG_FILE" << std::endl; launch_script_stream << "echo '/>' >> $CATALOG_FILE" << std::endl; launch_script_stream << "done" << std::endl; launch_script_stream << "echo '' >> $CATALOG_FILE" << std::endl; launch_script_stream << "fi" << std::endl; // Launch SALOME with an appli - launch_script_stream << _machine_definition.AppliPath << "/runAppli --terminal --ns-port-log=" << launch_date_port_file << " > logs/salome_" << _launch_date << ".log 2>&1" << std::endl; + launch_script_stream << _resource_definition.AppliPath << "/runAppli --terminal --ns-port-log=" << launch_date_port_file << " > logs/salome_" << _launch_date << ".log 2>&1" << std::endl; launch_script_stream << "current=0\n" << "stop=20\n" - << "while ! test -f " << _machine_definition.AppliPath << "/" << launch_date_port_file << "\n" + << "while ! 
test -f " << _resource_definition.AppliPath << "/" << launch_date_port_file << "\n" << "do\n" << " sleep 2\n" << " let current=current+1\n" @@ -102,13 +102,13 @@ Launcher::Job_SALOME::buildSalomeScript(Batch::Parametre params) << " exit\n" << " fi\n" << "done\n" - << "appli_port=`cat " << _machine_definition.AppliPath << "/" << launch_date_port_file << "`\n"; + << "appli_port=`cat " << _resource_definition.AppliPath << "/" << launch_date_port_file << "`\n"; // Call real job type addJobTypeSpecificScript(launch_script_stream); // End - launch_script_stream << _machine_definition.AppliPath << "/runSession -p $appli_port shutdownSalome.py" << std::endl; + launch_script_stream << _resource_definition.AppliPath << "/runSession -p $appli_port shutdownSalome.py" << std::endl; launch_script_stream << "sleep 10" << std::endl; // Return diff --git a/src/Launcher/Launcher_Job_SALOME.hxx b/src/Launcher/Launcher_Job_SALOME.hxx index 00cf4bd2c..de351ab08 100644 --- a/src/Launcher/Launcher_Job_SALOME.hxx +++ b/src/Launcher/Launcher_Job_SALOME.hxx @@ -36,7 +36,7 @@ namespace Launcher Job_SALOME(); virtual ~Job_SALOME(); - virtual void setMachineDefinition(const ParserResourcesType & machine_definition); + virtual void setResourceDefinition(const ParserResourcesType & resource_definition); virtual void update_job(); #ifdef WITH_LIBBATCH diff --git a/src/Launcher/Launcher_Job_YACSFile.cxx b/src/Launcher/Launcher_Job_YACSFile.cxx index daf6b331e..1126e90ab 100644 --- a/src/Launcher/Launcher_Job_YACSFile.cxx +++ b/src/Launcher/Launcher_Job_YACSFile.cxx @@ -34,5 +34,5 @@ Launcher::Job_YACSFile::setJobFile(const std::string & job_file) void Launcher::Job_YACSFile::addJobTypeSpecificScript(std::ofstream & launch_script_stream) { - launch_script_stream << _machine_definition.AppliPath << "/runSession -p $appli_port driver " << _job_file_name_complete << " > logs/yacs_" << _launch_date << ".log 2>&1" << std::endl; + launch_script_stream << _resource_definition.AppliPath << "/runSession -p 
$appli_port driver " << _job_file_name_complete << " > logs/yacs_" << _launch_date << ".log 2>&1" << std::endl; } diff --git a/src/Launcher/SALOME_Launcher.cxx b/src/Launcher/SALOME_Launcher.cxx index 52e19abb3..d65f42f7d 100644 --- a/src/Launcher/SALOME_Launcher.cxx +++ b/src/Launcher/SALOME_Launcher.cxx @@ -149,7 +149,8 @@ SALOME_Launcher::createJob(const Engines::JobParameters & job_parameters) // Resources requirements try { - machineParams p; + resourceParams p; + p.name = job_parameters.resource_required.name; p.hostname = job_parameters.resource_required.hostname; p.OS = job_parameters.resource_required.OS; p.nb_proc = job_parameters.resource_required.nb_proc; @@ -157,7 +158,7 @@ SALOME_Launcher::createJob(const Engines::JobParameters & job_parameters) p.nb_proc_per_node = job_parameters.resource_required.nb_proc_per_node; p.cpu_clock = job_parameters.resource_required.cpu_clock; p.mem_mb = job_parameters.resource_required.mem_mb; - new_job->setMachineRequiredParams(p); + new_job->setResourceRequiredParams(p); } catch(const LauncherException &ex){ INFOS(ex.msg.c_str()); @@ -264,20 +265,20 @@ SALOME_Launcher::createJobWithFile(const char * xmlExecuteFile, */ //============================================================================= CORBA::Boolean -SALOME_Launcher::testBatch(const Engines::MachineParameters& params) +SALOME_Launcher::testBatch(const Engines::ResourceParameters& params) { MESSAGE("BEGIN OF SALOME_Launcher::testBatch"); CORBA::Boolean rtn = false; try { // find a cluster matching the structure params - Engines::MachineList *aMachineList = _ResManager->GetFittingResources(params); + Engines::ResourceList *aMachineList = _ResManager->GetFittingResources(params); if (aMachineList->length() == 0) throw SALOME_Exception("No resources have been found with your parameters"); - const Engines::MachineDefinition* p = _ResManager->GetMachineParameters((*aMachineList)[0]); - string clustername(p->alias); - INFOS("Choose cluster" << clustername); + 
const Engines::ResourceDefinition* p = _ResManager->GetResourceDefinition((*aMachineList)[0]); + string resource_name(p->name); + INFOS("Choose resource for test: " << resource_name); BatchTest t(*p); if (t.test()) diff --git a/src/Launcher/SALOME_Launcher.hxx b/src/Launcher/SALOME_Launcher.hxx index 6de459767..489b782dd 100644 --- a/src/Launcher/SALOME_Launcher.hxx +++ b/src/Launcher/SALOME_Launcher.hxx @@ -52,7 +52,7 @@ public: // Useful methods CORBA::Long createJobWithFile(const char * xmlExecuteFile, const char * clusterName); - CORBA::Boolean testBatch (const Engines::MachineParameters& params); + CORBA::Boolean testBatch (const Engines::ResourceParameters& params); // SALOME Kernel service methods void Shutdown(); diff --git a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx index 4601dce6f..6a6f1335b 100644 --- a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx +++ b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.cxx @@ -131,7 +131,10 @@ string SALOME_FileTransferCORBA::getLocalFile(string localFile) params.container_name = _containerName.c_str(); params.hostname = _refMachine.c_str(); - container = contManager->FindOrStartContainer(params); + Engines::ContainerParameters new_params; + LCC.convert(params, new_params); + new_params.mode = CORBA::string_dup("findorstart"); + container = contManager->GiveContainer(new_params); if (CORBA::is_nil(container)) { INFOS("machine " << _refMachine << " unreachable"); diff --git a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx index 223a71511..20ba9dcc4 100644 --- a/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx +++ b/src/LifeCycleCORBA/SALOME_FileTransferCORBA.hxx @@ -34,6 +34,7 @@ #include CORBA_CLIENT_HEADER(SALOME_Component) #include +#include #ifdef WIN32 # if defined LIFECYCLECORBA_EXPORTS || defined SalomeLifeCycleCORBA_EXPORTS diff --git a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx 
b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx index 7fe8c0fb9..c3e25f02e 100644 --- a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx +++ b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.cxx @@ -137,15 +137,16 @@ SALOME_LifeCycleCORBA::FindComponent(const Engines::MachineParameters& params, if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; - Engines::MachineList_var listOfMachines = _ResManager->GetFittingResources(parms); + Engines::ContainerParameters new_params; + convert(params, new_params); + new_params.resource_params.componentList.length(1); + new_params.resource_params.componentList[0] = componentName; + Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params); - Engines::Component_var compo = _FindComponent(parms, + Engines::Component_var compo = _FindComponent(new_params, componentName, studyId, - listOfMachines); + listOfResources); return compo._retn(); } @@ -170,14 +171,15 @@ SALOME_LifeCycleCORBA::LoadComponent(const Engines::MachineParameters& params, if (! 
isKnownComponentClass(componentName)) return Engines::Component::_nil(); - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; + Engines::ContainerParameters new_params; + convert(params, new_params); + new_params.resource_params.componentList.length(1); + new_params.resource_params.componentList[0] = componentName; - Engines::MachineList_var listOfMachines = _ResManager->GetFittingResources(parms); - parms.computerList=listOfMachines; + Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params); + new_params.resource_params.resList = listOfResources; - Engines::Component_var compo = _LoadComponent(parms, + Engines::Component_var compo = _LoadComponent(new_params, componentName, studyId); @@ -206,23 +208,65 @@ FindOrLoad_Component(const Engines::MachineParameters& params, if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; - Engines::MachineList_var listOfMachines = _ResManager->GetFittingResources(parms); + Engines::ContainerParameters new_params; + convert(params, new_params); + new_params.resource_params.componentList.length(1); + new_params.resource_params.componentList[0] = componentName; - Engines::Component_var compo = _FindComponent(parms, + // For Compatibility -> if hostname == localhost put name == hostname + if (std::string(new_params.resource_params.hostname.in()) == "localhost") + { + new_params.resource_params.hostname = CORBA::string_dup(Kernel_Utils::GetHostname().c_str()); + new_params.resource_params.name = CORBA::string_dup(Kernel_Utils::GetHostname().c_str()); + } + + Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params); + + Engines::Component_var compo = _FindComponent(new_params, componentName, studyId, - listOfMachines); + 
listOfResources); if(CORBA::is_nil(compo)) - { - parms.computerList=listOfMachines; - compo = _LoadComponent(parms, + { + new_params.resource_params.resList = listOfResources; + compo = _LoadComponent(new_params, componentName, studyId); - } + } + + return compo._retn(); +} + +Engines::Component_ptr +SALOME_LifeCycleCORBA:: +FindOrLoad_Component(const Engines::ContainerParameters& params, + const char *componentName, + int studyId) +{ + // --- Check if Component Name is known in ModuleCatalog + + if (! isKnownComponentClass(componentName)) + return Engines::Component::_nil(); + + Engines::ContainerParameters new_params(params); + new_params.resource_params.componentList.length(1); + new_params.resource_params.componentList[0] = componentName; + + Engines::ResourceList_var listOfResources = _ResManager->GetFittingResources(new_params.resource_params); + + Engines::Component_var compo = _FindComponent(new_params, + componentName, + studyId, + listOfResources); + + if(CORBA::is_nil(compo)) + { + new_params.resource_params.resList = listOfResources; + compo = _LoadComponent(new_params, + componentName, + studyId); + } return compo._retn(); } @@ -243,30 +287,13 @@ Engines::Component_ptr SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName, const char *componentName) { - char *valenv=getenv("SALOME_BATCH"); - if(valenv) - if (strcmp(valenv,"1")==0) - { - MESSAGE("SALOME_LifeCycleCORBA::FindOrLoad_Component BATCH " << containerName << " " << componentName ) ; - _NS->Change_Directory("/Containers"); - CORBA::Object_ptr obj=_NS->Resolve(containerName); - Engines::Container_var cont=Engines::Container::_narrow(obj); - bool isLoadable = cont->load_component_Library(componentName); - if (!isLoadable) return Engines::Component::_nil(); - - Engines::Component_ptr myInstance = - cont->create_component_instance(componentName, 0); - return myInstance; - } MESSAGE("SALOME_LifeCycleCORBA::FindOrLoad_Component INTERACTIF " << containerName << " " << componentName ) ; 
- //#if 0 - // --- Check if Component Name is known in ModuleCatalog + // --- Check if Component Name is known in ModuleCatalog if (! isKnownComponentClass(componentName)) return Engines::Component::_nil(); // --- Check if containerName contains machine name (if yes: rg>0) - char *stContainer=strdup(containerName); string st2Container(stContainer); int rg=st2Container.find("/"); @@ -274,29 +301,21 @@ SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName, Engines::MachineParameters_var params=new Engines::MachineParameters; preSet(params); if (rg<0) - { - // containerName doesn't contain "/" => Local container - params->container_name=CORBA::string_dup(stContainer); - params->hostname=""; - } + { + // containerName doesn't contain "/" => Local container + params->container_name=CORBA::string_dup(stContainer); + params->hostname=""; + } else - { - stContainer[rg]='\0'; - params->container_name=CORBA::string_dup(stContainer+rg+1); - params->hostname=CORBA::string_dup(stContainer); - } + { + stContainer[rg]='\0'; + params->container_name=CORBA::string_dup(stContainer+rg+1); + params->hostname=CORBA::string_dup(stContainer); + } params->isMPI = false; SCRUTE(params->container_name); -// SCRUTE(params->hostname); -// SCRUTE(params->OS); -// SCRUTE(params->mem_mb); -// SCRUTE(params->cpu_clock); -// SCRUTE(params->nb_proc_per_node); -// SCRUTE(params->nb_node); -// SCRUTE(params->isMPI); free(stContainer); - return FindOrLoad_Component(params,componentName); - //#endif + return FindOrLoad_Component(params, componentName); } //============================================================================= @@ -309,31 +328,30 @@ SALOME_LifeCycleCORBA::FindOrLoad_Component(const char *containerName, bool SALOME_LifeCycleCORBA::isKnownComponentClass(const char *componentName) { - try + { + CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); + SALOME_ModuleCatalog::ModuleCatalog_var Catalog = + SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; + 
ASSERT(! CORBA::is_nil(Catalog)); + SALOME_ModuleCatalog::Acomponent_var compoInfo = + Catalog->GetComponent(componentName); + if (CORBA::is_nil (compoInfo)) { - CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog"); - SALOME_ModuleCatalog::ModuleCatalog_var Catalog = - SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ; - ASSERT(! CORBA::is_nil(Catalog)); - SALOME_ModuleCatalog::Acomponent_var compoInfo = - Catalog->GetComponent(componentName); - if (CORBA::is_nil (compoInfo)) - { - INFOS("Catalog Error: Component not found in the catalog" ); - INFOS( componentName ); - return false; - } - else return true; + INFOS("Catalog Error: Component not found in the catalog" ); + INFOS( componentName ); + return false; } + else return true; + } catch (ServiceUnreachable&) - { - INFOS("Caught exception: Naming Service Unreachable"); - } + { + INFOS("Caught exception: Naming Service Unreachable"); + } catch (...) - { - INFOS("Caught unknown exception."); - } + { + INFOS("Caught unknown exception."); + } return false; } @@ -344,7 +362,7 @@ bool SALOME_LifeCycleCORBA::isKnownComponentClass(const char *componentName) //============================================================================= bool -SALOME_LifeCycleCORBA::isMpiContainer(const Engines::MachineParameters& params) +SALOME_LifeCycleCORBA::isMpiContainer(const Engines::ContainerParameters& params) throw(IncompatibleComponent) { if( params.isMPI ) @@ -369,14 +387,11 @@ SALOME_LifeCycleCORBA::isMpiContainer(const Engines::MachineParameters& params) */ //============================================================================= -void SALOME_LifeCycleCORBA::preSet( Engines::MachineParameters& params) +void SALOME_LifeCycleCORBA::preSet(Engines::MachineParameters& params) { params.container_name = ""; params.hostname = ""; - //param.componentList = 0; - //param.computerList = 0; params.OS = ""; - params.nb_proc = 0; params.mem_mb = 0; params.cpu_clock = 0; params.nb_proc_per_node = 0; @@ -389,24 +404,77 @@ 
void SALOME_LifeCycleCORBA::preSet( Engines::MachineParameters& params) params.nb_component_nodes = 0; } +void +SALOME_LifeCycleCORBA::preSet(Engines::ResourceParameters& params) +{ + params.name = ""; + params.hostname = ""; + params.OS = ""; + params.nb_proc = 0; + params.mem_mb = 0; + params.cpu_clock = 0; + params.nb_node = 0; + params.nb_proc_per_node = 0; + params.policy = ""; +} + +void SALOME_LifeCycleCORBA::preSet( Engines::ContainerParameters& params) +{ + params.container_name = ""; + params.mode = ""; + params.workingdir = ""; + params.nb_proc = 0; + params.isMPI = false; + params.parallelLib = ""; + SALOME_LifeCycleCORBA::preSet(params.resource_params); +} + +void +SALOME_LifeCycleCORBA::convert(const Engines::MachineParameters& params_in, + Engines::ContainerParameters& params_out) +{ + SALOME_LifeCycleCORBA::preSet(params_out); + + // Container part + params_out.container_name = params_in.container_name; + params_out.mode = params_in.mode; + params_out.workingdir = params_in.workingdir; + params_out.isMPI = params_in.isMPI; + params_out.parallelLib = params_in.parallelLib; + + // Resource part + params_out.resource_params.hostname = params_in.hostname; + params_out.resource_params.OS = params_in.OS; + params_out.resource_params.mem_mb = params_in.mem_mb; + params_out.resource_params.cpu_clock = params_in.cpu_clock; + params_out.resource_params.nb_node = params_in.nb_node; + params_out.resource_params.nb_proc_per_node = params_in.nb_proc_per_node; + params_out.resource_params.policy = params_in.policy; + params_out.resource_params.componentList = params_in.componentList; + + params_out.resource_params.resList.length(params_in.computerList.length()); + for (CORBA::ULong i = 0; i < params_in.computerList.length(); i++) + params_out.resource_params.resList[i] = params_in.computerList[i]; +} + //============================================================================= /*! 
* \return a number of processors not 0, only for MPI containers */ //============================================================================= -int SALOME_LifeCycleCORBA::NbProc(const Engines::MachineParameters& params) +int SALOME_LifeCycleCORBA::NbProc(const Engines::ContainerParameters& params) { if( !isMpiContainer(params) ) return 0; - else if( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) ) + else if( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) return 1; - else if( params.nb_node == 0 ) - return params.nb_proc_per_node; - else if( params.nb_proc_per_node == 0 ) - return params.nb_node; + else if( params.resource_params.nb_node == 0 ) + return params.resource_params.nb_proc_per_node; + else if( params.resource_params.nb_proc_per_node == 0 ) + return params.resource_params.nb_node; else - return params.nb_node * params.nb_proc_per_node; + return params.resource_params.nb_node * params.resource_params.nb_proc_per_node; } //============================================================================= @@ -451,14 +519,14 @@ void SALOME_LifeCycleCORBA::shutdownServers() CORBA::Long pid = 0; CORBA::Object_var objS = _NS->Resolve("/Kernel/Session"); if (!CORBA::is_nil(objS)) + { + session = SALOME::Session::_narrow(objS); + if (!CORBA::is_nil(session)) { - session = SALOME::Session::_narrow(objS); - if (!CORBA::is_nil(session)) - { - pid = session->getPID(); - session->ping(); - } + pid = session->getPID(); + session->ping(); } + } string hostname = Kernel_Utils::GetHostname(); @@ -506,26 +574,27 @@ void SALOME_LifeCycleCORBA::shutdownServers() name.length(1); name[0].id = CORBA::string_dup(stdname.c_str()); try - { - if(!CORBA::is_nil(orb)) - theObj = orb->resolve_initial_references("NameService"); - if (!CORBA::is_nil(theObj)) - inc = CosNaming::NamingContext::_narrow(theObj); - } + { + if(!CORBA::is_nil(orb)) + theObj = orb->resolve_initial_references("NameService"); + if (!CORBA::is_nil(theObj)) + inc = 
CosNaming::NamingContext::_narrow(theObj); + } catch(...) + { + } + if(!CORBA::is_nil(inc)) + { + try { + objLog = inc->resolve(name); + SALOME_Logger::Logger_var logger = SALOME_Logger::Logger::_narrow(objLog); + if ( !CORBA::is_nil(logger) ) + logger->shutdown(); } - if(!CORBA::is_nil(inc)) { - try - { - objLog = inc->resolve(name); - SALOME_Logger::Logger_var logger = SALOME_Logger::Logger::_narrow(objLog); - if ( !CORBA::is_nil(logger) ) - logger->shutdown(); - } catch(...) - { - } + { + } } } @@ -538,31 +607,31 @@ void SALOME_LifeCycleCORBA::killOmniNames() { string portNumber (::getenv ("NSPORT") ); if ( !portNumber.empty() ) - { + { #ifdef WNT #else - string cmd ; - cmd = string( "ps -eo pid,command | grep -v grep | grep -E \"omniNames.*") - + portNumber - + string("\" | awk '{cmd=sprintf(\"kill -9 %s\",$1); system(cmd)}'" ); - MESSAGE(cmd); - try { - system ( cmd.c_str() ); - } - catch ( ... ) { - } -#endif + string cmd ; + cmd = string( "ps -eo pid,command | grep -v grep | grep -E \"omniNames.*") + + portNumber + + string("\" | awk '{cmd=sprintf(\"kill -9 %s\",$1); system(cmd)}'" ); + MESSAGE(cmd); + try { + system ( cmd.c_str() ); } + catch ( ... 
) { + } +#endif + } // NPAL 18309 (Kill Notifd) if ( !portNumber.empty() ) - { - string cmd = ("from killSalomeWithPort import killNotifdAndClean; "); - cmd += string("killNotifdAndClean(") + portNumber + "); "; - cmd = string("python -c \"") + cmd +"\" >& /dev/null"; - MESSAGE(cmd); - system( cmd.c_str() ); - } + { + string cmd = ("from killSalomeWithPort import killNotifdAndClean; "); + cmd += string("killNotifdAndClean(") + portNumber + "); "; + cmd = string("python -c \"") + cmd +"\" >& /dev/null"; + MESSAGE(cmd); + system( cmd.c_str() ); + } } //============================================================================= @@ -581,47 +650,42 @@ void SALOME_LifeCycleCORBA::killOmniNames() Engines::Component_ptr SALOME_LifeCycleCORBA:: -_FindComponent(const Engines::MachineParameters& params, +_FindComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId, - const Engines::MachineList& listOfMachines) + const Engines::ResourceList& listOfResources) { // --- build the list of machines on which the component is already running - const char *containerName = params.container_name; int nbproc = NbProc(params); -// MESSAGE("_FindComponent, required " << containerName << -// " " << componentName << " " << nbproc); - Engines::MachineList_var machinesOK = new Engines::MachineList; + Engines::ResourceList_var resourcesOK = new Engines::ResourceList; - unsigned int lghtOfmachinesOK = 0; - machinesOK->length(listOfMachines.length()); + unsigned int lghtOfresourcesOK = 0; + resourcesOK->length(listOfResources.length()); - for(unsigned int i=0; iResolveComponent(currentMachine, - containerName, - componentName, - nbproc); - if (!CORBA::is_nil(obj)) - machinesOK[lghtOfmachinesOK++] = CORBA::string_dup(currentMachine); - } + for(unsigned int i=0; i < listOfResources.length(); i++) + { + const char * currentResource = listOfResources[i]; + CORBA::Object_var obj = _NS->ResolveComponent(currentResource, + containerName, + componentName, + nbproc); 
+ if (!CORBA::is_nil(obj)) + resourcesOK[lghtOfresourcesOK++] = CORBA::string_dup(currentResource); + } // --- find the best machine among the list - - if(lghtOfmachinesOK != 0) - { - machinesOK->length(lghtOfmachinesOK); - CORBA::String_var bestMachine = _ResManager->FindFirst(machinesOK); - CORBA::Object_var obj = _NS->ResolveComponent(bestMachine, - containerName, - componentName, - nbproc); - return Engines::Component::_narrow(obj); - } + if(lghtOfresourcesOK != 0) + { + resourcesOK->length(lghtOfresourcesOK); + CORBA::String_var bestResource = _ResManager->FindFirst(resourcesOK); + CORBA::Object_var obj = _NS->ResolveComponent(bestResource, + containerName, + componentName, + nbproc); + return Engines::Component::_narrow(obj); + } else return Engines::Component::_nil(); } @@ -642,14 +706,16 @@ _FindComponent(const Engines::MachineParameters& params, Engines::Component_ptr SALOME_LifeCycleCORBA:: -_LoadComponent(const Engines::MachineParameters& params, +_LoadComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId) { MESSAGE("_LoadComponent, required " << params.container_name << " " << componentName << " " << NbProc(params)); - Engines::Container_var cont = _ContManager->FindOrStartContainer(params); + Engines::ContainerParameters local_params(params); + local_params.mode = CORBA::string_dup("findorstart"); + Engines::Container_var cont = _ContManager->GiveContainer(local_params); if (CORBA::is_nil(cont)) return Engines::Component::_nil(); bool isLoadable = cont->load_component_Library(componentName); @@ -670,7 +736,7 @@ _LoadComponent(const Engines::MachineParameters& params, */ //============================================================================= Engines::Component_ptr -SALOME_LifeCycleCORBA::Load_ParallelComponent(const Engines::MachineParameters& params, +SALOME_LifeCycleCORBA::Load_ParallelComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId) { @@ -681,12 +747,13 @@ 
SALOME_LifeCycleCORBA::Load_ParallelComponent(const Engines::MachineParameters& MESSAGE("Number of component nodes : " << params.nb_component_nodes); MESSAGE("Component Name : " << componentName);*/ - Engines::MachineParameters parms(params); - parms.componentList.length(1); - parms.componentList[0] = componentName; + Engines::ContainerParameters parms(params); + parms.resource_params.componentList.length(1); + parms.resource_params.componentList[0] = componentName; + parms.mode = CORBA::string_dup("findorstart"); MESSAGE("Starting Parallel Container"); - Engines::Container_var cont = _ContManager->StartParallelContainer(parms); + Engines::Container_var cont = _ContManager->GiveContainer(parms); if (CORBA::is_nil(cont)) { INFOS("FindOrStartParallelContainer() returns a NULL container !"); return Engines::Component::_nil(); @@ -727,14 +794,15 @@ void SALOME_LifeCycleCORBA::copyFile(const char* hostSrc, const char* fileSrc, c Engines::ContainerManager_var contManager = getContainerManager(); - Engines::MachineParameters params; + Engines::ContainerParameters params; preSet(params); - params.hostname = hostDest; - Engines::Container_var containerDest = contManager->FindOrStartContainer(params); + params.resource_params.hostname = hostDest; + params.mode = CORBA::string_dup("findorstart"); + Engines::Container_var containerDest = contManager->GiveContainer(params); - params.hostname = hostSrc; - Engines::Container_var containerSrc = contManager->FindOrStartContainer(params); + params.resource_params.hostname = hostSrc; + Engines::Container_var containerSrc = contManager->GiveContainer(params); containerDest->copyFile(containerSrc,fileSrc,fileDest); } diff --git a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx index c22385796..8287139f1 100644 --- a/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx +++ b/src/LifeCycleCORBA/SALOME_LifeCycleCORBA.hxx @@ -81,24 +81,32 @@ public: const char *componentName, int studyId =0); + // SALOME 
6 - Interface + Engines::Component_ptr + FindOrLoad_Component(const Engines::ContainerParameters& params, + const char *componentName, + int studyId =0); + Engines::Component_ptr FindOrLoad_Component(const char *containerName, const char *componentName); // for compatibility // Parallel extension Engines::Component_ptr - Load_ParallelComponent(const Engines::MachineParameters& params, + Load_ParallelComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId); bool isKnownComponentClass(const char *componentName); - bool isMpiContainer(const Engines::MachineParameters& params) + bool isMpiContainer(const Engines::ContainerParameters& params) throw(IncompatibleComponent); - int NbProc(const Engines::MachineParameters& params); + int NbProc(const Engines::ContainerParameters& params); static void preSet(Engines::MachineParameters& outparams); + static void preSet(Engines::ResourceParameters& outparams); + static void preSet(Engines::ContainerParameters& outparams); Engines::ContainerManager_ptr getContainerManager(); Engines::ResourcesManager_ptr getResourcesManager(); @@ -109,6 +117,10 @@ public: void shutdownServers(); static void killOmniNames(); + // For SALOME 5.1.x + // Will be deleted on SALOME 6 + void convert(const Engines::MachineParameters& params_in, + Engines::ContainerParameters& params_out); protected: /*! Establish if a component called "componentName" in a container called @@ -117,16 +129,16 @@ protected: * This method uses Naming Service to find the component. 
*/ Engines::Component_ptr - _FindComponent(const Engines::MachineParameters& params, + _FindComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId, - const Engines::MachineList& listOfMachines); + const Engines::ResourceList& listOfResources); Engines::Component_ptr - _LoadComponent(const Engines::MachineParameters& params, + _LoadComponent(const Engines::ContainerParameters& params, const char *componentName, int studyId); - + SALOME_NamingService *_NS; SALOME_NamingService *_NSnew; Engines::ContainerManager_var _ContManager; diff --git a/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx b/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx index fa4fc4777..7bf70e8c7 100644 --- a/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx +++ b/src/LifeCycleCORBA/Test/LifeCycleCORBATest.cxx @@ -679,12 +679,12 @@ string LifeCycleCORBATest::GetRemoteHost() Engines::ResourcesManager::_narrow(obj); CPPUNIT_ASSERT(!CORBA::is_nil(resourcesManager)); - Engines::MachineParameters params; + Engines::ContainerParameters params; _LCC.preSet(params); // empty params to get all the machines - params.componentList.length(1); - params.componentList[0]="SalomeTestComponent"; + params.resource_params.componentList.length(1); + params.resource_params.componentList[0]="SalomeTestComponent"; - Engines::MachineList_var hostList = resourcesManager->GetFittingResources(params); + Engines::ResourceList_var hostList = resourcesManager->GetFittingResources(params.resource_params); CPPUNIT_ASSERT(hostList->length() > 1); string localHost = Kernel_Utils::GetHostname(); @@ -692,7 +692,8 @@ string LifeCycleCORBATest::GetRemoteHost() for (unsigned int i=0; i < hostList->length(); i++) { const char* aMachine = hostList[i]; - string machine(aMachine); + Engines::ResourceDefinition_var resource_definition = resourcesManager->GetResourceDefinition(aMachine); + string machine(resource_definition->hostname.in()); if (machine != localHost) { remoteHost = machine; diff --git 
a/src/LifeCycleCORBA/TestContainerManager.cxx b/src/LifeCycleCORBA/TestContainerManager.cxx index d9a3a82e3..8ffa25872 100644 --- a/src/LifeCycleCORBA/TestContainerManager.cxx +++ b/src/LifeCycleCORBA/TestContainerManager.cxx @@ -65,24 +65,24 @@ int main (int argc, char * argv[]) ASSERT( !CORBA::is_nil(obj)); Engines::ResourcesManager_var _ResManager=Engines::ResourcesManager::_narrow(obj); - Engines::MachineParameters p; - p.componentList.length(2); - p.componentList[0] = "MED"; - p.componentList[1] = "GEOM"; + Engines::ContainerParameters p; + p.resource_params.componentList.length(2); + p.resource_params.componentList[0] = "MED"; + p.resource_params.componentList[1] = "GEOM"; - p.hostname = ""; - p.OS = "LINUX"; - p.mem_mb = 1000; - p.cpu_clock = 1000; - p.nb_proc_per_node = 1; - p.nb_node = 1; + p.resource_params.hostname = ""; + p.resource_params.OS = "LINUX"; + p.resource_params.mem_mb = 1000; + p.resource_params.cpu_clock = 1000; + p.resource_params.nb_proc_per_node = 1; + p.resource_params.nb_node = 1; p.isMPI = false; char st[10]; for(int i=0;i<10;i++){ sprintf(st,"cycl_%d",i); p.container_name = CORBA::string_dup(st); - p.policy="cycl"; + p.resource_params.policy="cycl"; cont = _ContManager->GiveContainer(p); if(CORBA::is_nil(cont)) error = true; } @@ -90,13 +90,13 @@ int main (int argc, char * argv[]) for(int i=0;i<10;i++){ sprintf(st,"first_%d",i); p.container_name = CORBA::string_dup(st); - p.policy="first"; + p.resource_params.policy="first"; cont = _ContManager->GiveContainer(p); if(CORBA::is_nil(cont)) error = true; } p.container_name = CORBA::string_dup("best"); - p.policy="best"; + p.resource_params.policy="best"; cont = _ContManager->GiveContainer(p); if(CORBA::is_nil(cont)) bestImplemented = false; else bestImplemented = true; @@ -141,7 +141,7 @@ int main (int argc, char * argv[]) int nbpmax; for(std::map::iterator iter=cycle.begin();iter!=cycle.end();iter++){ if(strcmp((*iter).first.c_str(),"localhost")!=0){ - Engines::MachineDefinition *p = 
_ResManager->GetMachineParameters((*iter).first.c_str()); + Engines::ResourceDefinition *p = _ResManager->GetResourceDefinition((*iter).first.c_str()); int nbproc = p->nb_node * p->nb_proc_per_node; if(cycle[(*iter).first]/nbproccmax) cmax=cycle[(*iter).first]/nbproc; diff --git a/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py b/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py index 2a459c6e5..df7445c42 100644 --- a/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py +++ b/src/LifeCycleCORBA_SWIG/LifeCycleCORBA.py @@ -41,7 +41,7 @@ class LifeCycleCORBA (SALOME_LifeCycleCORBA): class MachineParameters (Engines.MachineParameters): def __init__(self, container_name='', hostname='', componentList=[], computerList=[], OS='', - nb_proc=0, mem_mb=0, cpu_clock=0, nb_proc_per_node=0, nb_node=0, isMPI=False, workingdir='', + mem_mb=0, cpu_clock=0, nb_proc_per_node=0, nb_node=0, isMPI=False, workingdir='', mode='start', policy='altcycl', parallelLib='', nb_component_nodes=0): Engines.MachineParameters.__init__(self,container_name, hostname, componentList, computerList, OS, mem_mb, cpu_clock, nb_proc_per_node, nb_node, isMPI, workingdir, diff --git a/src/LifeCycleCORBA_SWIG/libSALOME_LifeCycleCORBA.i b/src/LifeCycleCORBA_SWIG/libSALOME_LifeCycleCORBA.i index 2caeec2e7..22da974ba 100644 --- a/src/LifeCycleCORBA_SWIG/libSALOME_LifeCycleCORBA.i +++ b/src/LifeCycleCORBA_SWIG/libSALOME_LifeCycleCORBA.i @@ -153,10 +153,6 @@ using namespace std; { param->OS = CORBA::string_dup(PyString_AsString(value)); } - else if (strcmp(keystr,"nb_proc")==0) - { - param->nb_proc = PyLong_AsLong(value); - } else if (strcmp(keystr,"mem_mb")==0) { param->mem_mb = PyLong_AsLong(value); diff --git a/src/NamingService/SALOME_NamingService.cxx b/src/NamingService/SALOME_NamingService.cxx index ece4ca55a..8cf3c9268 100644 --- a/src/NamingService/SALOME_NamingService.cxx +++ b/src/NamingService/SALOME_NamingService.cxx @@ -626,6 +626,34 @@ SALOME_NamingService::ContainerName(const Engines::MachineParameters& params) return ret; 
} +string +SALOME_NamingService::ContainerName(const Engines::ContainerParameters& params) +{ + int nbproc; /* process count encoded in the name: 0 for non-MPI containers, otherwise derived from nb_node / nb_proc_per_node */ + + if ( !params.isMPI ) + nbproc = 0; + else if ( (params.resource_params.nb_node <= 0) && (params.resource_params.nb_proc_per_node <= 0) ) + nbproc = 1; + else if ( params.resource_params.nb_node == 0 ) + nbproc = params.resource_params.nb_proc_per_node; + else if ( params.resource_params.nb_proc_per_node == 0 ) + nbproc = params.resource_params.nb_node; + else + nbproc = params.resource_params.nb_node * params.resource_params.nb_proc_per_node; + + string ret = ContainerName(params.container_name); + + if ( nbproc >= 1 ) + { + char suffix[16]; /* "_" + at most 10 digits of a positive int + NUL; stack storage, so nothing to free (the former new char[8] leaked and could overflow) */ + sprintf(suffix, "_%d", nbproc); + ret += suffix; + } + + return ret; +} + // ============================================================================ /*! \brief build a string representing a container in Naming Service. * @@ -676,6 +704,19 @@ BuildContainerNameForNS(const Engines::MachineParameters& params, return ret; } +string +SALOME_NamingService:: +BuildContainerNameForNS(const Engines::ContainerParameters& params, + const char *hostname) +{ + string ret = "/Containers/"; + ret += hostname; + ret += "/"; + ret += ContainerName(params); + + return ret; +} + // ============================================================================ /*! \brief search a name in current directory. 
* diff --git a/src/NamingService/SALOME_NamingService.hxx b/src/NamingService/SALOME_NamingService.hxx index df06b2e1c..cadc0530b 100644 --- a/src/NamingService/SALOME_NamingService.hxx +++ b/src/NamingService/SALOME_NamingService.hxx @@ -62,10 +62,16 @@ public: const int nbproc=0) throw(ServiceUnreachable); std::string ContainerName(const char *ContainerName); - std::string ContainerName(const Engines::MachineParameters& params); + std::string ContainerName(const Engines::ContainerParameters& params); std::string BuildContainerNameForNS(const char *ContainerName, const char *hostname); std::string + BuildContainerNameForNS(const Engines::ContainerParameters& params, + const char *hostname); + + // Will Be deleted on SALOME 6 + std::string ContainerName(const Engines::MachineParameters& params); + std::string BuildContainerNameForNS(const Engines::MachineParameters& params, const char *hostname); int Find(const char* name) diff --git a/src/ResourcesManager/ResourcesManager.cxx b/src/ResourcesManager/ResourcesManager.cxx index 7290c711e..dc3edc2df 100644 --- a/src/ResourcesManager/ResourcesManager.cxx +++ b/src/ResourcesManager/ResourcesManager.cxx @@ -77,9 +77,8 @@ ResourcesManager_cpp(const char *xmlFilePath) ResourcesManager_cpp::ResourcesManager_cpp() throw(ResourcesException) { -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp constructor" << endl; -#endif + RES_MESSAGE("ResourcesManager_cpp constructor"); + _resourceManagerMap["first"]=&first; _resourceManagerMap["cycl"]=&cycl; _resourceManagerMap["altcycl"]=&altcycl; @@ -116,9 +115,7 @@ ResourcesManager_cpp::ResourcesManager_cpp() throw(ResourcesException) _lasttime=0; ParseXmlFiles(); -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp constructor end"; -#endif + RES_MESSAGE("ResourcesManager_cpp constructor end"); } //============================================================================= @@ -129,161 +126,134 @@ ResourcesManager_cpp::ResourcesManager_cpp() 
throw(ResourcesException) ResourcesManager_cpp::~ResourcesManager_cpp() { -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp destructor" << endl; -#endif + RES_MESSAGE("ResourcesManager_cpp destructor"); } //============================================================================= //! get the list of resource names fitting constraints given by params /*! - * If hostname is specified, check if it is local or known in resources catalog. - * - * Else - * - select first machines with corresponding OS (all machines if - * parameter OS empty), - * - then select the sublist of machines on which the component is known - * (if the result is empty, that probably means that the inventory of - * components is probably not done, so give complete list from previous step) + * Steps: + * 1: Restrict list with resourceList if defined + * 2: If name is defined -> check resource list + * 3: If not 2:, if hostname is defined -> check resource list + * 4: If not 3:, sort resource with nb_proc, etc... + * 5: In all cases remove resource that does not correspond with OS + * 6: And remove resource with componentList - if list is empty ignored it... 
*/ //============================================================================= std::vector -ResourcesManager_cpp::GetFittingResources(const machineParams& params) throw(ResourcesException) +ResourcesManager_cpp::GetFittingResources(const resourceParams& params) throw(ResourcesException) { - vector vec; + RES_MESSAGE("[GetFittingResources] on computer " << Kernel_Utils::GetHostname().c_str()); + RES_MESSAGE("[GetFittingResources] with resource name: " << params.name); + RES_MESSAGE("[GetFittingResources] with hostname: "<< params.hostname); - ParseXmlFiles(); + // Result + std::vector vec; - const char *hostname = params.hostname.c_str(); -#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "GetFittingResources " << hostname << " " << Kernel_Utils::GetHostname().c_str() << endl; -#endif + // Parse Again CalatogResource File + ParseXmlFiles(); - // PaCO++ parallel container case - std::string parallelLib(params.parallelLib); - if (params.nb_component_nodes > 0 && parallelLib != "") + // Steps: + // 1: Restrict list with resourceList if defined + // 2: If name is defined -> check resource list + // 3: If not 2:, if hostname is defined -> check resource list + // 4: If not 3:, sort resource with nb_proc, etc... + // 5: In all cases remove resource that does not correspond with OS + // 6: And remove resource with componentList - if list is empty ignored it... 
+ + + MapOfParserResourcesType local_resourcesList = _resourcesList; + // Step 1 + if (params.resourceList.size() > 0) { -#if defined(_DEBUG_) || defined(_DEBUG) - std::cerr << "[GetFittingResources] ParallelContainer case" << std::endl; - std::cerr << "[GetFittingResources] parallelLib is " << parallelLib << std::endl; - std::cerr << "[GetFittingResources] nb_component_nodes is " << params.nb_component_nodes << std::endl; -#endif + RES_MESSAGE("[GetFittingResources] Restricted resource list found !"); + local_resourcesList.clear(); + std::vector::size_type sz = params.resourceList.size(); - // Currently we only support parallel containers that define a hostname target - if (hostname[0] != '\0') + for (unsigned int i=0; i < sz; i++) { - // Special case of localhost -> put containers into the real computer name - if (strcmp(hostname, "localhost") == 0) - vec.push_back(Kernel_Utils::GetHostname().c_str()); - else - { - // Try find the resource into the map - if (_resourcesList.find(hostname) != _resourcesList.end()) - vec.push_back(hostname); - else - std::cerr << "[GetFittingResources] ParallelContainer hostname does not exist into the resource list !" << std::endl; - } + if (_resourcesList.find(params.resourceList[i]) != _resourcesList.end()) + local_resourcesList[params.resourceList[i]] = _resourcesList[params.resourceList[i]]; + } + } + + // Step 2 + if (params.name != "") + { + RES_MESSAGE("[GetFittingResources] name parameter found !"); + if (_resourcesList.find(params.name) != _resourcesList.end()) + { + vec.push_back(params.name); } else - std::cerr << "[GetFittingResources] ParallelContainer hostname is empty -> cannot find a possible resource" << std::endl; - return vec; + RES_MESSAGE("[GetFittingResources] name was not found on resource list ! 
name was " << params.name); } - if (hostname[0] != '\0'){ + // Step 3 + else if (params.hostname != "") + { + RES_MESSAGE("[GetFittingResources] Entering in hostname case !"); - if ( strcmp(hostname, "localhost") == 0 || - strcmp(hostname, Kernel_Utils::GetHostname().c_str()) == 0 ) - { -//#if defined(_DEBUG_) || defined(_DEBUG) -// cerr << "ResourcesManager_cpp::GetFittingResources : localhost" << endl; -//#endif - vec.push_back(Kernel_Utils::GetHostname().c_str()); -//#if defined(_DEBUG_) || defined(_DEBUG) -// cerr << "ResourcesManager_cpp::GetFittingResources : " << vec.size() << endl; -//#endif - } - - else if (_resourcesList.find(hostname) != _resourcesList.end()) - { - // --- params.hostname is in the list of resources so return it. - vec.push_back(hostname); - } - - else if (_resourcesBatchList.find(hostname) != _resourcesBatchList.end()) + std::string hostname = params.hostname; + if (hostname == "localhost") + hostname = Kernel_Utils::GetHostname().c_str(); + + std::map::const_iterator iter = _resourcesList.begin(); + for (; iter != _resourcesList.end(); iter++) { - // --- params.hostname is in the list of resources so return it. - vec.push_back(hostname); + if ((*iter).second.HostName == hostname) + vec.push_back((*iter).first); } - - else - { - // Cas d'un cluster: nombre de noeuds > 1 - int cpt=0; - for (map::const_iterator iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++){ - if( (*iter).second.DataForSort._nbOfNodes > 1 ){ - if( strncmp(hostname,(*iter).first.c_str(),strlen(hostname)) == 0 ){ - vec.push_back((*iter).first.c_str()); - cpt++; - } - } - } - if(cpt==0){ - // --- user specified an unknown hostame so notify him. 
-#if defined(_DEBUG_) || defined(_DEBUG) - cerr << "ResourcesManager_cpp::GetFittingResources : SALOME_Exception" << endl; -#endif - std::string error("GetFittinResouces : ResourcesManager doesn't find the host requested : "); - error += hostname; - throw ResourcesException(error); - } - } } - - else{ - // --- Search for available resources sorted by priority - vec=params.computerList; - SelectOnlyResourcesWithOS(vec, params.OS.c_str()); - - KeepOnlyResourcesWithComponent(vec, params.componentList); + // Step 4 + else + { + // --- Search for available resources sorted by priority + MapOfParserResourcesType_it i = local_resourcesList.begin(); + for (; i != local_resourcesList.end(); ++i) + vec.push_back(i->first); - //if hosts list (vec) is empty, ignore componentList constraint and use only OS constraint - if (vec.size() == 0) - SelectOnlyResourcesWithOS(vec, params.OS.c_str()); - // --- set wanted parameters ResourceDataToSort::_nbOfProcWanted = params.nb_proc; - ResourceDataToSort::_nbOfNodesWanted = params.nb_node; - ResourceDataToSort::_nbOfProcPerNodeWanted = params.nb_proc_per_node; - ResourceDataToSort::_CPUFreqMHzWanted = params.cpu_clock; - ResourceDataToSort::_memInMBWanted = params.mem_mb; - // --- end of set - list li; - - for (vector::iterator iter = vec.begin(); - iter != vec.end(); - iter++) - li.push_back(_resourcesList[(*iter)].DataForSort); - + // Sort + std::list li; + std::vector::iterator iter = vec.begin(); + for (; iter != vec.end(); iter++) + li.push_back(local_resourcesList[(*iter)].DataForSort); li.sort(); - - unsigned int i = 0; - - for (list::iterator iter2 = li.begin(); - iter2 != li.end(); - iter2++) - vec[i++] = (*iter2)._hostName; + + vec.clear(); + for (list::iterator iter2 = li.begin(); iter2 != li.end(); iter2++) + vec.push_back((*iter2)._Name); } + + // Step 5 + SelectOnlyResourcesWithOS(vec, params.OS.c_str()); - return vec; + // Step 6 + std::vector vec_save(vec); + KeepOnlyResourcesWithComponent(vec, params.componentList); + 
if (vec.size() == 0) + vec = vec_save; + + // End + // Send an exception if return list is empty... + if (vec.size() == 0) + { + std::string error("[GetFittingResources] ResourcesManager doesn't find any resource that feets to your parameters"); + throw ResourcesException(error); + } + return vec; } //============================================================================= @@ -294,15 +264,12 @@ ResourcesManager_cpp::GetFittingResources(const machineParams& params) throw(Res //============================================================================= int -ResourcesManager_cpp:: -AddResourceInCatalog(const machineParams& paramsOfNewResources, - const vector& componentsOnNewResources, - const char *alias, - const char *userName, - AccessModeType mode, - AccessProtocolType prot, - AccessProtocolType iprot) -throw(ResourcesException) +ResourcesManager_cpp::AddResourceInCatalog(const resourceParams& paramsOfNewResources, + const vector& componentsOnNewResources, + const char *userName, + AccessModeType mode, + AccessProtocolType prot, + AccessProtocolType iprot) throw(ResourcesException) { vector::const_iterator iter = find(componentsOnNewResources.begin(), componentsOnNewResources.end(), @@ -311,8 +278,8 @@ throw(ResourcesException) if (iter != componentsOnNewResources.end()) { ParserResourcesType newElt; - newElt.DataForSort._hostName = paramsOfNewResources.hostname; - newElt.Alias = alias; + newElt.DataForSort._Name = paramsOfNewResources.name; + newElt.HostName = paramsOfNewResources.hostname; newElt.Protocol = prot; newElt.ClusterInternalProtocol = iprot; newElt.Mode = mode; @@ -324,10 +291,9 @@ throw(ResourcesException) newElt.DataForSort._nbOfNodes = paramsOfNewResources.nb_node; newElt.DataForSort._nbOfProcPerNode = paramsOfNewResources.nb_proc_per_node; - _resourcesList[newElt.DataForSort._hostName] = newElt; + _resourcesList[newElt.DataForSort._Name] = newElt; return 0; } - else throw ResourcesException("KERNEL is not present in this resource"); } @@ 
-338,9 +304,9 @@ throw(ResourcesException) */ //============================================================================= -void ResourcesManager_cpp::DeleteResourceInCatalog(const char *hostname) +void ResourcesManager_cpp::DeleteResourceInCatalog(const char * name) { - _resourcesList.erase(hostname); + _resourcesList.erase(name); } //============================================================================= @@ -351,9 +317,8 @@ void ResourcesManager_cpp::DeleteResourceInCatalog(const char *hostname) void ResourcesManager_cpp::WriteInXmlFile(std::string & xml_file) { -#if defined(_DEBUG_) || defined(_DEBUG) - std::cerr << "WriteInXmlFile : start" << std::endl; -#endif + RES_MESSAGE("WriteInXmlFile : start"); + const char* aFilePath = xml_file.c_str(); FILE* aFile = fopen(aFilePath, "w"); @@ -367,7 +332,7 @@ void ResourcesManager_cpp::WriteInXmlFile(std::string & xml_file) xmlNewDocComment(aDoc, BAD_CAST "ResourcesCatalog"); SALOME_ResourcesCatalog_Handler* handler = - new SALOME_ResourcesCatalog_Handler(_resourcesList, _resourcesBatchList); + new SALOME_ResourcesCatalog_Handler(_resourcesList); handler->PrepareDocToXmlFile(aDoc); delete handler; @@ -378,9 +343,7 @@ void ResourcesManager_cpp::WriteInXmlFile(std::string & xml_file) // Free the document xmlFreeDoc(aDoc); fclose(aFile); -#if defined(_DEBUG_) || defined(_DEBUG) - std::cerr << "WriteInXmlFile : WRITING DONE!" 
<< std::endl; -#endif + RES_MESSAGE("WriteInXmlFile : WRITING DONE!"); } //============================================================================= @@ -413,14 +376,13 @@ const MapOfParserResourcesType& ResourcesManager_cpp::ParseXmlFiles() if (to_parse) { _resourcesList.clear(); - _resourcesBatchList.clear(); // On parse tous les fichiers for(_path_resources_it = _path_resources.begin(); _path_resources_it != _path_resources.end(); ++_path_resources_it) { MapOfParserResourcesType _resourcesList_tmp; MapOfParserResourcesType _resourcesBatchList_tmp; SALOME_ResourcesCatalog_Handler* handler = - new SALOME_ResourcesCatalog_Handler(_resourcesList_tmp, _resourcesBatchList_tmp); + new SALOME_ResourcesCatalog_Handler(_resourcesList_tmp); const char* aFilePath = (*_path_resources_it).c_str(); FILE* aFile = fopen(aFilePath, "r"); @@ -444,18 +406,6 @@ const MapOfParserResourcesType& ResourcesManager_cpp::ParseXmlFiles() std::cerr << "ParseXmlFiles Warning, to resource with the same name was found, taking the first declaration : " << i->first << std::endl; } } - for (MapOfParserResourcesType_it i = _resourcesBatchList_tmp.begin(); i != _resourcesBatchList_tmp.end(); ++i) - { - MapOfParserResourcesType_it j = _resourcesBatchList.find(i->first); - if (j == _resourcesBatchList.end()) - { - _resourcesBatchList[i->first] = i->second; - } - else - { - std::cerr << "ParseXmlFiles Warning, to resource with the same name was found, taking the first declaration : " << i->first << std::endl; - } - } } else std::cerr << "ResourcesManager_cpp: could not parse file " << aFilePath << std::endl; @@ -483,50 +433,36 @@ const MapOfParserResourcesType& ResourcesManager_cpp::GetList() const return _resourcesList; } -string ResourcesManager_cpp::Find(const std::string& policy, const std::vector& listOfMachines) +string ResourcesManager_cpp::Find(const std::string& policy, const std::vector& listOfResources) { if(_resourceManagerMap.count(policy)==0) - return 
_resourceManagerMap[""]->Find(listOfMachines,_resourcesList); - return _resourceManagerMap[policy]->Find(listOfMachines,_resourcesList); + return _resourceManagerMap[""]->Find(listOfResources, _resourcesList); + return _resourceManagerMap[policy]->Find(listOfResources, _resourcesList); } //============================================================================= /*! - * Gives a sublist of machines with matching OS. - * If parameter OS is empty, gives the complete list of machines + * Gives a sublist of resources with matching OS. + * If parameter OS is empty, gives the complete list of resources */ //============================================================================= - -// Warning need an updated parsed list : _resourcesList -void ResourcesManager_cpp::SelectOnlyResourcesWithOS( vector& hosts, const char *OS) const -throw(ResourcesException) +void +ResourcesManager_cpp::SelectOnlyResourcesWithOS(std::vector& resources, std::string OS) { - string base(OS); - - if(hosts.size()==0) - { - //No constraint on computer list : take all known resources with OS - map::const_iterator iter; - for (iter = _resourcesList.begin(); iter != _resourcesList.end(); iter++) - { - if ( (*iter).second.OS == base || base.size() == 0) - hosts.push_back((*iter).first); - } - } - else + if (OS != "") + { + // a computer list is given : take only resources with OS on those computers + std::vector vec_tmp = resources; + resources.clear(); + vector::iterator iter = vec_tmp.begin(); + for (; iter != vec_tmp.end(); iter++) { - //a computer list is given : take only resources with OS on those computers - vector vec=hosts; - hosts.clear(); - vector::iterator iter; - for (iter = vec.begin(); iter != vec.end(); iter++) - { - MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); - if(it != _resourcesList.end()) - if ( (*it).second.OS == base || base.size() == 0 ) - hosts.push_back(*iter); - } + MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); 
+ if(it != _resourcesList.end()) + if ( (*it).second.OS == OS) + resources.push_back(*iter); } + } } @@ -535,41 +471,47 @@ throw(ResourcesException) * Gives a sublist of machines on which the component is known. */ //============================================================================= - -//Warning need an updated parsed list : _resourcesList -void ResourcesManager_cpp::KeepOnlyResourcesWithComponent( vector& hosts, const vector& componentList) const -throw(ResourcesException) +void +ResourcesManager_cpp::KeepOnlyResourcesWithComponent(std::vector& resources, + const vector& componentList) { - for (vector::iterator iter = hosts.begin(); iter != hosts.end();) + std::vector::iterator iter = resources.begin(); + for (; iter != resources.end(); iter++) + { + MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); + const vector& mapOfComponentsOfCurrentHost = (*it).second.ComponentsList; + + bool erasedHost = false; + if( mapOfComponentsOfCurrentHost.size() > 0 ) { - MapOfParserResourcesType::const_iterator it = _resourcesList.find(*iter); - const vector& mapOfComponentsOfCurrentHost = (((*it).second).ComponentsList); - - bool erasedHost = false; - if( mapOfComponentsOfCurrentHost.size() > 0 ){ - for(unsigned int i=0;i::const_iterator itt = find(mapOfComponentsOfCurrentHost.begin(), - mapOfComponentsOfCurrentHost.end(), - compoi); - if (itt == mapOfComponentsOfCurrentHost.end()){ - erasedHost = true; - break; - } + for(unsigned int i=0; i::const_iterator itt = find(mapOfComponentsOfCurrentHost.begin(), + mapOfComponentsOfCurrentHost.end(), + compoi); + if (itt == mapOfComponentsOfCurrentHost.end()) + { + erasedHost = true; + break; } } - if(erasedHost) - hosts.erase(iter); - else - iter++; } + if(erasedHost) + resources.erase(iter); + } } -ParserResourcesType ResourcesManager_cpp::GetResourcesList(const std::string& machine) +ParserResourcesType +ResourcesManager_cpp::GetResourcesDescr(const std::string & name) { - if 
(_resourcesList.find(machine) != _resourcesList.end()) - return _resourcesList[machine]; + if (_resourcesList.find(name) != _resourcesList.end()) + return _resourcesList[name]; else - return _resourcesBatchList[machine]; + { + std::string error("[GetResourcesDescr] Resource does not exist: "); + error += name; + throw ResourcesException(error); + } } diff --git a/src/ResourcesManager/ResourcesManager.hxx b/src/ResourcesManager/ResourcesManager.hxx index f7cbb5291..c5ce0f2c8 100644 --- a/src/ResourcesManager/ResourcesManager.hxx +++ b/src/ResourcesManager/ResourcesManager.hxx @@ -43,19 +43,18 @@ // in a critical section to be sure to be clean. // Only one thread should use the SALOME_ResourcesManager class in a SALOME // session. - -struct machineParams{ +struct resourceParams +{ + std::string name; std::string hostname; std::string OS; - std::string parallelLib; unsigned int nb_proc; unsigned int nb_node; unsigned int nb_proc_per_node; unsigned int cpu_clock; unsigned int mem_mb; - unsigned int nb_component_nodes; std::vector componentList; - std::vector computerList; + std::vector resourceList; }; class RESOURCESMANAGER_EXPORT ResourcesException @@ -77,20 +76,19 @@ class RESOURCESMANAGER_EXPORT ResourcesManager_cpp ~ResourcesManager_cpp(); std::vector - GetFittingResources(const machineParams& params) throw(ResourcesException); + GetFittingResources(const resourceParams& params) throw(ResourcesException); - std::string Find(const std::string& policy, const std::vector& listOfMachines); + std::string Find(const std::string& policy, + const std::vector& listOfResources); - int AddResourceInCatalog - (const machineParams& paramsOfNewResources, - const std::vector& componentsOnNewResources, - const char *alias, - const char *userName, - AccessModeType mode, - AccessProtocolType prot, - AccessProtocolType iprot) throw(ResourcesException); + int AddResourceInCatalog (const resourceParams& paramsOfNewResources, + const std::vector& componentsOnNewResources, + const char 
*userName, + AccessModeType mode, + AccessProtocolType prot, + AccessProtocolType iprot) throw(ResourcesException); - void DeleteResourceInCatalog(const char *hostname); + void DeleteResourceInCatalog(const char * name); void WriteInXmlFile(std::string & xml_file); @@ -98,17 +96,14 @@ class RESOURCESMANAGER_EXPORT ResourcesManager_cpp const MapOfParserResourcesType& GetList() const; - ParserResourcesType GetResourcesList(const std::string& machine); + ParserResourcesType GetResourcesDescr(const std::string & name); protected: - void SelectOnlyResourcesWithOS(std::vector& hosts, - const char *OS) const - throw(ResourcesException); + void SelectOnlyResourcesWithOS(std::vector& resources, std::string OS); - void KeepOnlyResourcesWithComponent(std::vector& hosts, - const std::vector& componentList) const - throw(ResourcesException); + void KeepOnlyResourcesWithComponent(std::vector& resources, + const std::vector& componentList); //! will contain the path to the ressources catalog std::list _path_resources; @@ -117,9 +112,6 @@ class RESOURCESMANAGER_EXPORT ResourcesManager_cpp //! will contain the informations on the data type catalog(after parsing) MapOfParserResourcesType _resourcesList; - //! will contain the informations on the data type catalog(after parsing) - MapOfParserResourcesType _resourcesBatchList; - //! 
a map that contains all the available load rate managers (the key is the name) std::map _resourceManagerMap; diff --git a/src/ResourcesManager/ResourcesManager_Defs.hxx b/src/ResourcesManager/ResourcesManager_Defs.hxx index c6d0c25e9..43936cdfa 100755 --- a/src/ResourcesManager/ResourcesManager_Defs.hxx +++ b/src/ResourcesManager/ResourcesManager_Defs.hxx @@ -32,4 +32,16 @@ # define RESOURCESMANAGER_EXPORT #endif +// MESSAGES +#define RES_MESS_INIT(deb) std::cerr << deb +#define RES_MESS_BEGIN(deb) RES_MESS_INIT(deb)<<__FILE__ <<" ["<<__LINE__<<"] : " +#define RES_MESS_END std::endl; +#define RES_INFOS(msg) {RES_MESS_BEGIN("- Trace ") << msg << RES_MESS_END} + +#if defined(_DEBUG_) || defined(_DEBUG) +#define RES_MESSAGE(msg) {RES_MESS_BEGIN("- Trace ") << msg << RES_MESS_END} +#else /* ifdef _DEBUG_*/ +#define RES_MESSAGE(msg) {} +#endif /* ifdef _DEBUG_*/ + #endif // __RESOURCESMANAGER_DEFS_HXX__ diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx index 69ecec78b..b41a5aa56 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.cxx @@ -40,16 +40,13 @@ using namespace std; //============================================================================= SALOME_ResourcesCatalog_Handler:: -SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list, - MapOfParserResourcesType& resources_batch_list): - _resources_list(resources_list), - _resources_batch_list(resources_batch_list) +SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list): _resources_list(resources_list) { //XML tags initialisation test_machine = "machine"; test_cluster = "cluster"; + test_name = "name"; test_hostname = "hostname"; - test_alias = "alias"; test_protocol = "protocol"; test_cluster_internal_protocol = "iprotocol"; test_mode = "mode"; @@ -116,56 +113,33 @@ void 
SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) while(aCurNode != NULL) { // Cas d'une machine ou d'une machine batch - if ( !xmlStrcmp(aCurNode->name,(const xmlChar*)test_machine) ) + if (!xmlStrcmp(aCurNode->name,(const xmlChar*)test_machine)) { _resource.Clear(); bool Ok = ProcessMachine(aCurNode, _resource); if (Ok) { - // There is two lists - // _resources_list for interactive resources - // _resources_batch_list for batch resources - // This choice is done with Mode parameter - if (_resource.Mode == interactive) + // Adding a resource + if(_resource.HostName == "localhost") { - // Adding a generic cluster - int aNbNodes = _resource.DataForSort._nbOfNodes; - if( aNbNodes > 1 ){ - string clusterNode = _resource.DataForSort._hostName ; - for( int i=0; i < aNbNodes; i++ ){ - char inode[64]; - inode[0] = '\0' ; - sprintf(inode,"%s%d",clusterNode.c_str(),i+1); - std::string nodeName(inode); - _resource.DataForSort._hostName = nodeName ; - _resource.HostName = nodeName ; - _resources_list[nodeName] = _resource; - } - } - else + _resource.HostName = Kernel_Utils::GetHostname(); + if (_resource.Name == "localhost") { - // Adding a machine - if(_resource.HostName == "localhost") - { - _resource.HostName = Kernel_Utils::GetHostname(); - _resource.DataForSort._hostName = Kernel_Utils::GetHostname(); - _resources_list[Kernel_Utils::GetHostname()] = _resource; - } - else - _resources_list[_resource.HostName] = _resource; + _resource.Name = Kernel_Utils::GetHostname(); + _resource.DataForSort._Name = Kernel_Utils::GetHostname(); } } - else - // Adding a batch machine/cluster - _resources_batch_list[_resource.HostName] = _resource; + _resources_list[_resource.Name] = _resource; } } - if ( !xmlStrcmp(aCurNode->name,(const xmlChar*)test_cluster) ) + // Cas de la déclaration d'un cluster + if (!xmlStrcmp(aCurNode->name,(const xmlChar*)test_cluster)) { - // Cas de la déclaration d'un cluster _resource.Clear(); if(ProcessCluster(aCurNode, _resource)) - 
_resources_list[_resource.HostName] = _resource; + { + _resources_list[_resource.Name] = _resource; + } } aCurNode = aCurNode->next; } @@ -175,18 +149,21 @@ void SALOME_ResourcesCatalog_Handler::ProcessXmlDocument(xmlDocPtr theDoc) iter != _resources_list.end(); iter++) { - std::cerr << (*iter).first << std::endl; - std::cerr << (*iter).second.HostName << std::endl; - std::cerr << (*iter).second.Alias << std::endl; - std::cerr << (*iter).second.UserName << std::endl; - std::cerr << (*iter).second.AppliPath << std::endl; - std::cerr << (*iter).second.OS << std::endl; - std::cerr << (*iter).second.Protocol << std::endl; - std::cerr << (*iter).second.ClusterInternalProtocol << std::endl; - std::cerr << (*iter).second.Mode << std::endl; + std::cerr << "************************************************" << std::endl; + std::cerr << "Resource " << (*iter).first << " found:" << std::endl; + std::cerr << " Name: " << (*iter).second.Name << std::endl; + std::cerr << " Hostname: " << (*iter).second.HostName << std::endl; + std::cerr << " Username: " << (*iter).second.UserName << std::endl; + std::cerr << " Appli path: " <<(*iter).second.AppliPath << std::endl; + std::cerr << " OS: " << (*iter).second.OS << std::endl; + std::cerr << " Protocol: " << (*iter).second.PrintAccessProtocolType() << std::endl; + std::cerr << " Internal Protocol: " <<(*iter).second.PrintClusterInternalProtocol() << std::endl; + std::cerr << " Mode: " << (*iter).second.PrintAccessModeType() << std::endl; + std::cerr << " Batch Type: " << (*iter).second.PrintBatchType() << std::endl; + std::cerr << " MPI Impl: " << (*iter).second.PrintMpiImplType() << std::endl; + std::cerr << "************************************************" << std::endl; } #endif - } bool @@ -197,7 +174,6 @@ SALOME_ResourcesCatalog_Handler::ProcessCluster(xmlNodePtr cluster_descr, Parser if (xmlHasProp(cluster_descr, (const xmlChar*)test_hostname)) { xmlChar* hostname = xmlGetProp(cluster_descr, (const xmlChar*)test_hostname); - 
resource.DataForSort._hostName = (const char*)hostname; resource.HostName = (const char*)hostname; xmlFree(hostname); } @@ -208,6 +184,20 @@ SALOME_ResourcesCatalog_Handler::ProcessCluster(xmlNodePtr cluster_descr, Parser return false; } + if (xmlHasProp(cluster_descr, (const xmlChar*)test_name)) + { + xmlChar* name = xmlGetProp(cluster_descr, (const xmlChar*)test_name); + resource.Name = (const char*)name; + resource.DataForSort._Name = (const char*)name; + xmlFree(name); + } + else + { + resource.Name = resource.HostName; + resource.DataForSort._Name = resource.HostName; + std::cerr << "SALOME_ResourcesCatalog_Handler::ProcessCluster : !!! Warning !!! No Name found use Hostname for resource: " << _resource.Name << std::endl; + } + if (xmlHasProp(cluster_descr, (const xmlChar*)test_use)) { xmlChar* use = xmlGetProp(cluster_descr, (const xmlChar*)test_use); @@ -293,7 +283,6 @@ SALOME_ResourcesCatalog_Handler::ProcessMember(xmlNodePtr member_descr, ParserRe if (xmlHasProp(member_descr, (const xmlChar*)test_hostname)) { xmlChar* hostname = xmlGetProp(member_descr, (const xmlChar*)test_hostname); - resource.DataForSort._hostName = (const char*)hostname; resource.HostName = (const char*)hostname; xmlFree(hostname); } @@ -414,7 +403,6 @@ SALOME_ResourcesCatalog_Handler::ProcessMachine(xmlNodePtr machine_descr, Parser if (xmlHasProp(machine_descr, (const xmlChar*)test_hostname)) { xmlChar* hostname = xmlGetProp(machine_descr, (const xmlChar*)test_hostname); - resource.DataForSort._hostName = (const char*)hostname; resource.HostName = (const char*)hostname; xmlFree(hostname); } @@ -425,14 +413,19 @@ SALOME_ResourcesCatalog_Handler::ProcessMachine(xmlNodePtr machine_descr, Parser return false; } - if (xmlHasProp(machine_descr, (const xmlChar*)test_alias)) + if (xmlHasProp(machine_descr, (const xmlChar*)test_name)) { - xmlChar* alias = xmlGetProp(machine_descr, (const xmlChar*)test_alias); - resource.Alias = (const char*)alias; - xmlFree(alias); + xmlChar* name = 
xmlGetProp(machine_descr, (const xmlChar*)test_name); + resource.Name = (const char*)name; + resource.DataForSort._Name = (const char*)name; + xmlFree(name); } else - resource.Alias = ""; + { + resource.Name = resource.HostName; + resource.DataForSort._Name = resource.HostName; + std::cerr << "SALOME_ResourcesCatalog_Handler::ProcessMachine : !!! Warning !!! No Name found use Hostname for resource: " << _resource.Name << std::endl; + } if (xmlHasProp(machine_descr, (const xmlChar*)test_batch_queue)) { @@ -517,7 +510,6 @@ SALOME_ResourcesCatalog_Handler::ProcessMachine(xmlNodePtr machine_descr, Parser if (xmlHasProp(machine_descr, (const xmlChar*)test_batch)) { - std::cerr << "COUCOU !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; xmlChar* batch = xmlGetProp(machine_descr, (const xmlChar*)test_batch); std::string aBatch = (const char*)batch; xmlFree(batch); @@ -664,214 +656,106 @@ void SALOME_ResourcesCatalog_Handler::PrepareDocToXmlFile(xmlDocPtr theDoc) root_node = xmlNewNode(NULL, BAD_CAST "resources"); xmlDocSetRootElement(theDoc, root_node); - for (map::iterator iter = - _resources_list.begin(); - iter != _resources_list.end(); - iter++) + std::map::iterator iter = _resources_list.begin(); + for (; iter != _resources_list.end(); iter++) + { + node = xmlNewChild(root_node, NULL, BAD_CAST test_machine, NULL); + xmlNewProp(node, BAD_CAST test_name, BAD_CAST (*iter).second.Name.c_str()); + xmlNewProp(node, BAD_CAST test_hostname, BAD_CAST (*iter).second.HostName.c_str()); + xmlNewProp(node, BAD_CAST test_batch_queue, BAD_CAST (*iter).second.batchQueue.c_str()); + xmlNewProp(node, BAD_CAST test_user_commands, BAD_CAST (*iter).second.userCommands.c_str()); + + switch ((*iter).second.Protocol) { - node = xmlNewChild(root_node, NULL, BAD_CAST test_machine, NULL); - xmlNewProp(node, BAD_CAST test_hostname, BAD_CAST (*iter).second.HostName.c_str()); - xmlNewProp(node, BAD_CAST test_alias, BAD_CAST (*iter).second.Alias.c_str()); - xmlNewProp(node, BAD_CAST 
test_batch_queue, BAD_CAST (*iter).second.batchQueue.c_str()); - xmlNewProp(node, BAD_CAST test_user_commands, BAD_CAST (*iter).second.userCommands.c_str()); - - switch ((*iter).second.Protocol) - { - case rsh: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); - break; - case ssh: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "ssh"); - break; - default: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); - } + case rsh: + xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); + break; + case ssh: + xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "ssh"); + break; + default: + xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); + } - switch ((*iter).second.ClusterInternalProtocol) - { - case rsh: - xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); - break; - case ssh: - xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "ssh"); - break; - default: - xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); - } + switch ((*iter).second.ClusterInternalProtocol) + { + case rsh: + xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); + break; + case ssh: + xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "ssh"); + break; + default: + xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); + } - switch ((*iter).second.Mode) - { - case interactive: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - break; - case batch: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "batch"); - break; - default: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - } - - switch ((*iter).second.Batch) - { - case pbs: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "pbs"); - break; - case lsf: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); - break; - case sge: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); - break; - case ssh_batch: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "ssh_batch"); - 
break; - default: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); - } - - switch ((*iter).second.mpi) - { - case lam: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "lam"); - break; - case mpich1: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich1"); - break; - case mpich2: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich2"); - break; - case openmpi: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "openmpi"); - break; - case slurm: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "slurm"); - break; - case prun: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "prun"); - break; - default: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST ""); - } - - xmlNewProp(node, BAD_CAST test_user_name, BAD_CAST (*iter).second.UserName.c_str()); - - for (vector::const_iterator iter2 = - (*iter).second.ComponentsList.begin(); - iter2 != (*iter).second.ComponentsList.end(); - iter2++) - { - node1 = xmlNewChild(node, NULL, BAD_CAST test_components, NULL); - xmlNewProp(node1, BAD_CAST test_component_name, BAD_CAST (*iter2).c_str()); - } - - xmlNewProp(node, BAD_CAST test_os, BAD_CAST (*iter).second.OS.c_str()); - xmlNewProp(node, BAD_CAST test_mem_in_mb, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._memInMB)); - xmlNewProp(node, BAD_CAST test_cpu_freq_mhz, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._CPUFreqMHz)); - xmlNewProp(node, BAD_CAST test_nb_of_nodes, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfNodes)); - xmlNewProp(node, BAD_CAST test_nb_of_proc_per_node, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfProcPerNode)); + switch ((*iter).second.Mode) + { + case interactive: + xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); + break; + case batch: + xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "batch"); + break; + default: + xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); } - for (map::iterator iter = - _resources_batch_list.begin(); - iter != 
_resources_batch_list.end(); - iter++) + + switch ((*iter).second.Batch) { - node = xmlNewChild(root_node, NULL, BAD_CAST test_machine, NULL); - xmlNewProp(node, BAD_CAST test_hostname, BAD_CAST (*iter).second.HostName.c_str()); - xmlNewProp(node, BAD_CAST test_alias, BAD_CAST (*iter).second.Alias.c_str()); - - switch ((*iter).second.Protocol) - { - case rsh: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); - break; - case ssh: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "ssh"); - break; - default: - xmlNewProp(node, BAD_CAST test_protocol, BAD_CAST "rsh"); - } + case pbs: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "pbs"); + break; + case lsf: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); + break; + case sge: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); + break; + case ssh_batch: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "ssh_batch"); + break; + default: + xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); + } - switch ((*iter).second.ClusterInternalProtocol) - { - case rsh: - xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); - break; - case ssh: - xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "ssh"); - break; - default: - xmlNewProp(node, BAD_CAST test_cluster_internal_protocol, BAD_CAST "rsh"); - } + switch ((*iter).second.mpi) + { + case lam: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "lam"); + break; + case mpich1: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich1"); + break; + case mpich2: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich2"); + break; + case openmpi: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "openmpi"); + break; + case slurm: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "slurm"); + break; + case prun: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "prun"); + break; + default: + xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST ""); + } + + xmlNewProp(node, BAD_CAST test_user_name, BAD_CAST (*iter).second.UserName.c_str()); - 
switch ((*iter).second.Mode) - { - case interactive: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - break; - case batch: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "batch"); - break; - default: - xmlNewProp(node, BAD_CAST test_mode, BAD_CAST "interactive"); - } - - switch ((*iter).second.Batch) - { - case pbs: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "pbs"); - break; - case lsf: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "lsf"); - break; - case sge: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "sge"); - break; - case ssh_batch: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST "ssh_batch"); - break; - default: - xmlNewProp(node, BAD_CAST test_batch, BAD_CAST ""); - } - - switch ((*iter).second.mpi) - { - case lam: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "lam"); - break; - case mpich1: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich1"); - break; - case mpich2: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "mpich2"); - break; - case openmpi: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "openmpi"); - break; - case slurm: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "slurm"); - break; - case prun: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST "prun"); - break; - default: - xmlNewProp(node, BAD_CAST test_mpi, BAD_CAST ""); - } - - xmlNewProp(node, BAD_CAST test_user_name, BAD_CAST (*iter).second.UserName.c_str()); - - for (vector::const_iterator iter2 = - (*iter).second.ComponentsList.begin(); - iter2 != (*iter).second.ComponentsList.end(); - iter2++) - { - node1 = xmlNewChild(node, NULL, BAD_CAST test_components, NULL); - xmlNewProp(node1, BAD_CAST test_component_name, BAD_CAST (*iter2).c_str()); - } - - xmlNewProp(node, BAD_CAST test_os, BAD_CAST (*iter).second.OS.c_str()); - xmlNewProp(node, BAD_CAST test_mem_in_mb, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._memInMB)); - xmlNewProp(node, BAD_CAST test_cpu_freq_mhz, BAD_CAST sprintf(string_buf, "%u", 
(*iter).second.DataForSort._CPUFreqMHz)); - xmlNewProp(node, BAD_CAST test_nb_of_nodes, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfNodes)); - xmlNewProp(node, BAD_CAST test_nb_of_proc_per_node, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfProcPerNode)); + std::vector::const_iterator iter2 = (*iter).second.ComponentsList.begin(); + for(;iter2 != (*iter).second.ComponentsList.end(); iter2++) + { + node1 = xmlNewChild(node, NULL, BAD_CAST test_components, NULL); + xmlNewProp(node1, BAD_CAST test_component_name, BAD_CAST (*iter2).c_str()); } + + xmlNewProp(node, BAD_CAST test_os, BAD_CAST (*iter).second.OS.c_str()); + xmlNewProp(node, BAD_CAST test_mem_in_mb, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._memInMB)); + xmlNewProp(node, BAD_CAST test_cpu_freq_mhz, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._CPUFreqMHz)); + xmlNewProp(node, BAD_CAST test_nb_of_nodes, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfNodes)); + xmlNewProp(node, BAD_CAST test_nb_of_proc_per_node, BAD_CAST sprintf(string_buf, "%u", (*iter).second.DataForSort._nbOfProcPerNode)); + } } diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx index ffdbc54e8..0be33ec67 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Handler.hxx @@ -42,8 +42,7 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesCatalog_Handler { public : - SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list, - MapOfParserResourcesType& resources_batch_list); + SALOME_ResourcesCatalog_Handler(MapOfParserResourcesType& resources_list); const MapOfParserResourcesType& GetResourcesAfterParsing() const; @@ -62,12 +61,11 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesCatalog_Handler ParserResourcesType _resource; MapOfParserResourcesType& _resources_list; - 
MapOfParserResourcesType& _resources_batch_list; const char *test_machine; const char *test_cluster; + const char *test_name; const char *test_hostname; - const char *test_alias; const char *test_protocol; const char *test_cluster_internal_protocol; const char *test_mode; diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx index 918d97d9d..aedbcbe72 100644 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.cxx @@ -36,12 +36,12 @@ unsigned int ResourceDataToSort::_memInMBWanted = NULL_VALUE; ResourceDataToSort::ResourceDataToSort() {} -ResourceDataToSort::ResourceDataToSort(const string& hostname, +ResourceDataToSort::ResourceDataToSort(const string& name, unsigned int nbOfNodes, unsigned int nbOfProcPerNode, unsigned int CPUFreqMHz, unsigned int memInMB): - _hostName(hostname), + _Name(name), _nbOfNodes(nbOfNodes), _nbOfProcPerNode(nbOfProcPerNode), _CPUFreqMHz(CPUFreqMHz), @@ -116,6 +116,7 @@ unsigned int ResourceDataToSort::GetNumberOfPoints() const ret += 1; } + //RES_MESSAGE("[GetNumberOfPoints] points number for resource: " << _Name << " " << ret); return ret; } @@ -132,8 +133,8 @@ void ParserResourcesType::Print() { ostringstream oss; oss << endl << + "Name : " << Name << endl << "HostName : " << HostName << endl << - "Alias : " << Alias << endl << "NbOfNodes : " << DataForSort._nbOfNodes << endl << "NbOfProcPerNode : " << DataForSort._nbOfProcPerNode << endl << "CPUFreqMHz : " << DataForSort._CPUFreqMHz << endl << @@ -165,18 +166,73 @@ void ParserResourcesType::Print() oss << "Cluster member called : " << (*it).HostName << endl; } cout << oss.str() << endl; +} + +std::string +ParserResourcesType::PrintAccessProtocolType() const +{ + if (Protocol == rsh) + return "rsh"; + else + return "ssh"; +} + +std::string +ParserResourcesType::PrintClusterInternalProtocol() const +{ + if (ClusterInternalProtocol == rsh) + return 
"rsh"; + else + return "ssh"; +} + +std::string +ParserResourcesType::PrintAccessModeType() const +{ + if (Mode == interactive) + return "interactive"; + else + return "batch"; +} +std::string +ParserResourcesType::PrintBatchType() const +{ + if (Batch == none) + return "none"; + else if (Batch == pbs) + return "pbs"; + else if (Batch == lsf) + return "lsf"; + else if (Batch == sge) + return "sge"; + else + return "ssh"; +} + +std::string +ParserResourcesType::PrintMpiImplType() const +{ + if (mpi == nompi) + return "no mpi"; + else if (mpi == lam) + return "lam"; + else if (mpi == mpich1) + return "mpich1"; + else if (mpi == mpich2) + return "mpich2"; + else if (mpi == openmpi) + return "openmpi"; + else if (mpi == slurm) + return "slurm"; + else + return "prun"; } void ParserResourcesType::Clear() { - DataForSort._hostName = ""; - DataForSort._nbOfNodes = 1; - DataForSort._nbOfProcPerNode = 1; - DataForSort._CPUFreqMHz = 0; - DataForSort._memInMB = 0; + Name = ""; HostName = ""; - Alias = ""; Protocol = rsh; ClusterInternalProtocol = rsh; Mode = interactive; @@ -191,4 +247,10 @@ void ParserResourcesType::Clear() use = ""; ClusterMembersList.clear(); nbOfProc = 1; + + DataForSort._Name = ""; + DataForSort._nbOfNodes = 1; + DataForSort._nbOfProcPerNode = 1; + DataForSort._CPUFreqMHz = 0; + DataForSort._memInMB = 0; } diff --git a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx index 1b353f80b..908505483 100755 --- a/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx +++ b/src/ResourcesManager/SALOME_ResourcesCatalog_Parser.hxx @@ -48,7 +48,7 @@ class RESOURCESMANAGER_EXPORT ResourceDataToSort { public: - std::string _hostName; + std::string _Name; unsigned int _nbOfNodes; unsigned int _nbOfProcPerNode; unsigned int _CPUFreqMHz; @@ -61,7 +61,7 @@ class RESOURCESMANAGER_EXPORT ResourceDataToSort public: ResourceDataToSort(); - ResourceDataToSort(const std::string& hostname, + 
ResourceDataToSort(const std::string& name, unsigned int nbOfNodes, unsigned int nbOfProcPerNode, unsigned int CPUFreqMHz, @@ -86,8 +86,8 @@ struct RESOURCESMANAGER_EXPORT ParserResourcesClusterMembersType struct RESOURCESMANAGER_EXPORT ParserResourcesType { ResourceDataToSort DataForSort; + std::string Name; std::string HostName; - std::string Alias; AccessProtocolType Protocol; AccessProtocolType ClusterInternalProtocol; AccessModeType Mode; @@ -106,6 +106,12 @@ struct RESOURCESMANAGER_EXPORT ParserResourcesType void Print(); void Clear(); + + std::string PrintAccessProtocolType() const; + std::string PrintAccessModeType() const; + std::string PrintBatchType() const; + std::string PrintMpiImplType() const; + std::string PrintClusterInternalProtocol() const; }; typedef std::map MapOfParserResourcesType; diff --git a/src/ResourcesManager/SALOME_ResourcesManager.cxx b/src/ResourcesManager/SALOME_ResourcesManager.cxx index e0b92ef64..294993120 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.cxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.cxx @@ -139,11 +139,15 @@ void SALOME_ResourcesManager::Shutdown() */ //============================================================================= -Engines::MachineList * -SALOME_ResourcesManager::GetFittingResources(const Engines::MachineParameters& params) +Engines::ResourceList * +SALOME_ResourcesManager::GetFittingResources(const Engines::ResourceParameters& params) { -// MESSAGE("ResourcesManager::GetFittingResources"); - machineParams p; + MESSAGE("ResourcesManager::GetFittingResources"); + Engines::ResourceList * ret = new Engines::ResourceList; + + // CORBA -> C++ + resourceParams p; + p.name = params.name; p.hostname = params.hostname; p.OS = params.OS; p.nb_proc = params.nb_proc; @@ -151,24 +155,24 @@ SALOME_ResourcesManager::GetFittingResources(const Engines::MachineParameters& p p.nb_proc_per_node = params.nb_proc_per_node; p.cpu_clock = params.cpu_clock; p.mem_mb = params.mem_mb; - p.parallelLib = 
params.parallelLib; - p.nb_component_nodes = params.nb_component_nodes; - - for(unsigned int i=0;i vec = _rm.GetFittingResources(p); - ret->length(vec.size()); - for(unsigned int i=0;i vec = _rm.GetFittingResources(p); + + // C++ -> CORBA + ret->length(vec.size()); + for(unsigned int i=0;i ml; - for(unsigned int i=0;i C++ + vector rl; + for(unsigned int i=0; i ml; - for(unsigned int i=0;i C++ + vector rl; + for(unsigned int i=0; iname = CORBA::string_dup(resource.Name.c_str()); p_ptr->hostname = CORBA::string_dup(resource.HostName.c_str()); - p_ptr->alias = CORBA::string_dup(resource.Alias.c_str()); if( resource.Protocol == rsh ) p_ptr->protocol = "rsh"; else if( resource.Protocol == ssh ) @@ -245,13 +254,12 @@ Engines::MachineDefinition* SALOME_ResourcesManager::GetMachineParameters(const else if( resource.Batch == sge ) p_ptr->batch = "sge"; - p_ptr->nb_component_nodes=1; - return p_ptr; } std::string -SALOME_ResourcesManager::getMachineFile(std::string hostname, CORBA::Long nb_procs, +SALOME_ResourcesManager::getMachineFile(std::string hostname, + CORBA::Long nb_procs, std::string parallelLib) { std::string machine_file_name(""); diff --git a/src/ResourcesManager/SALOME_ResourcesManager.hxx b/src/ResourcesManager/SALOME_ResourcesManager.hxx index be58300ac..f186718c6 100644 --- a/src/ResourcesManager/SALOME_ResourcesManager.hxx +++ b/src/ResourcesManager/SALOME_ResourcesManager.hxx @@ -64,16 +64,16 @@ class SALOMERESOURCESMANAGER_EXPORT SALOME_ResourcesManager: ~SALOME_ResourcesManager(); // CORBA Methods - Engines::MachineList * - GetFittingResources(const Engines::MachineParameters& params); - char* FindFirst(const Engines::MachineList& listOfMachines); - char* Find(const char *policy, const Engines::MachineList& listOfMachines); - Engines::MachineDefinition* GetMachineParameters(const char *hostname); + Engines::ResourceList * GetFittingResources(const Engines::ResourceParameters& params); + char* FindFirst(const Engines::ResourceList& listOfResources); + 
char* Find(const char *policy, const Engines::ResourceList& listOfResources); + Engines::ResourceDefinition * GetResourceDefinition(const char * name); // Cpp Methods void Shutdown(); ResourcesManager_cpp *GetImpl() { return &_rm; } - std::string getMachineFile(std::string hostname, CORBA::Long nb_procs, + std::string getMachineFile(std::string hostname, + CORBA::Long nb_procs, std::string parallelLib); @@ -89,7 +89,6 @@ class SALOMERESOURCESMANAGER_EXPORT SALOME_ResourcesManager: MapOfParserResourcesType _resourcesBatchList; ResourcesManager_cpp _rm; - }; #endif // RESSOURCESCATALOG_IMPL_H