From 9a3453b4e394d301d931c1db9a7e2386afa66dd3 Mon Sep 17 00:00:00 2001 From: secher Date: Thu, 5 Feb 2009 15:16:25 +0000 Subject: [PATCH] some openmpi improvements, some MPIObject improvements to test ParaMEDMEM test in MYMPICOMPO module --- salome_adm/unix/config_files/check_openmpi.m4 | 6 + src/Container/Component_i.cxx | 57 ++------ src/Container/Container_i.cxx | 7 +- src/Container/SALOME_Component_i.hxx | 15 +-- src/Container/SALOME_ContainerManager.cxx | 127 +++++++++--------- src/Container/SALOME_ContainerManager.hxx | 13 +- src/MPIContainer/MPIContainer_i.cxx | 118 ++++++++-------- src/MPIContainer/MPIObject_i.cxx | 82 ++++++++++- src/MPIContainer/MPIObject_i.hxx | 25 +++- src/MPIContainer/SALOME_MPIContainer.cxx | 8 +- 10 files changed, 250 insertions(+), 208 deletions(-) diff --git a/salome_adm/unix/config_files/check_openmpi.m4 b/salome_adm/unix/config_files/check_openmpi.m4 index cbbe91f4d..fe5957a2c 100644 --- a/salome_adm/unix/config_files/check_openmpi.m4 +++ b/salome_adm/unix/config_files/check_openmpi.m4 @@ -42,9 +42,15 @@ if test "$WITHOPENMPI" = yes; then AC_CHECK_HEADER(mpi.h,WITHOPENMPI="yes",WITHOPENMPI="no") CPPFLAGS="$CPPFLAGS_old" + LIBS_old="$LIBS" + LIBS="-L${OPENMPI_HOME}/lib $LIBS" + AC_CHECK_LIB(mpi,MPI_Publish_name,WITHMPI2="yes",WITHMPI2="no") + LIBS="$LIBS_old" + AC_MSG_CHECKING(for openmpi) if test "$WITHOPENMPI" = "yes";then mpi_ok=yes + CPPFLAGS="-DWITHOPENMPI $CPPFLAGS" AC_MSG_RESULT(yes) else mpi_ok=no diff --git a/src/Container/Component_i.cxx b/src/Container/Component_i.cxx index dfcae478f..eda10bf9d 100644 --- a/src/Container/Component_i.cxx +++ b/src/Container/Component_i.cxx @@ -78,6 +78,7 @@ Engines_Component_i::Engines_Component_i():_myConnexionToRegistry(0), _notifSupp * \param instanceName unique instance name for this object (see Container_i) * \param interfaceName component class name * \param notif use of notification + * \param regist (true or false) use of registry (default true) */ //============================================================================= @@ -86,7 +87,8 @@ Engines_Component_i::Engines_Component_i(CORBA::ORB_ptr orb, PortableServer::ObjectId * contId, const char *instanceName, const char *interfaceName, - bool notif) : + bool notif, + bool regist ) : _instanceName(instanceName), _interfaceName(interfaceName), _myConnexionToRegistry(0), @@ -100,18 +102,19 @@ Engines_Component_i::Engines_Component_i(CORBA::ORB_ptr orb, _CanceledThread(false) { MESSAGE("Component constructor with instanceName "<< _instanceName); - //SCRUTE(pd_refCount); _orb = CORBA::ORB::_duplicate(orb); _poa = PortableServer::POA::_duplicate(poa); _contId = contId ; CORBA::Object_var o = _poa->id_to_reference(*contId); // container ior... _container=Engines::Container::_narrow(o); - const CORBA::String_var ior = _orb->object_to_string(o); - _myConnexionToRegistry = new RegistryConnexion(0, 0, ior,"theSession", - _instanceName.c_str()); + + if(regist){ + const CORBA::String_var ior = _orb->object_to_string(o); + _myConnexionToRegistry = new RegistryConnexion(0, 0, ior,"theSession", + _instanceName.c_str()); + } _notifSupplier = new NOTIFICATION_Supplier(instanceName, notif); - //SCRUTE(pd_refCount); } //============================================================================= @@ -167,48 +170,6 @@ Engines_Component_i::Engines_Component_i(CORBA::ORB_ptr orb, } -//============================================================================= -/*! - * Standard constructor for parallel component - * Connection Notification (no connection to Registry !) 
- * \param orb Object Request broker given by Container - * \param poa Portable Object Adapter from Container (normally root_poa) - * \param contId container CORBA id inside the server - * \param instanceName unique instance name for this object (see Container_i) - * \param interfaceName component class name - * \param flag not used... - * \param notif use of notification - */ -//============================================================================= - -Engines_Component_i::Engines_Component_i(CORBA::ORB_ptr orb, - PortableServer::POA_ptr poa, - PortableServer::ObjectId * contId, - const char *instanceName, - const char *interfaceName, - int flag, - bool notif ) : - _instanceName(instanceName), - _interfaceName(interfaceName), - _myConnexionToRegistry(0), - _notifSupplier(0), - _ThreadId(0) , - _ThreadCpuUsed(0) , - _Executed(false) , - _graphName("") , - _nodeName(""), - _studyId(-1), - _CanceledThread(false) -{ - _orb = CORBA::ORB::_duplicate(orb); - _poa = PortableServer::POA::_duplicate(poa); - _contId = contId ; - CORBA::Object_var o = _poa->id_to_reference(*contId); // container ior... - _container=Engines::Container::_narrow(o); - - _notifSupplier = new NOTIFICATION_Supplier(instanceName, notif); -} - //============================================================================= /*! * Destructor: call Container for decrement of instances count. diff --git a/src/Container/Container_i.cxx b/src/Container/Container_i.cxx index 954c8fcf7..ac95ef8d7 100644 --- a/src/Container/Container_i.cxx +++ b/src/Container/Container_i.cxx @@ -94,7 +94,7 @@ omni_mutex Engines_Container_i::_numInstanceMutex ; //============================================================================= Engines_Container_i::Engines_Container_i () : -_numInstance(0) +_numInstance(0),_id(0),_NS(0) { } @@ -111,7 +111,7 @@ Engines_Container_i::Engines_Container_i (CORBA::ORB_ptr orb, bool activAndRegist, bool isServantAloneInProcess ) : -_numInstance(0),_isServantAloneInProcess(isServantAloneInProcess) + _numInstance(0),_isServantAloneInProcess(isServantAloneInProcess),_id(0),_NS(0) { _pid = (long)getpid(); @@ -227,7 +227,8 @@ _numInstance(0),_isServantAloneInProcess(isServantAloneInProcess) Engines_Container_i::~Engines_Container_i() { MESSAGE("Container_i::~Container_i()"); - delete _id; + if(_id) + delete _id; if(_NS) delete _NS; } diff --git a/src/Container/SALOME_Component_i.hxx b/src/Container/SALOME_Component_i.hxx index ccf8ddff2..89c17a6ca 100644 --- a/src/Container/SALOME_Component_i.hxx +++ b/src/Container/SALOME_Component_i.hxx @@ -24,7 +24,7 @@ // Author : Paul RASCLE, EDF - MARC TAJCHMAN, CEA // Module : SALOME // $Header$ - +// #ifndef _SALOME_COMPONENT_I_HXX_ #define _SALOME_COMPONENT_I_HXX_ @@ -59,7 +59,8 @@ public: PortableServer::ObjectId * contId, const char *instanceName, const char *interfaceName, - bool notif = false); + bool notif = false, + bool regist = true); //Constructor for standalone component Engines_Component_i(CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, @@ -67,15 +68,7 @@ public: const char *instanceName, const char *interfaceName, bool notif = false, - bool regist=true); - // Constructor for parallel component : don't call registry - Engines_Component_i(CORBA::ORB_ptr orb, - PortableServer::POA_ptr poa, - PortableServer::ObjectId * contId, - const char *instanceName, - const char *interfaceName, - int flag, - bool notif = false); + bool regist = true); virtual ~Engines_Component_i(); diff --git a/src/Container/SALOME_ContainerManager.cxx 
b/src/Container/SALOME_ContainerManager.cxx index 3c7fdf021..dfc70e91d 100644 --- a/src/Container/SALOME_ContainerManager.cxx +++ b/src/Container/SALOME_ContainerManager.cxx @@ -62,7 +62,6 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer MESSAGE("constructor"); _NS = ns; _ResManager = rm; - _id=0; PortableServer::POAManager_var pman = poa->the_POAManager(); _orb = CORBA::ORB::_duplicate(orb) ; @@ -217,9 +216,7 @@ StartContainer(const Engines::MachineParameters& params, if (parallelLib != "") return FindOrStartParallelContainer(params, possibleComputers); #endif - long id; string containerNameInNS; - char idc[3*sizeof(long)]; Engines::Container_ptr ret = Engines::Container::_nil(); MESSAGE("SALOME_ContainerManager::StartContainer " << @@ -254,15 +251,6 @@ StartContainer(const Engines::MachineParameters& params, MESSAGE("try to launch it on " << theMachine); - // Get Id for container: a parallel container registers in Naming Service - // on the machine where is process 0. ContainerManager does'nt know the name - // of this machine before the launch of the parallel container. So to get - // the IOR of the parallel container in Naming Service, ContainerManager - // gives a unique Id. The parallel container registers his name under - // /ContainerManager/Id directory in NamingService - - id = GetIdForContainer(); - string command; if(theMachine==""){ MESSAGE("SALOME_ContainerManager::StartContainer : " << @@ -270,19 +258,16 @@ StartContainer(const Engines::MachineParameters& params, return Engines::Container::_nil(); } else if(theMachine==Kernel_Utils::GetHostname()) - command = BuildCommandToLaunchLocalContainer(params,id,container_exe); + command = BuildCommandToLaunchLocalContainer(params,container_exe); else - command = BuildCommandToLaunchRemoteContainer(theMachine,params,id,container_exe); + command = BuildCommandToLaunchRemoteContainer(theMachine,params,container_exe); - // RmTmpFile(); Too early! May be this function has not been used for a long time... + // RmTmpFile(_TmpFileName); Too early! May be this function has not been used for a long time... 
//check if an entry exists in Naming service if(params.isMPI) - { - containerNameInNS = "/ContainerManager/id"; - sprintf(idc,"%ld",id); - containerNameInNS += idc; - } + // A parallel container register on zero node in NS + containerNameInNS = _NS->BuildContainerNameForNS(params,GetMPIZeroNode(theMachine).c_str()); else containerNameInNS = _NS->BuildContainerNameForNS(params,theMachine.c_str()); @@ -314,13 +299,13 @@ StartContainer(const Engines::MachineParameters& params, if (status == -1){ MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " << "(system command status -1)"); - RmTmpFile(); // command file can be removed here + RmTmpFile(_TmpFileName); // command file can be removed here return Engines::Container::_nil(); } else if (status == 217){ MESSAGE("SALOME_LifeCycleCORBA::StartOrFindContainer rsh failed " << "(system command status 217)"); - RmTmpFile(); // command file can be removed here + RmTmpFile(_TmpFileName); // command file can be removed here return Engines::Container::_nil(); } else{ @@ -352,7 +337,7 @@ StartContainer(const Engines::MachineParameters& params, ret->logfilename(logFilename.c_str()); } - RmTmpFile(); // command file can be removed here + RmTmpFile(_TmpFileName); // command file can be removed here return ret; } } @@ -730,24 +715,6 @@ SALOME_ContainerManager::LaunchParallelContainer(const std::string& command, return obj; } -//============================================================================= -/*! - * Get Id for container: a parallel container registers in Naming Service - * on the machine where is process 0. ContainerManager does'nt know the name - * of this machine before the launch of the parallel container. So to get - * the IOR of the parallel container in Naming Service, ContainerManager - * gives a unique Id. The parallel container registers his name under - * /ContainerManager/Id directory in NamingService - */ -//============================================================================= - - -long SALOME_ContainerManager::GetIdForContainer(void) -{ - _id++; - return _id; -} - void SALOME_ContainerManager::fillBatchLaunchedContainers() { _batchLaunchedContainers.clear(); @@ -807,11 +774,10 @@ bool isPythonContainer(const char* ContainerName) string SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer (const string& machine, - const Engines::MachineParameters& params, const long id,const std::string& container_exe) + const Engines::MachineParameters& params, const std::string& container_exe) { string command; int nbproc; - char idc[3*sizeof(long)]; if ( ! 
_isAppliSalomeDefined ) command = BuildTempFileToLaunchRemoteContainer(machine, params); @@ -887,6 +853,13 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer command += o.str(); #ifdef WITHLAM command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; +#elif defined(WITHOPENMPI) + if( getenv("OMPI_URI_FILE") == NULL ) + command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + else{ + command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + command += getenv("OMPI_URI_FILE"); + } #endif command += " SALOME_MPIContainer "; } @@ -894,9 +867,6 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer command += " " +container_exe+ " "; command += _NS->ContainerName(params); - command += " -id "; - sprintf(idc,"%ld",id); - command += idc; command += " -"; AddOmninamesParams(command); @@ -914,12 +884,11 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer string SALOME_ContainerManager::BuildCommandToLaunchLocalContainer -(const Engines::MachineParameters& params, const long id,const std::string& container_exe) +(const Engines::MachineParameters& params, const std::string& container_exe) { _TmpFileName = BuildTemporaryFileName(); string command; int nbproc = 0; - //char idc[3*sizeof(long)]; ofstream command_file( _TmpFileName.c_str() ); @@ -946,14 +915,22 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer #ifdef WITHLAM //command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; command_file << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; +#elif defined(WITHOPENMPI) + //command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace "; + if( getenv("OMPI_URI_FILE") == NULL ) + command_file << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + else{ + command_file << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + command_file << getenv("OMPI_URI_FILE"); + } #endif if (isPythonContainer(params.container_name)) //command += "pyMPI SALOME_ContainerPy.py "; - command_file << "pyMPI SALOME_ContainerPy.py "; + command_file << " pyMPI SALOME_ContainerPy.py "; else //command += "SALOME_MPIContainer "; - command_file << "SALOME_MPIContainer "; + command_file << " SALOME_MPIContainer "; } else @@ -998,16 +975,8 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer } - - /*command += _NS->ContainerName(params); - command += " -id "; - sprintf(idc,"%ld",id); - command += idc; - command += " -"; - AddOmninamesParams(command);*/ - command_file << _NS->ContainerName(params); - command_file << " -id " << id << " -"; + command_file << " -"; AddOmninamesParams(command_file); command_file.close(); @@ -1027,9 +996,9 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer */ //============================================================================= -void SALOME_ContainerManager::RmTmpFile() +void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName) { - int lenght = _TmpFileName.size(); + int lenght = tmpFileName.size(); if ( lenght > 0) { #ifdef WIN32 @@ -1038,13 +1007,13 @@ void SALOME_ContainerManager::RmTmpFile() string command = "rm "; #endif if ( lenght > 4 ) - command += _TmpFileName.substr(0, lenght - 3 ); + command += tmpFileName.substr(0, lenght - 3 ); else - command += _TmpFileName; + command += tmpFileName; command += '*'; system(command.c_str()); //if dir is empty - remove it - string tmp_dir = Kernel_Utils::GetDirByPath( _TmpFileName ); + string tmp_dir = Kernel_Utils::GetDirByPath( 
tmpFileName ); if ( Kernel_Utils::IsEmptyDir( tmp_dir ) ) { #ifdef WIN32 @@ -1152,6 +1121,13 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer tempOutputFile << nbproc << " "; #ifdef WITHLAM tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace "; +#elif defined(WITHOPENMPI) + if( getenv("OMPI_URI_FILE") == NULL ) + tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace"; + else{ + tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:"; + tempOutputFile << getenv("OMPI_URI_FILE"); + } #endif } @@ -1160,9 +1136,9 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer if (params.isMPI) { if (isPythonContainer(params.container_name)) - tempOutputFile << "pyMPI SALOME_ContainerPy.py "; + tempOutputFile << " pyMPI SALOME_ContainerPy.py "; else - tempOutputFile << "SALOME_MPIContainer "; + tempOutputFile << " SALOME_MPIContainer "; } else @@ -1372,3 +1348,22 @@ void SALOME_ContainerManager::startMPI() } } +string SALOME_ContainerManager::GetMPIZeroNode(string machine) +{ + int status; + string zeronode; + string cmd; + string tmpFile = BuildTemporaryFileName(); + + cmd = "ssh " + machine + " mpirun -np 1 hostname > " + tmpFile; + + status = system(cmd.c_str()); + if( status == 0 ){ + ifstream fp(tmpFile.c_str(),ios::in); + fp >> zeronode; + } + + RmTmpFile(tmpFile); + + return zeronode; +} diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx index 7b1f26eab..4c928ae54 100644 --- a/src/Container/SALOME_ContainerManager.hxx +++ b/src/Container/SALOME_ContainerManager.hxx @@ -88,19 +88,17 @@ protected: void fillBatchLaunchedContainers(); - long GetIdForContainer(void); - std::string BuildCommandToLaunchRemoteContainer(const std::string& machine, - const Engines::MachineParameters& params, const long id, - const std::string& container_exe="SALOME_Container"); + const Engines::MachineParameters& params, + const std::string& container_exe="SALOME_Container"); - std::string BuildCommandToLaunchLocalContainer(const Engines::MachineParameters& params, const long id, + std::string BuildCommandToLaunchLocalContainer(const Engines::MachineParameters& params, const std::string& container_exe="SALOME_Container"); std::string BuildTempFileToLaunchRemoteContainer(const std::string& machine, const Engines::MachineParameters& params) throw(SALOME_Exception); - void RmTmpFile(); + void RmTmpFile(std::string& tmpFile); void AddOmninamesParams(std::string& command) const; @@ -108,6 +106,8 @@ protected: std::string BuildTemporaryFileName() const; + std::string GetMPIZeroNode(std::string machine); + // Parallel extension std::string BuildCommandToLaunchLocalParallelContainer(const std::string& exe_name, const Engines::MachineParameters& params, @@ -115,7 +115,6 @@ protected: void startMPI(); bool _MpiStarted; - long _id; CORBA::ORB_var _orb; PortableServer::POA_var _poa; diff --git a/src/MPIContainer/MPIContainer_i.cxx b/src/MPIContainer/MPIContainer_i.cxx index ecdfba30c..c3fd23405 100644 --- a/src/MPIContainer/MPIContainer_i.cxx +++ b/src/MPIContainer/MPIContainer_i.cxx @@ -48,52 +48,22 @@ Engines_MPIContainer_i::Engines_MPIContainer_i(int nbproc, int numproc, int argc, char *argv[]) : Engines_Container_i(orb,poa,containerName,argc,argv,false), MPIObject_i(nbproc,numproc) { - long id=0; - string IdContainerinNS; - char idc[3*sizeof(long)]; - MESSAGE("[" << numproc << "] activate object"); _id = _poa->activate_object(this); - - if(argc>1) - { - for(int 
i=0;i<argc;i++)
-      {
-        if(strcmp(argv[i],"-id")==NULL)
-          id = atoi(argv[++i]);
-      }
-    }
+
+  CORBA::Object_var obj=_poa->id_to_reference(*_id);
+  Engines::Container_var pCont = Engines::Container::_narrow(obj);
+  _remove_ref();
 
   if(numproc==0){
 
     _NS = new SALOME_NamingService();
     _NS->init_orb( CORBA::ORB::_duplicate(_orb) ) ;
 
-    CORBA::Object_var obj=_poa->id_to_reference(*_id);
-    Engines::Container_var pCont = Engines::Container::_narrow(obj);
-
     string hostname = Kernel_Utils::GetHostname();
     _containerName = _NS->BuildContainerNameForNS(containerName,hostname.c_str());
     SCRUTE(_containerName);
     _NS->Register(pCont, _containerName.c_str());
 
-    // A parallel container registers in Naming Service
-    // on the machine where is process 0. ContainerManager does'nt know the name
-    // of this machine before the launch of the parallel container. So to get
-    // the IOR of the parallel container in Naming Service, ContainerManager
-    // gives a unique Id. The parallel container registers his name under
-    // /ContainerManager/Id directory in NamingService
-
-    IdContainerinNS = "/ContainerManager/id";
-    sprintf(idc,"%ld",id);
-    IdContainerinNS += idc;
-    SCRUTE(IdContainerinNS);
-    _NS->Register(pCont, IdContainerinNS.c_str());
-
   }
 
   // Root recupere les ior des container des autre process
@@ -121,6 +91,24 @@ void Engines_MPIContainer_i::Shutdown()
     for(ip= 1;ip<_nbproc;ip++)
       (Engines::MPIContainer::_narrow((*_tior)[ip]))->Shutdown();
   }
+
+  std::map<std::string, Engines::Component_var>::iterator itm;
+  for (itm = _listInstances_map.begin(); itm != _listInstances_map.end(); itm++)
+    {
+      try
+        {
+          itm->second->destroy();
+        }
+      catch(const CORBA::Exception& e)
+        {
+          // ignore this entry and continue
+        }
+      catch(...)
+        {
+          // ignore this entry and continue
+        }
+    }
+
   _orb->shutdown(0);
 }
 
@@ -149,7 +137,6 @@ bool Engines_MPIContainer_i::Lload_component_Library(const char* componentName)
 
   // --- try dlopen C++ component
   string impl_name = string ("lib") + aCompName + string("Engine.so");
-  SCRUTE(impl_name);
 
   _numInstanceMutex.lock(); // lock to be alone
   // (see decInstanceCnt, finalize_removal))
@@ -167,12 +154,15 @@ bool Engines_MPIContainer_i::Lload_component_Library(const char* componentName)
     {
       _library_map[impl_name] = handle;
       _numInstanceMutex.unlock();
+      MESSAGE("[" << _numproc << "] Library " << impl_name << " loaded");
+      MPI_Barrier(MPI_COMM_WORLD);
       return true;
     }
   else
     {
-      INFOS("[" << _numproc << "] Can't load shared library : " << impl_name);
-      INFOS("[" << _numproc << "] error dlopen: " << dlerror());
+      MESSAGE("[" << _numproc << "] Can't load shared library : " << impl_name);
+      MESSAGE("[" << _numproc << "] error dlopen: " << dlerror());
+      MPI_Barrier(MPI_COMM_WORLD);
     }
   _numInstanceMutex.unlock();
 
@@ -287,17 +277,15 @@ Engines_MPIContainer_i::Lcreate_component_instance( const char* genericRegisterN
 
   //--- try C++
   string impl_name = string ("lib") + genericRegisterName +string("Engine.so");
-  void* handle = _library_map[impl_name];
-  if ( !handle ) {
-    INFOS("shared library " << impl_name <<"must be loaded before instance");
-    return Engines::Component::_nil() ;
-  }
-  else {
-    iobject = createMPIInstance(genericRegisterName,
-                                handle,
-                                studyId);
-    return iobject._retn();
-  }
+  if (_library_map.count(impl_name) != 0) // C++ component
+    {
+      void* handle = _library_map[impl_name];
+      iobject = createMPIInstance(genericRegisterName,
+                                  handle,
+                                  studyId);
+      return iobject._retn();
+    }
+
 }
 
 Engines::Component_ptr
@@ -311,7 +299,6 @@ Engines_MPIContainer_i::createMPIInstance(string genericRegisterName,
 
   string aGenRegisterName = genericRegisterName;
   string factory_name = aGenRegisterName + string("Engine_factory");
-  SCRUTE(factory_name) ;
 
   typedef 
PortableServer::ObjectId * (*MPIFACTORY_FUNCTION) (int,int,
                                                     CORBA::ORB_ptr,
                                                     PortableServer::POA_ptr,
                                                     PortableServer::ObjectId *,
                                                     const char *,
                                                     const char *) ;
 
-  MPIFACTORY_FUNCTION MPIComponent_factory
-    = (MPIFACTORY_FUNCTION) dlsym(handle, factory_name.c_str());
+  dlerror();
+  MPIFACTORY_FUNCTION MPIComponent_factory = (MPIFACTORY_FUNCTION) dlsym(handle, factory_name.c_str());
 
-  char *error ;
-  if ( (error = dlerror() ) != NULL) {
-    // Try to load a sequential component
-    MESSAGE("[" << _numproc << "] Try to load a sequential component");
-    _numInstanceMutex.unlock() ;
-    iobject = Engines_Container_i::createInstance(genericRegisterName,handle,studyId);
-    if( CORBA::is_nil(iobject) ) return Engines::Component::_duplicate(iobject);
-  }
+  if ( !MPIComponent_factory )
+    {
+      INFOS( "[" << _numproc << "] Can't resolve symbol: " + factory_name );
+      SCRUTE( dlerror() );
+      pobj = Engines::MPIObject::_nil();
+      BCastIOR(_orb,pobj,false);
+      return Engines::Component::_nil();
+    }
 
   // --- create instance
 
@@ -370,8 +357,6 @@ Engines_MPIContainer_i::createMPIInstance(string genericRegisterName,
     //SCRUTE(servant->pd_refCount);
     _listInstances_map[instanceName] = iobject;
     _cntInstances_map[aGenRegisterName] += 1;
-    SCRUTE(aGenRegisterName);
-    SCRUTE(_cntInstances_map[aGenRegisterName]);
     //SCRUTE(servant->pd_refCount);
     bool ret_studyId = servant->setStudyId(studyId);
     ASSERT(ret_studyId);
@@ -387,10 +372,14 @@ Engines_MPIContainer_i::createMPIInstance(string genericRegisterName,
     BCastIOR(_orb,pobj,false);
 
   }
-  catch (...)
-  {
-    INFOS( "Container_i::createInstance exception catched" ) ;
-  }
+  catch(const POException &ex){
+    INFOS( ex.msg << " on process number " << ex.numproc ) ;
+    return Engines::Component::_nil();
+  }
+  catch (...){
+    INFOS( "Container_i::createInstance exception caught" ) ;
+    return Engines::Component::_nil();
+  }
 
   return iobject._retn();
 }
@@ -450,6 +439,7 @@ Engines::Component_ptr Engines_MPIContainer_i::Lload_impl(
 
   string factory_name = _nameToRegister + string("Engine_factory");
   MESSAGE("[" << _numproc << "] factory_name=" << factory_name) ;
 
+  dlerror();
   PortableServer::ObjectId * (*MPIComponent_factory) (int,int,
                                                       CORBA::ORB_ptr,
                                                       PortableServer::POA_ptr,
diff --git a/src/MPIContainer/MPIObject_i.cxx b/src/MPIContainer/MPIObject_i.cxx
index f68a739b6..ff45f5e5e 100644
--- a/src/MPIContainer/MPIObject_i.cxx
+++ b/src/MPIContainer/MPIObject_i.cxx
@@ -23,10 +23,10 @@
 // File : MPIObject_i.cxx
 // Module : SALOME
 //
-#include <mpi.h>
 #include "MPIObject_i.hxx"
 #include "utilities.h"
 using namespace std;
+#define TIMEOUT 5
 
 MPIObject_i::MPIObject_i()
 {
@@ -65,7 +65,7 @@ void MPIObject_i::tior(const Engines::IORTab& ior)
 }
 
 void MPIObject_i::BCastIOR(CORBA::ORB_ptr orb, Engines::MPIObject_ptr pobj,
-                           bool amiCont)
+                           bool amiCont) throw(POException)
 {
   int err, ip, n;
   char *ior;
@@ -95,13 +95,14 @@ void MPIObject_i::BCastIOR(CORBA::ORB_ptr orb, Engines::MPIObject_ptr pobj,
       }
       iort[ip] = orb->string_to_object(ior);
       delete [] ior;
+      if(CORBA::is_nil(iort[ip]))
+        throw POException(ip,"MPI Component not loaded");
     }
 
     // On donne le tableau des ior a l'objet Corba du process 0
     if( amiCont )
       tior(*(iort._retn()));
     else
       pobj->tior(*(iort._retn()));
-
   }
   else{
     // Conversion IOR vers string
@@ -123,3 +124,78 @@ void MPIObject_i::BCastIOR(CORBA::ORB_ptr orb, Engines::MPIObject_ptr pobj,
 }
 
+#ifdef HAVE_MPI2
+MPI_Comm MPIObject_i::remoteMPI2Connect(string service) throw(POException)
+{
+  int i;
+  MPI_Comm gcom;
+  char port_name_clt[MPI_MAX_PORT_NAME];
+
+  _srv = 0;
+  _service = service;
+
+  
MPI_Barrier(MPI_COMM_WORLD);
+
+  MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+  if( _numproc == 0 ){
+    /* rank 0 tries to be a server. If the service is already published, it tries to be a client */
+    MPI_Open_port(MPI_INFO_NULL, _port_name);
+    if ( MPI_Publish_name((char*)_service.c_str(), MPI_INFO_NULL, _port_name) == MPI_SUCCESS ) {
+      _srv = 1;
+      MESSAGE("[" << _numproc << "] service " << _service << " available at " << _port_name << "\n");
+    }
+    else if ( MPI_Lookup_name((char*)_service.c_str(), MPI_INFO_NULL, port_name_clt) == MPI_SUCCESS ){
+      MESSAGE("[" << _numproc << "] I get the connection with " << _service << " at " << port_name_clt << "!\n");
+      MPI_Close_port( _port_name );
+    }
+    else{
+      /* Throw exception */
+      MESSAGE("[" << _numproc << "] Error on connection with " << _service << " at " << port_name_clt << "!\n");
+      throw POException(_numproc,"Error on connection with " + _service + " at " + port_name_clt);
+    }
+  }
+  else{
+    i=0;
+    /* Wait until rank 0 publishes the name, then try to be a client */
+    while ( i != TIMEOUT ) {
+      sleep(1);
+      if ( MPI_Lookup_name((char*)_service.c_str(), MPI_INFO_NULL, port_name_clt) == MPI_SUCCESS ){
+        MESSAGE("[" << _numproc << "] I get the connection with " << _service << " at " << port_name_clt << "!\n");
+        break;
+      }
+      i++;
+    }
+    if(i==TIMEOUT){
+      /* Throw exception */
+      MESSAGE("[" << _numproc << "] Error on connection with " << _service << " at " << port_name_clt << "!\n");
+      throw POException(_numproc,"Error on connection with " + _service + " at " + port_name_clt);
+    }
+  }
+  MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);
+
+  /* If rank 0 is the server, all processes call MPI_Comm_accept */
+  /* If rank 0 is not the server, all processes call MPI_Comm_connect */
+  MPI_Bcast(&_srv,1,MPI_INT,0,MPI_COMM_WORLD);
+  if ( _srv )
+    MPI_Comm_accept( _port_name, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &gcom );
+  else
+    MPI_Comm_connect(port_name_clt, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &gcom );
+
+  /* only rank 0 can be the server that unpublishes the name */
+  if(_numproc != 0) _srv = 0;
+
+  return gcom;
+
+}
+
+void MPIObject_i::remoteMPI2Disconnect(MPI_Comm gcom)
+{
+  MPI_Comm_disconnect( &gcom );
+  if ( _srv ) {
+    MPI_Unpublish_name((char*)_service.c_str(), MPI_INFO_NULL, _port_name);
+    MESSAGE("[" << _numproc << "] " << _service << ": close port " << _port_name << "\n");
+    MPI_Close_port( _port_name );
+  }
+}
+#endif
+
diff --git a/src/MPIContainer/MPIObject_i.hxx b/src/MPIContainer/MPIObject_i.hxx
index 21ea9d84c..641cb1138 100644
--- a/src/MPIContainer/MPIObject_i.hxx
+++ b/src/MPIContainer/MPIObject_i.hxx
@@ -26,8 +26,19 @@
 #ifndef _SALOME_POBJECT_I_H_
 #define _SALOME_POBJECT_I_H_
 
+#include <mpi.h>
+#include <string>
 #include <SALOMEconfig.h>
 #include CORBA_SERVER_HEADER(SALOME_MPIObject)
+#define defaultService "SERVER"
+
+class POException
+{
+public:
+  const std::string msg;
+  const int numproc;
+  POException(const int ip,const std::string m) : numproc(ip),msg(m) {}
+};
 
 class MPIObject_i: public POA_Engines::MPIObject
 {
@@ -47,7 +58,19 @@ class MPIObject_i: public POA_Engines::MPIObject
   // IOR des objets paralleles sur tous les process mpi
   Engines::IORTab* _tior;
   // Echange des IOR de l'objet entre process
-  void BCastIOR(CORBA::ORB_ptr orb,Engines::MPIObject_ptr pobj,bool amiCont);
+  void BCastIOR(CORBA::ORB_ptr orb,Engines::MPIObject_ptr pobj,bool amiCont) throw(POException);
+#ifdef HAVE_MPI2
+  // MPI2 connection
+  MPI_Comm remoteMPI2Connect(std::string service=defaultService) throw(POException);
+  // MPI2 disconnection
+  void remoteMPI2Disconnect(MPI_Comm gcom);
+#endif
+
+private:
+  int _srv;
+  
char _port_name[MPI_MAX_PORT_NAME]; + std::string _service; + } ; #endif diff --git a/src/MPIContainer/SALOME_MPIContainer.cxx b/src/MPIContainer/SALOME_MPIContainer.cxx index 647e54470..c288ec8cf 100644 --- a/src/MPIContainer/SALOME_MPIContainer.cxx +++ b/src/MPIContainer/SALOME_MPIContainer.cxx @@ -83,7 +83,7 @@ int main(int argc, char* argv[]) } MESSAGE("[" << numproc << "] MPIContainer: load MPIContainer servant"); - myContainer = new Engines_MPIContainer_i(nbproc,numproc,orb,factory_poa, containerName,argc,argv); + new Engines_MPIContainer_i(nbproc,numproc,orb,factory_poa, containerName,argc,argv); pman->activate(); @@ -106,13 +106,11 @@ int main(int argc, char* argv[]) INFOS("Caught unknown exception."); } - if(myContainer) - delete myContainer; + MPI_Finalize(); END_OF("[" << numproc << "] " << argv[0]); - // delete myThreadTrace; - MPI_Finalize(); + exit(0); } -- 2.39.2
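Note on the MPI-2 rendezvous added in MPIObject_i.cxx: remoteMPI2Connect() is the standard MPI-2 name-publishing handshake. Rank 0 first tries MPI_Publish_name(); if the service name is already taken it falls back to MPI_Lookup_name(), and all ranks then collectively call MPI_Comm_accept() or MPI_Comm_connect() according to the broadcast _srv flag. With Open MPI, publish/lookup only works across separately launched jobs when both point at the same name server, which is what the "-ompi-server file:$OMPI_URI_FILE" arguments added to the mpirun command lines arrange. The following is a minimal standalone sketch of that handshake, not the patch's code itself: rendezvous() and the 5-second retry loop are illustrative stand-ins for remoteMPI2Connect() and the TIMEOUT macro, and MPI_Comm_set_errhandler() is the current spelling of the MPI_Errhandler_set() calls used in the patch.

// rendezvous.cxx : minimal sketch of the remoteMPI2Connect() handshake
#include <mpi.h>
#include <unistd.h>
#include <string>

MPI_Comm rendezvous(const std::string& service)
{
  int rank, srv = 0;
  char port[MPI_MAX_PORT_NAME], port_clt[MPI_MAX_PORT_NAME];
  MPI_Comm gcom = MPI_COMM_NULL;

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Barrier(MPI_COMM_WORLD);
  // let a failed publish/lookup return an error code instead of aborting
  MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

  if (rank == 0) {
    // try to be the server; if the name is already published, be a client
    MPI_Open_port(MPI_INFO_NULL, port);
    if (MPI_Publish_name(service.c_str(), MPI_INFO_NULL, port) == MPI_SUCCESS)
      srv = 1;
    else if (MPI_Lookup_name(service.c_str(), MPI_INFO_NULL, port_clt) == MPI_SUCCESS)
      MPI_Close_port(port);
  }
  else {
    // non-root ranks poll until the name shows up (5 s, like TIMEOUT)
    for (int i = 0; i < 5; i++) {
      sleep(1);
      if (MPI_Lookup_name(service.c_str(), MPI_INFO_NULL, port_clt) == MPI_SUCCESS)
        break;
    }
  }
  MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);

  // all ranks must take the same branch; the port is only read on rank 0
  MPI_Bcast(&srv, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (srv)
    MPI_Comm_accept(port, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &gcom);
  else
    MPI_Comm_connect(port_clt, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &gcom);
  return gcom;  // intercommunicator between the server and client jobs
}

Two jobs launched with mpirun -ompi-server file:$OMPI_URI_FILE that call this with the same service name end up sharing gcom as an intercommunicator; remoteMPI2Disconnect() undoes the setup with MPI_Comm_disconnect() plus, on the server side, MPI_Unpublish_name() and MPI_Close_port().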
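On the dlopen side, the new code in createMPIInstance() and Lload_impl() follows the usual dlsym() protocol: call dlerror() once to clear any stale error string, then test the pointer returned by dlsym() and only afterwards read dlerror() for the diagnostic. The nil MPIObject broadcast in the failure branch keeps the other ranks' BCastIOR() from waiting forever, and BCastIOR() now turns a received nil IOR into a POException. A condensed sketch of the lookup, with illustrative names (load_factory and FACTORY are not part of the patch):

// factory_lookup.cxx : sketch of the dlerror()/dlsym() protocol used above
#include <dlfcn.h>
#include <iostream>
#include <string>

typedef void* (*FACTORY)();   // stand-in for the MPIFACTORY_FUNCTION typedef

FACTORY load_factory(void* handle, const std::string& name)
{
  dlerror();                  // clear any error left by an earlier call
  FACTORY f = (FACTORY) dlsym(handle, name.c_str());
  if (!f) {                   // for a factory symbol, NULL means "not found"
    const char* err = dlerror();
    std::cerr << "Can't resolve symbol: " << name
              << " (" << (err ? err : "unknown error") << ")" << std::endl;
  }
  return f;
}

Checking the returned pointer rather than dlerror() alone is what lets the old "try to load a sequential component" fallback disappear here: a missing Engine_factory symbol is now reported once and propagated as a nil component on every rank.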