From 81ea5df6e9b804a382af650998d3089b629c8906 Mon Sep 17 00:00:00 2001
From: secher
Date: Fri, 8 Jul 2011 14:40:12 +0000
Subject: [PATCH] allow using mpich for mpi2 coupling in Salome

---
 salome_adm/unix/config_files/check_hdf5.m4  |  2 --
 salome_adm/unix/config_files/check_mpich.m4 | 26 +++++++++------
 src/Container/SALOME_ContainerManager.cxx   | 37 ++++++++++++++++++---
 src/Container/SALOME_ContainerManager.hxx   |  2 +-
 src/MPIContainer/MPIObject_i.cxx            |  7 +++-
 5 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/salome_adm/unix/config_files/check_hdf5.m4 b/salome_adm/unix/config_files/check_hdf5.m4
index 4f5d8f03f..01f96f50c 100644
--- a/salome_adm/unix/config_files/check_hdf5.m4
+++ b/salome_adm/unix/config_files/check_hdf5.m4
@@ -65,8 +65,6 @@ else
   fi
 fi
 
-CHECK_MPI
-
 if test "x$hdf5_ok" = "xno"
 then
   if test -e "$HDF5HOME/include/hdf5.h"
diff --git a/salome_adm/unix/config_files/check_mpich.m4 b/salome_adm/unix/config_files/check_mpich.m4
index a56d3c5e6..52234bb37 100644
--- a/salome_adm/unix/config_files/check_mpich.m4
+++ b/salome_adm/unix/config_files/check_mpich.m4
@@ -44,9 +44,9 @@ if test "$WITHMPICH" = yes; then
     MPI_INCLUDES="-I$MPICH_HOME/include"
     if test "x$MPICH_HOME" = "x/usr"
     then
-      MPI_LIBS=""
+      MPI_LIBS="-lmpichcxx -lmpich -lopa -lmpl -lrt -lpthread"
     else
-      MPI_LIBS="-L$MPICH_HOME/lib"
+      MPI_LIBS="-L$MPICH_HOME/lib -lmpichcxx -lmpich -lopa -lmpl -lrt -lpthread"
     fi
   fi
 
@@ -55,20 +55,24 @@ if test "$WITHMPICH" = yes; then
   AC_CHECK_HEADER(mpi.h,WITHMPICH="yes",WITHMPICH="no")
   CPPFLAGS="$CPPFLAGS_old"
 
-  if test "$WITHMPICH" = "yes";then
-    LDFLAGS_old="$LDFLAGS"
-    LDFLAGS="$MPI_LIBS $LDFLAGS"
-    AC_CHECK_LIB(mpich,MPI_Init,WITHMPICH="yes",WITHMPICH="no")
-    AC_CHECK_LIB(mpich,MPI_Publish_name,WITHMPI2="yes",WITHMPI2="no")
-    LDFLAGS="$LDFLAGS_old"
-  fi
+  LIBS_old="$LIBS"
+  LIBS="$MPI_LIBS $LIBS"
+  AC_CHECK_LIB(mpich,MPI_Init,WITHMPICH="yes",WITHMPICH="no")
+  AC_CHECK_LIB(mpich,MPI_Publish_name,WITHMPI2="yes",WITHMPI2="no")
+  LIBS="$LIBS_old"
 
+  AC_MSG_CHECKING(for mpich)
   if test "$WITHMPICH" = "yes";then
-    WITHMPI="yes"
     mpi_ok=yes
-    MPI_LIBS="$MPI_LIBS -lmpich"
+    mpi2_ok=$WITHMPI2
+    WITHMPI="yes"
+    CPPFLAGS="-DWITHMPICH $CPPFLAGS"
+    AC_MSG_RESULT(yes)
   else
     mpi_ok=no
+    mpi2_ok=no
+    WITHMPI=no
+    AC_MSG_RESULT(no)
   fi
 fi
diff --git a/src/Container/SALOME_ContainerManager.cxx b/src/Container/SALOME_ContainerManager.cxx
index 6f2e8ccac..52ce0ac7e 100644
--- a/src/Container/SALOME_ContainerManager.cxx
+++ b/src/Container/SALOME_ContainerManager.cxx
@@ -87,7 +87,7 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer
 
 #ifdef HAVE_MPI2
 #ifdef WITHOPENMPI
-  _pid_ompiServer = -1;
+  _pid_mpiServer = -1;
   // the urifile name depends on pid of the process
   std::stringstream urifile;
   urifile << getenv("HOME") << "/.urifile_" << getpid();
@@ -108,10 +108,29 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableSer
     std::set<pid_t>::const_iterator it;
     for(it=thepids2.begin();it!=thepids2.end();it++)
       if(thepids1.find(*it) == thepids1.end())
-        _pid_ompiServer = *it;
-    if(_pid_ompiServer < 0)
+        _pid_mpiServer = *it;
+    if(_pid_mpiServer < 0)
       throw SALOME_Exception("Error when getting ompi-server id");
   }
+#elif defined(WITHMPICH)
+  _pid_mpiServer = -1;
+  // get the pid of all hydra_nameserver
+  std::set<pid_t> thepids1 = getpidofprogram("hydra_nameserver");
+  // launch a new hydra_nameserver
+  std::string command;
+  command = "hydra_nameserver &";
+  int status=system(command.c_str());
+  if(status!=0)
+    throw SALOME_Exception("Error when launching hydra_nameserver");
+  // get the pid of all hydra_nameserver
+  std::set<pid_t> thepids2 = getpidofprogram("hydra_nameserver");
+  // my hydra_nameserver is the new one
+  std::set<pid_t>::const_iterator it;
+  for(it=thepids2.begin();it!=thepids2.end();it++)
+    if(thepids1.find(*it) == thepids1.end())
+      _pid_mpiServer = *it;
+  if(_pid_mpiServer < 0)
+    throw SALOME_Exception("Error when getting hydra_nameserver id");
 #endif
 #endif
 
@@ -131,13 +150,17 @@ SALOME_ContainerManager::~SALOME_ContainerManager()
 #ifdef WITHOPENMPI
   if( getenv("OMPI_URI_FILE") != NULL ){
     // kill my ompi-server
-    if( kill(_pid_ompiServer,SIGTERM) != 0 )
+    if( kill(_pid_mpiServer,SIGTERM) != 0 )
       throw SALOME_Exception("Error when killing ompi-server");
     // delete my urifile
     int status=system("rm -f ${OMPI_URI_FILE}");
     if(status!=0)
       throw SALOME_Exception("Error when removing urifile");
   }
+#elif defined(WITHMPICH)
+  // kill my hydra_nameserver
+  if( kill(_pid_mpiServer,SIGTERM) != 0 )
+    throw SALOME_Exception("Error when killing hydra_nameserver");
 #endif
 #endif
 }
@@ -692,6 +715,8 @@ SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
       command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
       command += getenv("OMPI_URI_FILE");
     }
+#elif defined(WITHMPICH)
+    command += "-nameserver " + Kernel_Utils::GetHostname();
 #endif
     command += " SALOME_MPIContainer ";
   }
@@ -747,6 +772,8 @@ SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
         o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
         o << getenv("OMPI_URI_FILE");
       }
+#elif defined(WITHMPICH)
+      o << "-nameserver " + Kernel_Utils::GetHostname();
 #endif
 
       if (isPythonContainer(params.container_name))
@@ -954,6 +981,8 @@ SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
       tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
       tempOutputFile << getenv("OMPI_URI_FILE");
     }
+#elif defined(WITHMPICH)
+    tempOutputFile << "-nameserver " + Kernel_Utils::GetHostname();
 #endif
   }
 
diff --git a/src/Container/SALOME_ContainerManager.hxx b/src/Container/SALOME_ContainerManager.hxx
index 1ae8be0dc..6aff78529 100644
--- a/src/Container/SALOME_ContainerManager.hxx
+++ b/src/Container/SALOME_ContainerManager.hxx
@@ -113,7 +113,7 @@ protected:
 
   static omni_mutex _numInstanceMutex ; // lib and instance protection
 
-  pid_t _pid_ompiServer;
+  pid_t _pid_mpiServer;
 
   // Begin of PacO++ Parallel extension
   typedef std::vector<std::string> actual_launch_machine_t;
diff --git a/src/MPIContainer/MPIObject_i.cxx b/src/MPIContainer/MPIObject_i.cxx
index 4d8728ef2..ce9c98936 100644
--- a/src/MPIContainer/MPIObject_i.cxx
+++ b/src/MPIContainer/MPIObject_i.cxx
@@ -158,7 +158,12 @@ void MPIObject_i::remoteMPI2Connect(std::string service)
   {
     /* rank 0 try to be a server. If service is already published, try to be a cient */
     MPI_Open_port(MPI_INFO_NULL, port_name);
-    if ( MPI_Publish_name((char*)service.c_str(), MPI_INFO_NULL, port_name) == MPI_SUCCESS )
+    if ( MPI_Lookup_name((char*)service.c_str(), MPI_INFO_NULL, port_name_clt) == MPI_SUCCESS )
+      {
+        MESSAGE("[" << _numproc << "] I get the connection with " << service << " at " << port_name_clt << std::endl);
+        MPI_Close_port( port_name );
+      }
+    else if ( MPI_Publish_name((char*)service.c_str(), MPI_INFO_NULL, port_name) == MPI_SUCCESS )
      {
        _srv[service] = true;
        _port_name[service] = port_name;
-- 
2.39.2