]> SALOME platform Git repositories - modules/kernel.git/commitdiff
Salome HOME
PR: mpi containers portability
authorprascle <prascle>
Wed, 30 Nov 2005 19:59:11 +0000 (19:59 +0000)
committerprascle <prascle>
Wed, 30 Nov 2005 19:59:11 +0000 (19:59 +0000)
16 files changed:
salome_adm/unix/config_files/check_lam.m4
src/Communication/ReceiverFactory.cxx
src/Communication/Receivers.hxx
src/Communication/SALOME_Comm_i.cxx
src/Communication/SALOME_Comm_i.hxx
src/Communication/SenderFactory.cxx
src/Communication_SWIG/libSALOME_Comm.i
src/Container/SALOME_Container.cxx
src/Container/SALOME_ContainerManager.cxx
src/Container/SALOME_ContainerManager.hxx
src/MPIContainer/MPIContainer_i.cxx
src/MPIContainer/MPIObject_i.cxx
src/MPIContainer/SALOME_MPIContainer.cxx
src/NamingService/SALOME_NamingService.cxx
src/ResourcesManager/SALOME_ResourcesManager.cxx
src/ResourcesManager/SALOME_ResourcesManager.hxx

index bde79d7e60460356bd778f267dda9f4c89b97e95..ae21b56fe91d53eb910fcee86b9aa7ed7f65ab54 100644 (file)
@@ -59,17 +59,51 @@ if test "$WITHLAM" = yes; then
     LIBS_old="$LIBS"
     LDFLAGS_old="$LDFLAGS"
     LDFLAGS="$MPI_LIBS $LDFLAGS"
-    AC_CHECK_LIB(lam,lam_mp_init,,WITHLAM="no")
-    AC_CHECK_LIB(mpi,MPI_Init,WITHLAM="yes",WITHLAM="no")
-    AC_CHECK_LIB(mpi,MPI_Publish_name,WITHMPI2="yes",WITHMPI2="no")
-    LDFLAGS="$LDFLAGS_old"
-    LIBS="$LIBS_old"
+  fi
+
+  if test "$WITHLAM" = "yes";then
+    WITHLAM="no"
+
+    if test "$WITHLAM" = "no";then
+      CPPFLAGS="$MPI_INCLUDES $CPPFLAGS"
+      LIBS="$LIBS -lmpi++"
+      AC_TRY_LINK([
+      #include <mpi.h>
+      ], [int argc=0; char **argv=0; MPI_Init(&argc,&argv);],
+      WITHLAM="yes",WITHLAM="no")
+      if test "$WITHLAM" = "yes";then
+        MPI_LIBS="$MPI_LIBS -lmpi++"
+      fi
+      LIBS="$LIBS_old"
+      CPPFLAGS="$CPPFLAGS_old"
+
+      AC_CHECK_LIB(mpi++,MPI_Publish_name,WITHMPI2="yes",WITHMPI2="no")
+      LDFLAGS="$LDFLAGS_old"
+      LIBS="$LIBS_old"
+    fi
+
+    if test "$WITHLAM" = "no";then
+      AC_CHECK_LIB(lam,lam_mp_init,WITHLAM="yes",WITHLAM="no")
+      if test "$WITHLAM" = "yes";then
+        MPI_LIBS="$MPI_LIBS -llam"
+        LIBS="$LIBS -llam"
+      fi
+
+      AC_CHECK_LIB(mpi,MPI_Init,WITHLAM="yes",WITHLAM="no")
+      if test "$WITHLAM" = "yes";then
+        MPI_LIBS="$MPI_LIBS -lmpi"
+      fi
+
+      AC_CHECK_LIB(mpi,MPI_Publish_name,WITHMPI2="yes",WITHMPI2="no")
+      LDFLAGS="$LDFLAGS_old"
+      LIBS="$LIBS_old"
+    fi
   fi
 
   if test "$WITHLAM" = "yes";then
      WITHMPI="yes"
      mpi_ok=yes
-     MPI_LIBS="$MPI_LIBS -llammpi++"
+     CPPFLAGS="-DWITHLAM $CPPFLAGS"
   else
      mpi_ok=no
   fi
index 1a2bd9125d61c1b49d5dc5ef1b1617ac8bc3bbb0..94e52b25695a6c819d3628b10ac64068a3c63ed8 100644 (file)
@@ -1,3 +1,6 @@
+#ifdef HAVE_MPI2
+#include "mpi.h"
+#endif
 #include "ReceiverFactory.hxx"
 #include "Receivers.hxx"
 using namespace std;
index c4309b754a3fa082c66d5df09c19a40426a726e0..c02d9fffc5d26826058125b83c2b9585432accd8 100644 (file)
@@ -1,11 +1,11 @@
 #ifndef _RECEIVERS_HXX_
 #define _RECEIVERS_HXX_
 
-#include "SALOME_Comm_i.hxx"
-#include "Receiver.hxx"
 #ifdef HAVE_MPI2
 #include "mpi.h"
 #endif
+#include "SALOME_Comm_i.hxx"
+#include "Receiver.hxx"
 
 /*!
   Receiver used for transfert with CORBA when no copy is required remotely and locally.
index 71fbdadf85813225600046620e72dc46993fdcaa..7eb8513f70cbd25d84e1afd35ef0cd28c2a13f10 100644 (file)
@@ -1,7 +1,7 @@
+#include "SALOME_Comm_i.hxx"
 #ifndef WNT
 #include <rpc/xdr.h>
 #endif
-#include "SALOME_Comm_i.hxx"
 #include "poa.h"
 #include "omnithread.h"
 #include "Utils_SINGLETON.hxx"
index f7f196c620b5bf62740c164fe1e1dc2c88486267..1a8507ab0e6576a07eac99dd2b6d0d9f6b1e9828 100644 (file)
@@ -1,12 +1,12 @@
 #ifndef _SALOME_COMM_I_HXX_
 #define _SALOME_COMM_I_HXX_
 
-#include <string>
-#include <SALOMEconfig.h>
-#include CORBA_SERVER_HEADER(SALOME_Comm)
 #ifdef HAVE_MPI2
 #include "mpi.h"
 #endif
+#include <string>
+#include <SALOMEconfig.h>
+#include CORBA_SERVER_HEADER(SALOME_Comm)
 
 #define TIMEOUT 20
 
index 04f8056d0ec928486ff5907008a5911a3a6f4c33..ef1e956e045fa14ed744b84f2c245126cdb8a4d6 100644 (file)
@@ -1,7 +1,7 @@
+#include "SALOME_Comm_i.hxx"
 #include "SenderFactory.hxx"
 #include "utilities.h"
 #include "SALOMEMultiComm.hxx"
-#include "SALOME_Comm_i.hxx"
 using namespace std;
 
 #ifdef COMP_CORBA_DOUBLE
index 42d4d5a3e43faab00b858580bb3d3d010c44b2fe..e96cc3eb04d31389cecc0b363dbd4d9d1bf4c120 100644 (file)
@@ -2,6 +2,9 @@
 
 %{
   #include "ReceiverFactory.hxx"
+  #undef SEEK_SET
+  #undef SEEK_CUR
+  #undef SEEK_END
   #include "SALOME_Comm_i.hxx"
 %}
 
index f6f1884ff70a0fa602af6ed029cbd15429fcfda4..a97bb39dc028626ebda6aaf8f79569392ac0feb8 100644 (file)
 //  Module : SALOME
 //  $Header$
 
+#ifdef HAVE_MPI2
+#include <mpi.h>
+#endif
+
 #include <iostream>
 #include <string>
 #include <stdio.h>
 #include <Utils_Timer.hxx>
 #endif
 
-#ifdef HAVE_MPI2
-#include <mpi.h>
-#endif
-
 #include "Container_init_python.hxx"
 
 using namespace std;
index b7a3c708e1c68133adde04f27734be07ee527cf7..0e5e2cde88230ee0543cb241775727dfbeadb677 100644 (file)
@@ -29,6 +29,7 @@ SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb)
   MESSAGE("constructor");
   _NS = new SALOME_NamingService(orb);
   _ResManager = new SALOME_ResourcesManager(orb);
+  _id=0;
   PortableServer::POA_var root_poa = PortableServer::POA::_the_root_poa();
   PortableServer::POAManager_var pman = root_poa->the_POAManager();
   PortableServer::POA_var my_poa;
@@ -118,6 +119,10 @@ SALOME_ContainerManager::
 FindOrStartContainer(const Engines::MachineParameters& params,
                     const Engines::MachineList& possibleComputers)
 {
+  long id;
+  string containerNameInNS;
+  // Receives the decimal text of a long via sprintf("%ld"): allow 3 digits
+  // per byte plus sign and terminating NUL (sizeof(long)+1 only holds 4
+  // digits when long is 32 bits wide).
+  char idc[3*sizeof(long)+2];
+
   Engines::Container_ptr ret = FindContainer(params,possibleComputers);
   if(!CORBA::is_nil(ret))
     return ret;
@@ -128,6 +133,15 @@ FindOrStartContainer(const Engines::MachineParameters& params,
   string theMachine=_ResManager->FindBest(possibleComputers);
   MESSAGE("try to launch it on " << theMachine);
 
+  // Get an Id for the container: a parallel container registers in the Naming
+  // Service from the machine that hosts process 0. The ContainerManager doesn't
+  // know the name of this machine before the parallel container is launched.
+  // So, to retrieve the IOR of the parallel container from the Naming Service,
+  // the ContainerManager hands out a unique Id, and the parallel container
+  // registers itself under /ContainerManager/id<Id> in the Naming Service.
+
+  id = GetIdForContainer();
+
   string command;
   if(theMachine=="")
     {
@@ -137,11 +151,11 @@ FindOrStartContainer(const Engines::MachineParameters& params,
     }
   else if(theMachine==GetHostname())
     {
-      command=_ResManager->BuildCommandToLaunchLocalContainer(params);
+      command=_ResManager->BuildCommandToLaunchLocalContainer(params,id);
     }
   else
     command =
-      _ResManager->BuildCommandToLaunchRemoteContainer(theMachine,params);
+      _ResManager->BuildCommandToLaunchRemoteContainer(theMachine,params,id);
 
   _ResManager->RmTmpFile();
   int status=system(command.c_str());
@@ -170,8 +184,15 @@ FindOrStartContainer(const Engines::MachineParameters& params,
          count-- ;
          if ( count != 10 )
            MESSAGE( count << ". Waiting for FactoryServer on " << theMachine);
-         string containerNameInNS =
-           _NS->BuildContainerNameForNS(params,theMachine.c_str());
+         if(params.isMPI)
+           {
+             containerNameInNS = "/ContainerManager/id";
+             sprintf(idc,"%ld",id);
+             containerNameInNS += idc;
+           }
+         else
+           containerNameInNS =
+             _NS->BuildContainerNameForNS(params,theMachine.c_str());
          SCRUTE(containerNameInNS);
          CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
          ret=Engines::Container::_narrow(obj);
@@ -273,3 +294,22 @@ FindContainer(const Engines::MachineParameters& params,
   MESSAGE("FindContainer: not found");
   return Engines::Container::_nil();
 }
+
+//=============================================================================
+/*!
+ * Hand out the next container Id.
+ *
+ * A parallel container registers in the Naming Service from the machine that
+ * hosts process 0, and the ContainerManager doesn't know the name of that
+ * machine before the parallel container is launched. To retrieve the IOR of
+ * the parallel container from the Naming Service, the ContainerManager gives
+ * it an Id, and the container registers itself under
+ * /ContainerManager/id<Id>.
+ *
+ * \return the value of the _id counter after it has been incremented
+ */
+//=============================================================================
+
+
+long SALOME_ContainerManager::GetIdForContainer(void)
+{
+  // Advance the counter first, then report its new value.
+  return ++_id;
+}
+
index f8311e9f732b09bb55c1a84051272ecdca9d0055..1217db07bfb6ac18d201c53031be3df6cbe4eeff 100644 (file)
@@ -58,6 +58,9 @@ private:
   FindContainer(const Engines::MachineParameters& params,
                const char *theMachine);
 
+  long GetIdForContainer(void);
+  long _id;
+
   SALOME_ResourcesManager *_ResManager;
   SALOME_NamingService *_NS;
 };
index 3b97ad7938dd76dcec8cd55aee9c547c61c620f2..8b7309d8449d4dbdd2bfbaaaa5207ea23f4bc70f 100644 (file)
@@ -45,25 +45,52 @@ Engines_MPIContainer_i::Engines_MPIContainer_i(int nbproc, int numproc,
                                               int argc, char *argv[]) 
   : Engines_Container_i(orb,poa,containerName,argc,argv,false), MPIObject_i(nbproc,numproc)
 {
+  long id=0;
+  string IdContainerinNS;
+  // Receives the decimal text of a long via sprintf("%ld"): allow 3 digits
+  // per byte plus sign and terminating NUL. The id comes from the command
+  // line, so sizeof(long)+1 bytes cannot be assumed to be enough.
+  char idc[3*sizeof(long)+2];
+
   MESSAGE("[" << numproc << "] activate object");
   _id = _poa->activate_object(this);
-//   this->_add_ref();
+
+  // Look for "-id <n>" among the arguments. The scan stops one short of
+  // argc so that argv[i+1] always designates a real argument and never
+  // the slot past the last one.
+  if(argc>1)
+    {
+      for(int i=0;i<argc-1;i++)
+       {
+         // strcmp returns an int; 0 means the two strings are equal
+         if(strcmp(argv[i],"-id")==0)
+           {
+             // id is a long, so convert with atol rather than atoi
+             id = atol(argv[i+1]);
+           }
+       }
+    }
+  SCRUTE(id);
 
   if(numproc==0){
 
     _NS = new SALOME_NamingService();
-//     _NS = SINGLETON_<SALOME_NamingService>::Instance() ;
-//     ASSERT(SINGLETON_<SALOME_NamingService>::IsAlreadyExisting()) ;
     _NS->init_orb( CORBA::ORB::_duplicate(_orb) ) ;
 
-//     Engines::Container_ptr pCont 
-//       = Engines::Container::_narrow(POA_Engines::MPIContainer::_this());
     CORBA::Object_var obj=_poa->id_to_reference(*_id);
     Engines::Container_var pCont = Engines::Container::_narrow(obj);
+
     string hostname = GetHostname();
     _containerName = _NS->BuildContainerNameForNS(containerName,hostname.c_str());
     SCRUTE(_containerName);
     _NS->Register(pCont, _containerName.c_str());
+
+    // A parallel container registers in the Naming Service from the machine
+    // that hosts process 0. The ContainerManager doesn't know the name of this
+    // machine before the parallel container is launched. So, to retrieve the
+    // IOR of the parallel container from the Naming Service, the
+    // ContainerManager hands out a unique Id, and the parallel container
+    // registers itself under /ContainerManager/id<Id> in the Naming Service.
+
+    IdContainerinNS = "/ContainerManager/id";
+    sprintf(idc,"%ld",id);
+    IdContainerinNS += idc;
+    SCRUTE(IdContainerinNS);
+    _NS->Register(pCont, IdContainerinNS.c_str());
+
   }
 
   // Root recupere les ior des container des autre process
index 0da19e33ca5753158c78f16f0951fd48bd9b7590..ab6e1a38600695fe9de72e802c719534749b8c42 100644 (file)
@@ -24,9 +24,9 @@
 //  File   : MPIObject_i.cxx
 //  Module : SALOME
 
+#include <mpi.h>
 #include "MPIObject_i.hxx"
 #include "utilities.h"
-#include <mpi.h>
 using namespace std;
 
 MPIObject_i::MPIObject_i()
index bf2322190a7e30cf16008458e622172568d32045..3932ff3d4ef1ca1a741576580faa1bb7cbf0cbbb 100644 (file)
@@ -1,9 +1,9 @@
+#include <mpi.h>
 #include <iostream>
 #include "MPIContainer_i.hxx"
 #include "Utils_ORB_INIT.hxx"
 #include "Utils_SINGLETON.hxx"
 #include "utilities.h"
-#include <mpi.h>
 #include "SALOMETraceCollector.hxx"
 using namespace std;
 
index 36b0961a14dc33968a398ea5bb03455cace91d43..e0ad5fcd59e69d2487ca1d186878731643d8ce04 100644 (file)
@@ -543,6 +543,16 @@ SALOME_NamingService::ResolveComponent(const char* hostname,
          for (unsigned int ind = 0; ind < contList.size(); ind++)
            {
              name = contList[ind].c_str();
+
+             if ( nbproc >= 1 )
+               {
+                 char *str_nbproc = new char[8];
+                 sprintf(str_nbproc, "_%d", nbproc);
+                 if( strstr(name.c_str(),str_nbproc) == NULL)
+                   continue; // check only containers with _%d in name
+                 delete [] str_nbproc;
+               }
+
              name += "/";
              name += componentName;
              SCRUTE(name);
@@ -1458,15 +1468,17 @@ throw(ServiceUnreachable)
 void SALOME_NamingService::Destroy_FullDirectory(const char* Path) // destroys the entries listed after changing to Path, then Path itself
 throw(ServiceUnreachable)
 {
-  Change_Directory(Path);
-  vector<string> contList = list_directory();
-
-  for (unsigned int ind = 0; ind < contList.size(); ind++)
-    Destroy_Name(contList[ind].c_str());
-
-  Destroy_Directory(Path);
+  if( Change_Directory(Path) ) // nothing is listed or destroyed unless this call returns a true value
+    {
+      vector<string> contList = list_directory(); // entries reported by list_directory() after the directory change
 
-  Destroy_Name(Path);
+      for (unsigned int ind = 0; ind < contList.size(); ind++)
+       Destroy_Name(contList[ind].c_str()); // destroy each listed entry by name
+      
+      Destroy_Directory(Path); // then the directory designated by Path
+      
+      Destroy_Name(Path); // finally the name Path
+    }
 }
 
 // ============================================================================
index 26f9d2de74d3d1172c60d9d62799eda7afa00ef0..8a92adefc65a25329b332d0f27ed027c4de6236d 100644 (file)
@@ -357,10 +357,12 @@ bool isPythonContainer(const char* ContainerName)
 string
 SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
 (const string& machine,
- const Engines::MachineParameters& params)
+ const Engines::MachineParameters& params, const long id)
 {
   string command;
-
+  int nbproc;
+  char idc[sizeof(long)+1];
+         
   if ( ! _isAppliSalomeDefined )
     command = BuildTempFileToLaunchRemoteContainer(machine, params);
 
@@ -370,8 +372,6 @@ SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
 
       if (params.isMPI)
         {
-          int nbproc;
-
           if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
             nbproc = 1;
           else if ( params.nb_node == 0 )
@@ -418,9 +418,33 @@ SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
       ASSERT(getenv("NSPORT"));
       command += getenv("NSPORT"); // port of CORBA name server
 
-      command += " SALOME_Container ";
+      if(params.isMPI)
+       {
+         command += " mpirun -np ";
+         std::ostringstream o;
+         o << nbproc << " ";
+         command += o.str();
+#ifdef WITHLAM
+         command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
+#endif 
+         command += " SALOME_MPIContainer ";
+       }
+      else
+       command += " SALOME_Container ";
+
+      command += _NS->ContainerName(params);
+      command += " -id ";
+      sprintf(idc,"%ld",id);
+      command += idc;
+      command += " -";
+      AddOmninamesParams(command);
+      command += " > /tmp/";
       command += _NS->ContainerName(params);
-      command += "&";
+      command += "_";
+      command += GetHostname();
+      command += "_";
+      command += getenv( "USER" ) ;
+      command += ".log 2>&1 &" ;
 
       MESSAGE("command =" << command);
     }
@@ -437,11 +461,12 @@ SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
 
 string
 SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
-(const Engines::MachineParameters& params)
+(const Engines::MachineParameters& params, const long id)
 {
   _TmpFileName = "";
   string command;
   int nbproc = 0;
+  // Receives the decimal text of a long via sprintf("%ld"): allow 3 digits
+  // per byte plus sign and terminating NUL (sizeof(long)+1 only holds 4
+  // digits when long is 32 bits wide).
+  char idc[3*sizeof(long)+2];
 
   if (params.isMPI)
     {
@@ -461,7 +486,9 @@ SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
       o << nbproc << " ";
 
       command += o.str();
+#ifdef WITHLAM
       command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
+#endif
 
       if (isPythonContainer(params.container_name))
         command += "pyMPI SALOME_ContainerPy.py ";
@@ -478,6 +505,9 @@ SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
     }
 
   command += _NS->ContainerName(params);
+  command += " -id ";
+  sprintf(idc,"%ld",id);
+  command += idc;
   command += " -";
   AddOmninamesParams(command);
   command += " > /tmp/";
@@ -771,6 +801,9 @@ SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer
       std::ostringstream o;
 
       tempOutputFile << nbproc << " ";
+#ifdef WITHLAM
+      tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
+#endif
     }
 
   tempOutputFile << (*(resInfo.ModulesPath.find("KERNEL"))).second
index 029c9fa9ee96ae04822feb6fab6e03f15da82035..b41e0d24a0bab73dbe61b8c61be5a715ffc35f57 100644 (file)
@@ -53,10 +53,10 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager
 
     std::string BuildCommandToLaunchRemoteContainer
     (const std::string& machine,
-     const Engines::MachineParameters& params);
+     const Engines::MachineParameters& params, const long id);
 
     std::string BuildCommandToLaunchLocalContainer
-    (const Engines::MachineParameters& params);
+    (const Engines::MachineParameters& params, const long id);
 
     void RmTmpFile();