]> SALOME platform Git repositories - modules/kernel.git/commitdiff
Salome HOME
debug of MPI containers
authorsecher <secher>
Tue, 29 Nov 2005 14:13:03 +0000 (14:13 +0000)
committersecher <secher>
Tue, 29 Nov 2005 14:13:03 +0000 (14:13 +0000)
16 files changed:
salome_adm/unix/config_files/check_lam.m4
src/Communication/ReceiverFactory.cxx
src/Communication/Receivers.hxx
src/Communication/SALOME_Comm_i.cxx
src/Communication/SALOME_Comm_i.hxx
src/Communication/SenderFactory.cxx
src/Communication_SWIG/libSALOME_Comm.i
src/Container/SALOME_Container.cxx
src/Container/SALOME_ContainerManager.cxx
src/Container/SALOME_ContainerManager.hxx
src/MPIContainer/MPIContainer_i.cxx
src/MPIContainer/MPIObject_i.cxx
src/MPIContainer/SALOME_MPIContainer.cxx
src/NamingService/SALOME_NamingService.cxx
src/ResourcesManager/SALOME_ResourcesManager.cxx
src/ResourcesManager/SALOME_ResourcesManager.hxx

index bde79d7e60460356bd778f267dda9f4c89b97e95..3d8d9d8b6cb54bfac7465c7fb7fd821295fc8003 100644 (file)
@@ -69,7 +69,8 @@ if test "$WITHLAM" = yes; then
   if test "$WITHLAM" = "yes";then
      WITHMPI="yes"
      mpi_ok=yes
-     MPI_LIBS="$MPI_LIBS -llammpi++"
+     MPI_LIBS="$MPI_LIBS -lmpi -llam"
+     CPPFLAGS="-DWITHLAM $CPPFLAGS"
   else
      mpi_ok=no
   fi
index 1a2bd9125d61c1b49d5dc5ef1b1617ac8bc3bbb0..94e52b25695a6c819d3628b10ac64068a3c63ed8 100644 (file)
@@ -1,3 +1,6 @@
+#ifdef HAVE_MPI2
+#include "mpi.h"
+#endif
 #include "ReceiverFactory.hxx"
 #include "Receivers.hxx"
 using namespace std;
index c4309b754a3fa082c66d5df09c19a40426a726e0..c02d9fffc5d26826058125b83c2b9585432accd8 100644 (file)
@@ -1,11 +1,11 @@
 #ifndef _RECEIVERS_HXX_
 #define _RECEIVERS_HXX_
 
-#include "SALOME_Comm_i.hxx"
-#include "Receiver.hxx"
 #ifdef HAVE_MPI2
 #include "mpi.h"
 #endif
+#include "SALOME_Comm_i.hxx"
+#include "Receiver.hxx"
 
 /*!
   Receiver used for transfert with CORBA when no copy is required remotely and locally.
index 71fbdadf85813225600046620e72dc46993fdcaa..7eb8513f70cbd25d84e1afd35ef0cd28c2a13f10 100644 (file)
@@ -1,7 +1,7 @@
+#include "SALOME_Comm_i.hxx"
 #ifndef WNT
 #include <rpc/xdr.h>
 #endif
-#include "SALOME_Comm_i.hxx"
 #include "poa.h"
 #include "omnithread.h"
 #include "Utils_SINGLETON.hxx"
index f7f196c620b5bf62740c164fe1e1dc2c88486267..1a8507ab0e6576a07eac99dd2b6d0d9f6b1e9828 100644 (file)
@@ -1,12 +1,12 @@
 #ifndef _SALOME_COMM_I_HXX_
 #define _SALOME_COMM_I_HXX_
 
-#include <string>
-#include <SALOMEconfig.h>
-#include CORBA_SERVER_HEADER(SALOME_Comm)
 #ifdef HAVE_MPI2
 #include "mpi.h"
 #endif
+#include <string>
+#include <SALOMEconfig.h>
+#include CORBA_SERVER_HEADER(SALOME_Comm)
 
 #define TIMEOUT 20
 
index 04f8056d0ec928486ff5907008a5911a3a6f4c33..ef1e956e045fa14ed744b84f2c245126cdb8a4d6 100644 (file)
@@ -1,7 +1,7 @@
+#include "SALOME_Comm_i.hxx"
 #include "SenderFactory.hxx"
 #include "utilities.h"
 #include "SALOMEMultiComm.hxx"
-#include "SALOME_Comm_i.hxx"
 using namespace std;
 
 #ifdef COMP_CORBA_DOUBLE
index 42d4d5a3e43faab00b858580bb3d3d010c44b2fe..e96cc3eb04d31389cecc0b363dbd4d9d1bf4c120 100644 (file)
@@ -2,6 +2,9 @@
 
 %{
   #include "ReceiverFactory.hxx"
+  #undef SEEK_SET
+  #undef SEEK_CUR
+  #undef SEEK_END
   #include "SALOME_Comm_i.hxx"
 %}
 
index f6f1884ff70a0fa602af6ed029cbd15429fcfda4..a97bb39dc028626ebda6aaf8f79569392ac0feb8 100644 (file)
 //  Module : SALOME
 //  $Header$
 
+#ifdef HAVE_MPI2
+#include <mpi.h>
+#endif
+
 #include <iostream>
 #include <string>
 #include <stdio.h>
 #include <Utils_Timer.hxx>
 #endif
 
-#ifdef HAVE_MPI2
-#include <mpi.h>
-#endif
-
 #include "Container_init_python.hxx"
 
 using namespace std;
index b7a3c708e1c68133adde04f27734be07ee527cf7..c7aab78a15d4cc4bcf87c39a79e606c8295e3012 100644 (file)
@@ -118,6 +118,10 @@ SALOME_ContainerManager::
 FindOrStartContainer(const Engines::MachineParameters& params,
                     const Engines::MachineList& possibleComputers)
 {
+  long id;
+  string containerNameInNS;
+  char idc[64];
+
   Engines::Container_ptr ret = FindContainer(params,possibleComputers);
   if(!CORBA::is_nil(ret))
     return ret;
@@ -128,6 +132,14 @@ FindOrStartContainer(const Engines::MachineParameters& params,
   string theMachine=_ResManager->FindBest(possibleComputers);
   MESSAGE("try to launch it on " << theMachine);
 
+  // Get an Id for the container: a parallel container registers in the Naming
+  // Service on the machine where process 0 runs. ContainerManager doesn't know
+  // the name of this machine before the parallel container is launched. So, to
+  // get the IOR of the parallel container from the Naming Service,
+  // ContainerManager provides an Id. The parallel container registers itself
+  // under the "/ContainerManager/id<Id>" entry in the Naming Service.
+  id = GetIdForContainer();
+
   string command;
   if(theMachine=="")
     {
@@ -137,11 +149,11 @@ FindOrStartContainer(const Engines::MachineParameters& params,
     }
   else if(theMachine==GetHostname())
     {
-      command=_ResManager->BuildCommandToLaunchLocalContainer(params);
+      command=_ResManager->BuildCommandToLaunchLocalContainer(params,id);
     }
   else
     command =
-      _ResManager->BuildCommandToLaunchRemoteContainer(theMachine,params);
+      _ResManager->BuildCommandToLaunchRemoteContainer(theMachine,params,id);
 
   _ResManager->RmTmpFile();
   int status=system(command.c_str());
@@ -170,8 +182,14 @@ FindOrStartContainer(const Engines::MachineParameters& params,
          count-- ;
          if ( count != 10 )
            MESSAGE( count << ". Waiting for FactoryServer on " << theMachine);
-         string containerNameInNS =
-           _NS->BuildContainerNameForNS(params,theMachine.c_str());
+         if(params.isMPI){
+           containerNameInNS = "/ContainerManager/id";
+           sprintf(idc,"%ld",id);
+           containerNameInNS += idc;
+         }
+         else
+           containerNameInNS =
+             _NS->BuildContainerNameForNS(params,theMachine.c_str());
          SCRUTE(containerNameInNS);
          CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
          ret=Engines::Container::_narrow(obj);
@@ -273,3 +291,12 @@ FindContainer(const Engines::MachineParameters& params,
   MESSAGE("FindContainer: not found");
   return Engines::Container::_nil();
 }
+
+
+long SALOME_ContainerManager::GetIdForContainer(void) // Returns the id that FindOrStartContainer appends to "/ContainerManager/id" and passes to the launch command.
+{
+  long id;
+  id = rand(); // The id comes straight from rand(); no seeding or uniqueness check is visible in this hunk.
+  return id;
+}
+
index f8311e9f732b09bb55c1a84051272ecdca9d0055..96114f8929fccddf2e3b1328a8652a742692098d 100644 (file)
@@ -58,6 +58,8 @@ private:
   FindContainer(const Engines::MachineParameters& params,
                const char *theMachine);
 
+  long  GetIdForContainer(void);
+
   SALOME_ResourcesManager *_ResManager;
   SALOME_NamingService *_NS;
 };
index 3b97ad7938dd76dcec8cd55aee9c547c61c620f2..635f137d68348306c7db10ee8c48d07069aba1ec 100644 (file)
@@ -45,25 +45,48 @@ Engines_MPIContainer_i::Engines_MPIContainer_i(int nbproc, int numproc,
                                               int argc, char *argv[]) 
   : Engines_Container_i(orb,poa,containerName,argc,argv,false), MPIObject_i(nbproc,numproc)
 {
+  long id=0;
+  string IdContainerinNS;
+  char idc[64];
+
   MESSAGE("[" << numproc << "] activate object");
   _id = _poa->activate_object(this);
-//   this->_add_ref();
+
+  if(argc>1){
+    for(int i=0;i<argc;i++){
+      if(strcmp(argv[i],"-id")==NULL){
+       id = atoi(argv[i+1]);
+       continue;
+      }
+    }
+  }
+  SCRUTE(id);
 
   if(numproc==0){
 
     _NS = new SALOME_NamingService();
-//     _NS = SINGLETON_<SALOME_NamingService>::Instance() ;
-//     ASSERT(SINGLETON_<SALOME_NamingService>::IsAlreadyExisting()) ;
     _NS->init_orb( CORBA::ORB::_duplicate(_orb) ) ;
 
-//     Engines::Container_ptr pCont 
-//       = Engines::Container::_narrow(POA_Engines::MPIContainer::_this());
     CORBA::Object_var obj=_poa->id_to_reference(*_id);
     Engines::Container_var pCont = Engines::Container::_narrow(obj);
+
     string hostname = GetHostname();
     _containerName = _NS->BuildContainerNameForNS(containerName,hostname.c_str());
     SCRUTE(_containerName);
     _NS->Register(pCont, _containerName.c_str());
+
+    // A parallel container registers in the Naming Service on the machine
+    // where process 0 runs. ContainerManager doesn't know the name of this
+    // machine before the parallel container is launched. So, to get the IOR
+    // of the parallel container from the Naming Service, ContainerManager
+    // provides an Id (the "-id" argument). The parallel container registers
+    // itself under the "/ContainerManager/id<Id>" entry in the Naming Service.
+    IdContainerinNS = "/ContainerManager/id";
+    sprintf(idc,"%ld",id);
+    IdContainerinNS += idc;
+    SCRUTE(IdContainerinNS);
+    _NS->Register(pCont, IdContainerinNS.c_str());
+
   }
 
   // Root recupere les ior des container des autre process
index 0da19e33ca5753158c78f16f0951fd48bd9b7590..ab6e1a38600695fe9de72e802c719534749b8c42 100644 (file)
@@ -24,9 +24,9 @@
 //  File   : MPIObject_i.cxx
 //  Module : SALOME
 
+#include <mpi.h>
 #include "MPIObject_i.hxx"
 #include "utilities.h"
-#include <mpi.h>
 using namespace std;
 
 MPIObject_i::MPIObject_i()
index bf2322190a7e30cf16008458e622172568d32045..3932ff3d4ef1ca1a741576580faa1bb7cbf0cbbb 100644 (file)
@@ -1,9 +1,9 @@
+#include <mpi.h>
 #include <iostream>
 #include "MPIContainer_i.hxx"
 #include "Utils_ORB_INIT.hxx"
 #include "Utils_SINGLETON.hxx"
 #include "utilities.h"
-#include <mpi.h>
 #include "SALOMETraceCollector.hxx"
 using namespace std;
 
index a13dd52d46736c30467500f2b5da3038b7d427e3..ff670801b32442c2a3f7c7078ba42d4ebb5df6fe 100644 (file)
@@ -543,6 +543,15 @@ SALOME_NamingService::ResolveComponent(const char* hostname,
          for (unsigned int ind = 0; ind < contList.size(); ind++)
            {
              name = contList[ind].c_str();
+
+             if ( nbproc >= 1 ){
+               char *str_nbproc = new char[8];
+               sprintf(str_nbproc, "_%d", nbproc);
+               if( strstr(name.c_str(),str_nbproc) == NULL)
+                 continue;
+               delete [] str_nbproc;
+             }
+
              name += "/";
              name += componentName;
              SCRUTE(name);
@@ -1458,15 +1467,16 @@ throw(ServiceUnreachable)
 void SALOME_NamingService::Destroy_FullDirectory(const char* Path)
 throw(ServiceUnreachable)
 {
-  Change_Directory(Path);
-  vector<string> contList = list_directory();
+  if( Change_Directory(Path) ){
+    vector<string> contList = list_directory();
 
-  for (unsigned int ind = 0; ind < contList.size(); ind++)
-    Destroy_Name(contList[ind].c_str());
+    for (unsigned int ind = 0; ind < contList.size(); ind++)
+      Destroy_Name(contList[ind].c_str());
 
-  Destroy_Directory(Path);
+    Destroy_Directory(Path);
 
-  Destroy_Name(Path);
+    Destroy_Name(Path);
+  }
 }
 
 // ============================================================================
index 0a9aeeaeaf285470fa7e334c6ed2d0ead8f17c36..384253f01f57cdb6488efd3ecfbec8e3edee6824 100644 (file)
@@ -357,10 +357,12 @@ bool isPythonContainer(const char* ContainerName)
 string
 SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
 (const string& machine,
- const Engines::MachineParameters& params)
+ const Engines::MachineParameters& params,const long id)
 {
   string command;
-
+  int nbproc;
+  char idc[64];
+         
   if ( ! _isAppliSalomeDefined )
     command = BuildTempFileToLaunchRemoteContainer(machine, params);
 
@@ -370,8 +372,6 @@ SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
 
       if (params.isMPI)
         {
-          int nbproc;
-
           if ( (params.nb_node <= 0) && (params.nb_proc_per_node <= 0) )
             nbproc = 1;
           else if ( params.nb_node == 0 )
@@ -418,9 +418,31 @@ SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
       ASSERT(getenv("NSPORT"));
       command += getenv("NSPORT"); // port of CORBA name server
 
-      command += " SALOME_Container ";
+      if(params.isMPI){
+       command += " mpirun -np ";
+       std::ostringstream o;
+       o << nbproc << " ";
+       command += o.str();
+#ifdef WITHLAM
+       command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
+#endif 
+       command += " SALOME_MPIContainer ";
+      }
+      else
+       command += " SALOME_Container ";
+      command += _NS->ContainerName(params);
+      command += " -id ";
+      sprintf(idc,"%ld",id);
+      command += idc;
+      command += " -";
+      AddOmninamesParams(command);
+      command += " > /tmp/";
       command += _NS->ContainerName(params);
-      command += "&";
+      command += "_";
+      command += GetHostname();
+      command += "_";
+      command += getenv( "USER" ) ;
+      command += ".log 2>&1 &" ;
 
       MESSAGE("command =" << command);
     }
@@ -437,11 +459,12 @@ SALOME_ResourcesManager::BuildCommandToLaunchRemoteContainer
 
 string
 SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
-(const Engines::MachineParameters& params)
+(const Engines::MachineParameters& params,const long id)
 {
   _TmpFileName = "";
   string command;
   int nbproc = 0;
+  char idc[64];
 
   if (params.isMPI)
     {
@@ -461,7 +484,9 @@ SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
       o << nbproc << " ";
 
       command += o.str();
-//       command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
+#ifdef WITHLAM
+      command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
+#endif
 
       if (isPythonContainer(params.container_name))
         command += "pyMPI SALOME_ContainerPy.py ";
@@ -478,6 +503,9 @@ SALOME_ResourcesManager::BuildCommandToLaunchLocalContainer
     }
 
   command += _NS->ContainerName(params);
+  command += " -id ";
+  sprintf(idc,"%ld",id);
+  command += idc;
   command += " -";
   AddOmninamesParams(command);
   command += " > /tmp/";
@@ -771,6 +799,9 @@ SALOME_ResourcesManager::BuildTempFileToLaunchRemoteContainer
       std::ostringstream o;
 
       tempOutputFile << nbproc << " ";
+#ifdef WITHLAM
+      tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
+#endif
     }
 
   tempOutputFile << (*(resInfo.ModulesPath.find("KERNEL"))).second
index 029c9fa9ee96ae04822feb6fab6e03f15da82035..8c56d07831e8b1a1115373a7e4771672ca515aa1 100644 (file)
@@ -53,10 +53,10 @@ class RESOURCESMANAGER_EXPORT SALOME_ResourcesManager
 
     std::string BuildCommandToLaunchRemoteContainer
     (const std::string& machine,
-     const Engines::MachineParameters& params);
+     const Engines::MachineParameters& params,const long id);
 
     std::string BuildCommandToLaunchLocalContainer
-    (const Engines::MachineParameters& params);
+    (const Engines::MachineParameters& params,const long id);
 
     void RmTmpFile();