1 // SALOME MPIContainer : implementation of container based on MPI libraries
3 // Copyright (C) 2003 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.opencascade.org/SALOME/ or email : webmaster.salome@opencascade.org
24 // File : MPIContainer_i.cxx
30 #include "MPIContainer_i.hxx"
31 #include "SALOME_NamingService.hxx"
32 #include "Utils_SINGLETON.hxx"
34 #include "utilities.h"
// Full constructor of the MPI container servant.
//
// Forwards orb, poa, containerName, argc, argv (plus a literal `false`) to the
// Engines_Container_i base and nbproc/numproc to the MPIObject_i base, then:
//   1. activates this servant in the POA and stores the object id in _id,
//   2. obtains the SALOME_NamingService singleton and initialises it with orb,
//   3. registers the container reference under _containerName,
//   4. calls BCastIOR with this object's MPIObject reference.
//
// NOTE(review): several lines of this definition are not visible in this
// listing (gaps in the embedded numbering), including whatever condition
// guards the naming-service section. Presumably it is a rank-0 test, as the
// comment below states -- confirm against the complete file.
37 // The call to the SALOME registry is made only for process 0
38 Engines_MPIContainer_i::Engines_MPIContainer_i(int nbproc, int numproc,
40 PortableServer::POA_ptr poa,
42 int argc, char *argv[])
43 : Engines_Container_i(orb,poa,containerName,argc,argv,false), MPIObject_i(nbproc,numproc)
45 MESSAGE("[" << numproc << "] activate object");
// Activate the servant; the returned object id is reused below to build the
// container reference.
46 _id = _poa->activate_object(this);
51 // _NS = new SALOME_NamingService(_orb);
// Use the shared naming-service singleton instead of allocating a new one
// (the allocation above is deliberately left commented out).
52 _NS = SINGLETON_<SALOME_NamingService>::Instance() ;
53 ASSERT(SINGLETON_<SALOME_NamingService>::IsAlreadyExisting()) ;
54 _NS->init_orb( orb ) ;
56 // Engines::Container_ptr pCont
57 // = Engines::Container::_narrow(POA_Engines::MPIContainer::_this());
// Build the container reference from the object id obtained at activation
// and publish it under the container name.
58 Engines::Container_ptr pCont = Engines::Container::_narrow(_poa->id_to_reference(*_id));
59 SCRUTE(_containerName);
60 _NS->Register(pCont, _containerName.c_str());
63 // Root collects the IORs of the containers of the other processes
64 Engines::MPIObject_var pobj = POA_Engines::MPIContainer::_this();
65 BCastIOR(_orb,pobj,true);
// Minimal constructor: default-constructs the Engines_Container_i base and
// passes the process count (nbproc) and this process's rank (numproc) to the
// MPIObject_i base. No other work is visible in this listing.
68 Engines_MPIContainer_i::Engines_MPIContainer_i(int nbproc, int numproc)
69 : Engines_Container_i(), MPIObject_i(nbproc,numproc)
// Destructor: emits a trace message and, if handle_map still holds entries,
// a warning that a non-empty container is being destroyed. No cleanup of the
// remaining entries is visible in this listing.
73 Engines_MPIContainer_i::~Engines_MPIContainer_i(void)
75 MESSAGE("[" << _numproc << "] Engines_MPIContainer_i::~Engines_MPIContainer_i()");
76 if( !handle_map.empty() ){
77 MESSAGE("[" << _numproc << "] Engines_MPIContainer_i::~Engines_MPIContainer_i: warning destroy a not empty container");
81 // Start MPI Container
//
// Returns a reference to the MPI container named ContainerName, launching it
// if it is not yet registered.
//
// Steps visible in this listing:
//   1. Under _numInstanceMutex, resolve a naming-service path built from
//      "/Containers/", machineName() and ContainerName.
//   2. If the object is found, unlock and return it narrowed to
//      Engines::MPIContainer.
//   3. Otherwise build an "mpirun -np <nbproc> SALOME_MPIContainer ..." shell
//      command (stdout/stderr redirected to /tmp/<ContainerName>.log, run in
//      the background) and execute it with system().
//   4. Resolve the same path again in a loop while the reference is nil and
//      count >= 0, returning the narrowed reference on success.
//   5. If the container is still not found, unlock and return a nil reference.
//
// NOTE(review): parts of the signature and body (parameter lines, try/if
// lines, the declarations of nilvar, i, nbp, shstr and count, and the loop's
// decrement/wait) are not visible here; the description above covers only
// the visible statements.
// NOTE(review): ContainerName is concatenated unescaped into a command line
// handed to system() and into a /tmp file name. If callers can supply
// arbitrary names this allows shell injection -- confirm the trust boundary.
82 Engines::MPIContainer_ptr Engines_MPIContainer_i::start_MPIimpl(
83 const char* ContainerName,
89 MESSAGE("[" << _numproc << "] start_impl argc " << _argc << " ContainerName " << ContainerName
90 << hex << this << dec) ;
91 _numInstanceMutex.lock() ; // lock on the instance number
93 CORBA::Object_var obj = Engines::MPIContainer::_nil() ;
// First attempt: is the container already registered in the naming service?
96 string cont("/Containers/");
97 cont += machineName() ;
99 cont += ContainerName;
100 INFOS("[" << _numproc << "] " << machineName() << " start_impl unknown container " << cont.c_str()
101 << " try to Resolve" );
102 obj = _NS->Resolve( cont.c_str() );
103 nilvar = CORBA::is_nil( obj ) ;
105 INFOS("[" << _numproc << "] " << machineName() << " start_impl unknown container "
109 catch (ServiceUnreachable&) {
110 INFOS("[" << _numproc << "] " << machineName() << "Caught exception: Naming Service Unreachable");
113 INFOS("[" << _numproc << "] " << machineName() << "Caught unknown exception.");
// Container already running: release the lock and hand back the reference.
116 _numInstanceMutex.unlock() ;
117 MESSAGE("[" << _numproc << "] start_impl container found without new launch") ;
118 return Engines::MPIContainer::_narrow(obj);
// Trace the stored command-line arguments (loop ends at the null entry).
121 while ( _argv[ i ] ) {
122 MESSAGE("[" << _numproc << "] argv" << i << " " << _argv[ i ]) ;
// Assemble the launch command.
// NOTE(review): nbp is declared on a line not shown -- verify the buffer is
// large enough for the formatted string.
125 sprintf(nbp,"mpirun -np %d SALOME_MPIContainer ",nbproc);
127 shstr += ContainerName ;
130 shstr += _argv[ 2 ] ;
132 shstr += _argv[ 3 ] ;
134 shstr += " > /tmp/" ;
135 shstr += ContainerName ;
136 shstr += ".log 2>&1 &" ;
137 MESSAGE("system(" << shstr << ")") ;
138 int status = system( shstr.c_str() ) ;
// The messages below show that status values -1 and 217 are reported as
// launch failures.
140 INFOS("[" << _numproc << "] Engines_MPIContainer_i::start_impl SALOME_MPIContainer failed (system command status -1)") ;
142 else if (status == 217) {
143 INFOS("[" << _numproc << "] Engines_MPIContainer_i::start_impl SALOME_MPIContainer failed (system command status 217)") ;
145 INFOS("[" << _numproc << "] " << machineName() << " Engines_MPIContainer_i::start_impl SALOME_MPIContainer launch done");
// Second attempt: look the freshly launched container up, retrying while the
// reference is nil and the retry counter has not run out.
147 obj = Engines::MPIContainer::_nil() ;
149 string cont("/Containers/");
150 cont += machineName() ;
152 cont += ContainerName;
155 while ( nilvar && count >= 0) {
157 obj = _NS->Resolve(cont.c_str());
158 nilvar = CORBA::is_nil( obj ) ;
160 INFOS("[" << _numproc << "] " << count << ". " << machineName()
161 << " start_impl unknown container " << cont.c_str());
165 _numInstanceMutex.unlock() ;
167 MESSAGE("[" << _numproc << "] start_impl container found after new launch of SALOME_MPIContainer") ;
169 return Engines::MPIContainer::_narrow(obj);
171 catch (ServiceUnreachable&) {
172 INFOS("[" << _numproc << "] " << machineName() << "Caught exception: Naming Service Unreachable");
175 INFOS("[" << _numproc << "] " << machineName() << "Caught unknown exception.");
// Failure path: the container never appeared in the naming service.
177 _numInstanceMutex.unlock() ;
178 MESSAGE("[" << _numproc << "] start_impl MPI container not found after new launch of SALOME_MPIContainer") ;
179 return Engines::MPIContainer::_nil() ;
// Loads a component in every process of the MPI container.
//
// For each rank ip in [1, _nbproc) the matching container reference is taken
// from _tior, narrowed to Engines::MPIContainer and asked to load the
// component through SPload_impl. The component is then loaded in this process
// with Lload_impl, whose result is returned.
//
// NOTE(review): lines around the loop are not visible in this listing;
// presumably the forwarding is restricted to rank 0 -- confirm.
// NOTE(review): the reference returned by _narrow is used as a temporary and
// never released, which may leak an object reference per call -- confirm
// against the CORBA C++ mapping rules.
183 Engines::Component_ptr Engines_MPIContainer_i::load_impl(const char* nameToRegister,
184 const char* componentName)
189 // Invoke the loading of the component in the other processes
190 for(ip= 1;ip<_nbproc;ip++)
191 (Engines::MPIContainer::_narrow((*_tior)[ip]))->SPload_impl(nameToRegister,
195 return Lload_impl(nameToRegister,componentName);
// Entry point that load_impl invokes on the containers of the other ranks:
// performs the local load through Lload_impl.
//
// NOTE(review): Lload_impl returns a duplicated component reference that is
// discarded here; this may leak one reference per call -- confirm.
200 void Engines_MPIContainer_i::SPload_impl(const char* nameToRegister,
201 const char* componentName)
203 Lload_impl(nameToRegister,componentName);
// Loads one component instance in the current process.
//
// Parameters:
//   nameToRegister - base name used for the instance name, the factory symbol
//                    and the naming-service entry.
//   componentName  - path passed to dlopen() for the component library.
//
// Returns a duplicated reference to the loaded component, or a nil reference
// when the shared library cannot be opened or the sequential fallback yields
// a nil object.
//
// Visible steps:
//   1. Build the instance name "<nameToRegister>_inst_<_numInstance>_<rank>".
//   2. dlopen() the library and look up "<nameToRegister>Engine_factory".
//   3. If dlerror() reports a problem after dlsym(), fall back to the
//      sequential Engines_Container_i::load_impl; otherwise call the factory
//      and narrow the resulting object to MPIObject and Component.
//   4. Register the component under "<_containerName>/<nameToRegister>",
//      remember the library handle in handle_map and call BCastIOR.
//
// NOTE(review): several lines (declarations of cproc, _aNumI, error, the
// update of _numInstance, and the if/else/brace lines) are not visible in
// this listing, so the exact branch structure cannot be confirmed here.
206 Engines::Component_ptr Engines_MPIContainer_i::Lload_impl(
207 const char* nameToRegister,
208 const char* componentName)
210 Engines::Component_var iobject;
211 Engines::MPIObject_var pobj;
// Rank suffix "_<rank>" that makes the instance name unique per process.
214 sprintf(cproc,"_%d",_numproc);
216 BEGIN_OF("[" << _numproc << "] MPIContainer_i::Lload_impl");
218 _numInstanceMutex.lock() ; // lock on the instance number
221 sprintf(_aNumI,"%d",_numInstance) ;
223 string _impl_name = componentName;
224 string _nameToRegister = nameToRegister;
225 string instanceName = _nameToRegister + "_inst_" + _aNumI + cproc;
226 MESSAGE("[" << _numproc << "] instanceName=" << instanceName);
228 string absolute_impl_name(_impl_name);
229 MESSAGE("[" << _numproc << "] absolute_impl_name=" << absolute_impl_name);
230 void * handle = dlopen(absolute_impl_name.c_str(), RTLD_LAZY);
// NOTE(review): no unlock of _numInstanceMutex is visible before this early
// return, although the mutex was locked above -- check the complete file for
// a lock left held on the dlopen failure path.
232 INFOS("[" << _numproc << "] Can't load shared library : " << absolute_impl_name);
233 INFOS("[" << _numproc << "] error dlopen: " << dlerror());
234 return Engines::Component::_nil() ;
// Name of the factory symbol expected in the component library.
237 string factory_name = _nameToRegister + string("Engine_factory");
238 MESSAGE("[" << _numproc << "] factory_name=" << factory_name) ;
// Look the factory up and cast the symbol to the expected function type.
240 PortableServer::ObjectId * (*MPIComponent_factory) (int,int,
242 PortableServer::POA_ptr,
243 PortableServer::ObjectId *,
246 (PortableServer::ObjectId * (*) (int,int,
248 PortableServer::POA_ptr,
249 PortableServer::ObjectId *,
252 dlsym(handle, factory_name.c_str());
// A non-NULL dlerror() after dlsym() means the MPI factory symbol is missing.
255 if ((error = dlerror()) != NULL){
256 // Try to load a sequential component
257 MESSAGE("[" << _numproc << "] Try to load a sequential component");
// The mutex is released before delegating to the base-class loader.
// NOTE(review): if execution continues to the unlock further down on this
// path, the mutex would be unlocked twice and pobj would still be nil when
// passed to BCastIOR -- confirm against the complete file.
258 _numInstanceMutex.unlock() ;
259 iobject = Engines_Container_i::load_impl(nameToRegister,componentName);
260 if( CORBA::is_nil(iobject) ) return Engines::Component::_duplicate(iobject);
263 // Instantiate the parallel component
264 MESSAGE("[" << _numproc << "] Try to load a parallel component");
265 PortableServer::ObjectId * id = (MPIComponent_factory)
266 (_nbproc,_numproc,_orb, _poa, _id, instanceName.c_str(), _nameToRegister.c_str());
267 // get reference from id
268 CORBA::Object_var o = _poa->id_to_reference(*id);
269 pobj = Engines::MPIObject::_narrow(o) ;
270 iobject = Engines::Component::_narrow(o) ;
274 // use the registry here later:
275 // register the engine under the name containerName.dir/nameToRegister.object
276 string component_registerName = _containerName + "/" + _nameToRegister;
277 _NS->Register(iobject, component_registerName.c_str()) ;
// Remember the library handle under the instance name (Lremove_impl looks it
// up by the same key).
280 handle_map[instanceName] = handle;
281 _numInstanceMutex.unlock() ;
283 // Root collects the IORs of the components of the other processes
284 BCastIOR(_orb,pobj,false);
286 END_OF("[" <<_numproc << "] MPIContainer_i::Lload_impl");
287 return Engines::Component::_duplicate(iobject);
// Removes a component from every process of the MPI container.
//
// For each rank ip in [1, _nbproc) the matching component reference is taken
// from the component's own tior() table and passed to SPremove_impl on the
// container of that rank (taken from _tior). The component is then removed
// locally through Lremove_impl.
//
// NOTE(review): lines around the loop are not visible in this listing;
// presumably the forwarding is restricted to rank 0 -- confirm.
// NOTE(review): the C-style casts between Component_ptr and MPIObject_ptr
// bypass _narrow, and the references returned by the _narrow calls are not
// released -- confirm both are valid for the ORB in use.
291 void Engines_MPIContainer_i::remove_impl(Engines::Component_ptr component_i)
294 Engines::Component_ptr cptr;
295 Engines::MPIObject_ptr pcptr;
296 Engines::MPIObject_ptr spcptr;
298 ASSERT(! CORBA::is_nil(component_i));
301 // Invoke the destruction of the component in the other processes
302 pcptr = (Engines::MPIObject_ptr)component_i;
303 for(ip= 1;ip<_nbproc;ip++){
// Component reference of rank ip, taken from the component's IOR table.
304 spcptr = Engines::MPIObject::_narrow((*(pcptr->tior()))[ip]);
305 cptr = (Engines::Component_ptr)spcptr;
306 (Engines::MPIContainer::_narrow((*_tior)[ip]))->SPremove_impl(cptr);
// Finally remove the component in this process.
310 Lremove_impl(component_i);
// Entry point that remove_impl invokes on the containers of the other ranks:
// performs the local removal through Lremove_impl.
313 void Engines_MPIContainer_i::SPremove_impl(Engines::Component_ptr component_i)
315 Lremove_impl(component_i);
// Removes a component in the current process.
//
// Reads the component's instance name, calls destroy() on it, then, under
// _numInstanceMutex, copies its library handle from handle_map to remove_map
// (when a non-null handle is recorded) and erases the handle_map entry.
// Finally logs the instance names that remain in handle_map.
//
// NOTE(review): cptr, pcptr and spcptr have no visible use in this listing;
// they may be leftovers -- confirm against the complete file.
318 void Engines_MPIContainer_i::Lremove_impl(Engines::Component_ptr component_i)
321 Engines::Component_ptr cptr;
322 Engines::MPIObject_ptr pcptr;
323 Engines::MPIObject_ptr spcptr;
325 BEGIN_OF("[" << _numproc << "] MPIContainer_i::Lremove_impl");
327 ASSERT(! CORBA::is_nil(component_i));
// Fetch the name before destroy(): it is the key used in handle_map.
329 string instanceName = component_i->instanceName() ;
330 MESSAGE("[" << _numproc << "] unload component " << instanceName);
331 component_i->destroy() ;
332 MESSAGE("[" << _numproc << "] test key handle_map");
333 _numInstanceMutex.lock() ; // lock on the remove on handle_map
334 if (handle_map[instanceName]) // if key does not exist, created & initialized null
// Move the handle to remove_map so it can be processed by Lfinalize_removal.
336 remove_map[instanceName] = handle_map[instanceName] ;
338 else MESSAGE("[" << _numproc << "] no key handle_map");
// Erasing also drops the null entry that operator[] may have created above.
339 handle_map.erase(instanceName) ;
340 _numInstanceMutex.unlock() ;
341 MESSAGE("[" << _numproc << "] list handle_map");
// NOTE(review): this traversal of handle_map happens after the unlock above.
342 map<string, void *>::iterator im ;
343 for (im = handle_map.begin() ; im != handle_map.end() ; im ++)
345 MESSAGE("[" << _numproc << "] stay " << (*im).first);
348 END_OF("[" << _numproc << "] MPIContainer_i::Lremove_impl");
// Propagates the finalization of pending removals: calls SPfinalize_removal
// on the container of every rank ip in [1, _nbproc), taken from _tior.
//
// NOTE(review): the remaining lines of this function (loop guard and any
// local finalization call) are not visible in this listing.
352 void Engines_MPIContainer_i::finalize_removal()
357 // Forward the finalization to the containers of the other processes
358 for(ip= 1;ip<_nbproc;ip++)
359 (Engines::MPIContainer::_narrow((*_tior)[ip]))->SPfinalize_removal();
// Entry point that finalize_removal invokes on the containers of the other
// ranks.
// NOTE(review): the body is not visible in this listing; presumably it
// delegates to Lfinalize_removal like the other SP* methods -- confirm.
365 void Engines_MPIContainer_i::SPfinalize_removal()
// Finalizes pending removals in the current process: under _numInstanceMutex,
// walks every entry of remove_map, reading its library handle and logging a
// "dlclose" message with the instance name, then logs "remove_map.clear()".
//
// NOTE(review): the dlclose() and remove_map.clear() calls themselves fall on
// lines not visible in this listing; the log messages suggest they are there
// -- confirm against the complete file.
370 void Engines_MPIContainer_i::Lfinalize_removal()
372 BEGIN_OF("[" << _numproc << "] MPIContainer_i::Lfinalize_removal");
374 map<string, void *>::iterator im ;
375 // lock on the explore remove_map & dlclose
376 _numInstanceMutex.lock() ;
377 for (im = remove_map.begin() ; im != remove_map.end() ; im ++)
// Handle recorded by Lremove_impl for this instance name.
379 void * handle = (*im).second ;
380 MESSAGE("[" << _numproc << "] dlclose " << (*im).first);
383 MESSAGE("[" << _numproc << "] remove_map.clear()");
385 _numInstanceMutex.unlock() ;
387 END_OF("[" << _numproc << "] MPIContainer_i::Lfinalize_removal");
391 void Engines_MPIContainer_i::MPIShutdown()
394 MESSAGE("[" << _numproc << "] shutdown of Corba Server");
396 for(ip= 1;ip<_nbproc;ip++)
397 (Engines::MPIContainer::_narrow((*_tior)[ip]))->Shutdown();