src/ParaMEDMEM/OverlapDEC.cxx

   1 // Copyright (C) 2007-2016  CEA/DEN, EDF R&D
   2 //
   3 // This library is free software; you can redistribute it and/or
   4 // modify it under the terms of the GNU Lesser General Public
   5 // License as published by the Free Software Foundation; either
   6 // version 2.1 of the License, or (at your option) any later version.
   7 //
   8 // This library is distributed in the hope that it will be useful,
   9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 // Lesser General Public License for more details.
  12 //
  13 // You should have received a copy of the GNU Lesser General Public
  14 // License along with this library; if not, write to the Free Software
  15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 //
  17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  18 //
  19 // Author : Anthony Geay (CEA/DEN)
  20
  21 #include "OverlapDEC.hxx"
  22 #include "CommInterface.hxx"
  23 #include "ParaMESH.hxx"
  24 #include "ParaFIELD.hxx"
  25 #include "MPIProcessorGroup.hxx"
  26 #include "OverlapElementLocator.hxx"
  27 #include "OverlapInterpolationMatrix.hxx"
  28 #include "ICoCoMEDField.hxx"
  29
  30 namespace MEDCoupling
  31 {
  32 /*!
  33     \anchor OverlapDEC-det
  34     \class OverlapDEC
  35
  36     \section OverlapDEC-over Overview
  37
  38     The \c OverlapDEC enables the \ref InterpKerRemapGlobal "conservative remapping" of fields between
  39     two parallel codes. This remapping is based on the computation of intersection volumes on
  40     a \b single \b processor \b group. On this processor group are defined two field-templates called A
  41     and B. The computation is possible for 3D meshes, 2D meshes, 3D-surface meshes, 1D meshes and
  42     2D-curve meshes. Dimensions must be similar for the distribution templates A and B.
  43
  44     The main difference with \ref InterpKernelDEC-det "InterpKernelDEC" is that this
  45     \ref para-dec "DEC" works with a *single* processor group, in which processors will share the work.
  46     Consequently each processor manages two \ref MEDCouplingFieldTemplatesPage "field templates" (A and B)
  47     called source and target.
  48     Furthermore all processors in the processor group cooperate in the global interpolation matrix
  49     computation. In this respect \c InterpKernelDEC is a specialization of \c OverlapDEC.
  50
  51     \section ParaMEDMEMOverlapDECAlgorithmDescription Algorithm description
  52
  53     Let's consider the following use case that is ran in ParaMEDMEMTest_OverlapDEC.cxx to describes
  54     the different steps of the computation. The processor group contains 3 processors.
  55     \anchor ParaMEDMEMOverlapDECImgTest1
  56     \image html OverlapDEC1.png "Example split of the source and target mesh among the 3 procs"
  57
  58     \subsection ParaMEDMEMOverlapDECAlgoStep1 Step 1 : Bounding box exchange and global interaction between procs computation.
  59
  60     In order to reduce as much as possible the amount of communications between distant processors,
  61     every processor computes a bounding box for A and B. Then a AllToAll communication is performed
  62     so that
  63     every processor can compute the \b global interactions between processor.
  64     This computation leads every processor to compute the same global TODO list expressed as a list
  65     of pair. A pair ( x, y ) means that proc \b x fieldtemplate A can interact with fieltemplate B of
  66     proc \b y because the two bounding boxes interact.
  67     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
  68     a \b global TODO list :
  69
  70     \b (0,0),(0,1),(1,0),(1,2),(2,0),(2,1),(2,2)
  71
  72     Here the pair (0,2) does not appear because the bounding box of fieldtemplateA of proc#2 does
  73     not intersect that of fieldtemplate B on proc#0.
  74
  75     Stage performed by MEDCoupling::OverlapElementLocator::computeBoundingBoxes.
  76
  77     \subsection ParaMEDMEMOverlapDECAlgoStep2 Step 2 : Computation of local TODO list
  78
  79     Starting from the global interaction previously computed in \ref ParaMEDMEMOverlapDECAlgoStep1
  80     "Step 1", each proc computes the TODO list per proc.
  81     The following rules is chosen : a pair (x,y) can be treated by either proc \#x or proc \#y,
  82     in order to reduce the amount of data transfers among
  83     processors. The algorithm chosen for load balancing is the following : Each processor has
  84     an empty \b local TODO list at the beginning. Then for each pair (k,m) in
  85     \b global TODO list, if proc\#k has less temporary local list than proc\#m pair, (k,m) is added
  86     to temporary local TODO list of proc\#k.
  87     If proc\#m has less temporary local TODO list than proc\#k pair, (k,m) is added to temporary
  88     local TODO list of proc\#m.
  89     If proc\#k and proc\#m have the same amount of temporary local TODO list pair, (k,m) is added to
  90     temporary local TODO list of proc\#k.
  91
  92     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
  93     local TODO list :
  94
  95     - proc\#0 : (0,0)
  96     - proc\#1 : (0,1),(1,0)
  97     - proc\#2 : (1,2),(2,0),(2,1),(2,2)
  98
  99     The algorithm described here is not perfect for this use case, we hope to enhance it soon.
 100
 101     At this stage each proc knows precisely its \b local TODO list (with regard to interpolation).
 102     The \b local TODO list of other procs than local
 103     is kept for future computations.
 104
 105     \subsection ParaMEDMEMOverlapDECAlgoStep3 Step 3 : Matrix echange between procs
 106
 107     Knowing the \b local TODO list, the aim now is to exchange field-templates between procs.
 108     Each proc computes knowing TODO list per
 109     proc computed in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2" the exchange TODO list :
 110
 111     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" the exchange TODO list gives the
 112     following results :
 113
 114     Sending TODO list per proc :
 115
 116     - proc \#0 : Send fieldtemplate A to Proc\#1, Send fieldtemplate B to Proc\#1, Send fieldtemplate
 117     B to Proc\#2
 118     - Proc \#1 : Send fieldtemplate A to Proc\#2, Send fieldtemplate B to Proc\#2
 119     - Proc \#2 : No send.
 120
 121     Receiving TODO list per proc :
 122
 123     - proc \#0 : No receiving
 124     - proc \#1 : receiving fieldtemplate A from Proc\#0,  receiving fieldtemplate B from Proc\#0
 125     - proc \#2 : receiving fieldtemplate B from Proc\#0, receiving fieldtemplate A from Proc\#1,
 126     receiving fieldtemplate B from Proc\#1
 127
 128     To avoid as much as possible large volumes of transfers between procs, only relevant parts of
 129     meshes are sent. In order for proc\#k to send fieldtemplate A to fieldtemplate B
 130     of proc \#m., proc\#k computes the part of mesh A contained in the boundingbox B of proc\#m. It
 131     implies that the corresponding cellIds or nodeIds of the
 132     corresponding part are sent to proc \#m too.
 133
 134     Let's consider the couple (k,m) in the TODO list. This couple is treated by either k or m as
 135     seen in \ref ParaMEDMEMOverlapDECAlgoStep2 "here in Step2".
 136
 137     As will be dealt in Step 6, for final matrix-vector computations, the resulting matrix of the
 138     couple (k,m) wherever it is computed (proc \#k or proc \#m)
 139     will be stored in \b proc\#m.
 140
 141     - If proc \#k is in charge (performs the matrix computation) for this couple (k,m), target ids
 142     (cells or nodes) of the mesh in proc \#m are renumbered, because proc \#m has seelected a sub mesh
 143     of the target mesh to avoid large amounts of data to transfer. In this case as proc \#m is ultimately
 144      in charge of the matrix, proc \#k must keep preciously the
 145     source ids needed to be sent to proc\#m. No problem will appear for matrix assembling in proc m
 146     for source ids because no restriction was done.
 147     Concerning source ids to be sent for the matrix-vector computation, proc k will know precisely
 148     which source ids field values to send to proc \#m.
 149     This is embodied by OverlapMapping::keepTracksOfTargetIds in proc m.
 150
 151     - If proc \#m is in charge (performs matrix computation) for this couple (k,m), source ids (cells
 152     or nodes) of the mesh in proc \#k are renumbered, because proc \#k has selected a sub mesh of the
 153      source mesh to avoid large amounts of data to transfer. In this case as proc \#k is ultimately
 154      in charge of the matrix, proc \#m receives the source ids
 155     from remote proc \#k, and thus the matrix is directly correct, no need for renumbering as
 156      in \ref ParaMEDMEMOverlapDECAlgoStep5 "Step 5". However proc \#k must
 157     keep track of the ids sent to proc \#m for the matrix-vector computation.
 158     This is incarnated by OverlapMapping::keepTracksOfSourceIds in proc k.
 159
 160     This step is performed in MEDCoupling::OverlapElementLocator::exchangeMeshes method.
 161
 162     \subsection ParaMEDMEMOverlapDECAlgoStep4 Step 4 : Computation of the interpolation matrix
 163
 164     After mesh exchange in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step3" each processor has all the
 165     required information to treat its \b local TODO list computed in
 166     \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2". This step is potentially CPU costly, which is why
 167     the \b local TODO list per proc is expected to
 168     be as well balanced as possible.
 169
 170     The interpolation is performed as the \ref MEDCoupling::MEDCouplingRemapper "remapper" does.
 171
 172     This operation is performed by OverlapInterpolationMatrix::addContribution method.
 173
 174     \subsection ParaMEDMEMOverlapDECAlgoStep5 Step 5 : Global matrix construction.
 175
 176     After having performed the TODO list at the end of \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4"
 177     we need to assemble the final matrix.
 178
 179     The final aim is to have a distributed matrix \f$ M_k \f$ on each proc\#k. In order to reduce
 180     data exchange during the matrix product process,
 181     \f$ M_k \f$ is built using sizeof(Proc group) \c std::vector< \c std::map<int,double> \c >.
 182
 183     For a proc\#k, it is necessary to fetch info of all matrices built in
 184     \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4" where the first element in pair (i,j)
 185     is equal to k.
 186
 187     After this step, the matrix repartition is the following after a call to
 188     MEDCoupling::OverlapMapping::prepare :
 189
 190     - proc\#0 : (0,0),(1,0),(2,0)
 191     - proc\#1 : (0,1),(2,1)
 192     - proc\#2 : (1,2),(2,2)
 193
 194     Tuple (2,1) computed on proc 2 is stored in proc 1 after execution of the function
 195     "prepare". This is an example of item 0 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 196     Tuple (0,1) computed on proc 1 is stored in proc 1 too. This is an example of item 1 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 197
 198     In the end MEDCoupling::OverlapMapping::_proc_ids_to_send_vector_st will contain :
 199
 200     - Proc\#0 : 0,1
 201     - Proc\#1 : 0,2
 202     - Proc\#2 : 0,1,2
 203
 204     In the end MEDCoupling::OverlapMapping::_proc_ids_to_recv_vector_st will contain :
 205
 206     - Proc\#0 : 0,1,2
 207     - Proc\#1 : 0,2
 208     - Proc\#2 : 1,2
 209
 210     The method in charge to perform this is : MEDCoupling::OverlapMapping::prepare.
 211 */
 212   OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm):
 213       _load_balancing_algo(1),
 214       _own_group(true),_interpolation_matrix(0), _locator(0),
 215       _source_field(0),_own_source_field(false),
 216       _target_field(0),_own_target_field(false),
 217       _default_field_value(0.0),
 218       _comm(MPI_COMM_NULL)
 219   {
 220     MEDCoupling::CommInterface comm;
 221     int *ranks_world=new int[procIds.size()]; // ranks of sources and targets in world_comm
 222     std::copy(procIds.begin(),procIds.end(),ranks_world);
 223     MPI_Group group,world_group;
 224     comm.commGroup(world_comm,&world_group);
 225     comm.groupIncl(world_group,procIds.size(),ranks_world,&group);
 226     delete [] ranks_world;
 227     comm.commCreate(world_comm,group,&_comm);
 228     comm.groupFree(&group);
 229     comm.groupFree(&world_group);
 230     if(_comm==MPI_COMM_NULL)
 231       {
 232         _group=0;
 233         return ;
 234       }
 235     std::set<int> idsUnion;
 236     for(std::size_t i=0;i<procIds.size();i++)
 237       idsUnion.insert(i);
 238     _group=new MPIProcessorGroup(comm,idsUnion,_comm);
 239   }
 240
 241   OverlapDEC::~OverlapDEC()
 242   {
 243     if(_own_group)
 244       delete _group;
 245     if(_own_source_field)
 246       delete _source_field;
 247     if(_own_target_field)
 248       delete _target_field;
 249     delete _interpolation_matrix;
 250     delete _locator;
 251     if (_comm != MPI_COMM_NULL)
 252       {
 253         MEDCoupling::CommInterface comm;
 254         comm.commFree(&_comm);
 255       }
 256   }
 257
 258   void OverlapDEC::sendRecvData(bool way)
 259   {
 260     if(way)
 261       sendData();
 262     else
 263       recvData();
 264   }
 265
 266   void OverlapDEC::sendData()
 267   {
 268     _interpolation_matrix->multiply(_default_field_value);
 269   }
 270
 271   void OverlapDEC::recvData()
 272   {
 273     throw INTERP_KERNEL::Exception("Not implemented yet !!!!");
 274     //_interpolation_matrix->transposeMultiply();
 275   }
 276
 277   void OverlapDEC::synchronize()
 278   {
 279     if(!isInGroup())
 280       return ;
 281     // Check number of components of field on both side (for now allowing void field/mesh on one proc is not allowed)
 282     if (!_source_field || !_source_field->getField())
 283       throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void source field on a proc is not allowed!");
 284     if (!_target_field || !_target_field->getField())
 285       throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void target field on a proc is not allowed!");
 286     if (_target_field->getField()->getNumberOfComponents() != _source_field->getField()->getNumberOfComponents())
 287       throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): source and target field have different number of components!");
 288     delete _interpolation_matrix;
 289     _locator = new OverlapElementLocator(_source_field,_target_field,*_group, getBoundingBoxAdjustmentAbs(), _load_balancing_algo);
 290     _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this, *_locator);
 291     _locator->copyOptions(*this);
 292     _locator->exchangeMeshes(*_interpolation_matrix);
 293     std::vector< std::pair<int,int> > jobs=_locator->getToDoList();
 294     std::string srcMeth=_locator->getSourceMethod();
 295     std::string trgMeth=_locator->getTargetMethod();
 296     for(std::vector< std::pair<int,int> >::const_iterator it=jobs.begin();it!=jobs.end();it++)
 297       {
 298         const MEDCouplingPointSet *src=_locator->getSourceMesh((*it).first);
 299         const DataArrayInt *srcIds=_locator->getSourceIds((*it).first);
 300         const MEDCouplingPointSet *trg=_locator->getTargetMesh((*it).second);
 301         const DataArrayInt *trgIds=_locator->getTargetIds((*it).second);
 302         _interpolation_matrix->computeLocalIntersection(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second);
 303       }
 304     _interpolation_matrix->prepare(_locator->getProcsToSendFieldData());
 305     _interpolation_matrix->computeSurfacesAndDeno();
 306   }
 307
 308   void OverlapDEC::attachSourceLocalField(ParaFIELD *field, bool ownPt)
 309   {
 310     if(!isInGroup())
 311       return ;
 312     if(_own_source_field)
 313       delete _source_field;
 314     _source_field=field;
 315     _own_source_field=ownPt;
 316   }
 317
 318   void OverlapDEC::attachTargetLocalField(ParaFIELD *field, bool ownPt)
 319   {
 320     if(!isInGroup())
 321       return ;
 322     if(_own_target_field)
 323       delete _target_field;
 324     _target_field=field;
 325     _own_target_field=ownPt;
 326   }
 327
 328   void OverlapDEC::attachSourceLocalField(MEDCouplingFieldDouble *field)
 329   {
 330     if(!isInGroup())
 331       return ;
 332
 333     ParaMESH *paramesh = new ParaMESH(static_cast<MEDCouplingPointSet *>(const_cast<MEDCouplingMesh *>(field->getMesh())),
 334                                       *_group,field->getMesh()->getName());
 335     ParaFIELD *tmpField=new ParaFIELD(field, paramesh, *_group);
 336     tmpField->setOwnSupport(true);
 337     attachSourceLocalField(tmpField,true);
 338   }
 339
 340   void OverlapDEC::attachTargetLocalField(MEDCouplingFieldDouble *field)
 341   {
 342     if(!isInGroup())
 343       return ;
 344
 345     ParaMESH *paramesh = new ParaMESH(static_cast<MEDCouplingPointSet *>(const_cast<MEDCouplingMesh *>(field->getMesh())),
 346                                       *_group,field->getMesh()->getName());
 347     ParaFIELD *tmpField=new ParaFIELD(field, paramesh, *_group);
 348     tmpField->setOwnSupport(true);
 349     attachTargetLocalField(tmpField,true);
 350   }
 351
 352   void OverlapDEC::attachSourceLocalField(ICoCo::MEDField *field)
 353   {
 354     attachSourceLocalField(field->getField());
 355   }
 356
 357   void OverlapDEC::attachTargetLocalField(ICoCo::MEDField *field)
 358   {
 359     attachTargetLocalField(field->getField());
 360   }
 361
 362   bool OverlapDEC::isInGroup() const
 363   {
 364     if(!_group)
 365       return false;
 366     return _group->containsMyRank();
 367   }
 368
 369   void OverlapDEC::debugPrintWorkSharing(std::ostream & ostr) const
 370   {
 371     _locator->debugPrintWorkSharing(ostr);
 372   }
 373 }