// Copyright (C) 2007-2015  CEA/DEN, EDF R&D
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
//
// Author : Anthony Geay (CEA/DEN)
21 #include "OverlapDEC.hxx"
22 #include "CommInterface.hxx"
23 #include "ParaFIELD.hxx"
24 #include "MPIProcessorGroup.hxx"
25 #include "OverlapElementLocator.hxx"
26 #include "OverlapInterpolationMatrix.hxx"
namespace ParaMEDMEM
{
/*!
\anchor OverlapDEC-det
\class OverlapDEC

\section OverlapDEC-over Overview
The \c OverlapDEC enables the \ref InterpKerRemapGlobal "conservative remapping" of fields between
two parallel codes. This remapping is based on the computation of intersection volumes on
a \b single \b processor \b group. On this processor group two field-templates, called A
and B, are defined. The computation is possible for 3D meshes, 2D meshes, 3D-surface meshes,
1D meshes and 2D-curve meshes. The dimensions must match for the two field-templates A and B.

The main difference with \ref InterpKernelDEC-det "InterpKernelDEC" is that this
\ref para-dec "DEC" works with a *single* processor group, in which the processors share the work.
Consequently each processor manages two \ref MEDCouplingFieldTemplatesPage "field templates" (A and B),
called source and target.
Furthermore, all processors in the processor group cooperate in the computation of the global
interpolation matrix. In this respect \c InterpKernelDEC is a specialization of \c OverlapDEC.
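
A typical calling sequence, sketched below, assumes every processor of the group carries a local
piece of both fields; the \c ParaFIELD pointers \c parafieldA and \c parafieldB are illustrative
names, not part of the API :

\code
std::set<int> procs;
procs.insert(0); procs.insert(1); procs.insert(2);   // the single working group
ParaMEDMEM::OverlapDEC dec(procs,MPI_COMM_WORLD);
dec.attachSourceLocalField(parafieldA,false);        // field-template A (source)
dec.attachTargetLocalField(parafieldB,false);        // field-template B (target)
dec.synchronize();                                   // steps 1 to 5 described below
dec.sendData();                                      // matrix-vector product
\endcode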
\section ParaMEDMEMOverlapDECAlgorithmDescription Algorithm description

Let's consider the following use case, run in ParaMEDMEMTest_OverlapDEC.cxx, to describe
the different steps of the computation. The processor group contains 3 processors.
\anchor ParaMEDMEMOverlapDECImgTest1
\image html OverlapDEC1.png "Example split of the source and target mesh among the 3 procs"
\subsection ParaMEDMEMOverlapDECAlgoStep1 Step 1 : Bounding box exchange and computation of global interactions between procs.

In order to reduce as much as possible the amount of communication between distant processors,
every processor computes a bounding box for A and B. Then an AllToAll communication is performed
so that
every processor can compute the \b global interactions between processors.
This computation leads every processor to compute the same global TODO list, expressed as a list
of pairs. A pair (x,y) means that the field-template A of proc \b x can interact with the
field-template B of proc \b y, because the two bounding boxes intersect.

In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
\b global TODO list :

\b (0,0),(0,1),(1,0),(1,2),(2,0),(2,1),(2,2)

Here the pair (0,2) does not appear because the bounding box of field-template A of proc#0 does
not intersect that of field-template B on proc#2.

This stage is performed by ParaMEDMEM::OverlapElementLocator::computeBoundingBoxes.
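
As a pure illustration of the principle (the actual work is done in the method quoted above), the
global TODO list can be derived locally from the gathered boxes with a double loop; \c BBox,
\c bboxesIntersect, \c bboxA, \c bboxB, \c nbProcs and \c eps are hypothetical names :

\code
struct BBox { double xmin,xmax,ymin,ymax,zmin,zmax; };

bool bboxesIntersect(const BBox& a, const BBox& b, double eps)
{
  return a.xmin-eps<b.xmax && b.xmin-eps<a.xmax &&
         a.ymin-eps<b.ymax && b.ymin-eps<a.ymax &&
         a.zmin-eps<b.zmax && b.zmin-eps<a.zmax;
}

// bboxA[x]/bboxB[y] : boxes of field-templates A and B gathered by the AllToAll
std::vector< std::pair<int,int> > globalTodo;
for(int x=0;x<nbProcs;x++)
  for(int y=0;y<nbProcs;y++)
    if(bboxesIntersect(bboxA[x],bboxB[y],eps))
      globalTodo.push_back(std::make_pair(x,y));
\endcode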
\subsection ParaMEDMEMOverlapDECAlgoStep2 Step 2 : Computation of local TODO list

Starting from the global interactions previously computed in \ref ParaMEDMEMOverlapDECAlgoStep1
"Step 1", each proc computes the TODO list per proc.
The following rule is chosen : a pair (x,y) can be treated by either proc \#x or proc \#y,
in order to reduce the amount of data transferred among
processors. The algorithm chosen for load balancing is the following : each processor starts
with an empty \b local TODO list. Then, for each pair (k,m) in the
\b global TODO list, if the temporary local TODO list of proc\#k is shorter than that of proc\#m,
the pair (k,m) is added to the temporary local TODO list of proc\#k.
If the temporary local TODO list of proc\#m is shorter than that of proc\#k, the pair (k,m) is
added to the temporary local TODO list of proc\#m.
If proc\#k and proc\#m have temporary local TODO lists of the same length, the pair (k,m) is added
to the temporary local TODO list of proc\#k.
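
The rule above can be sketched as follows (illustrative code, not the actual implementation;
\c globalTodo is the list of Step 1 and \c nbProcs the size of the group) :

\code
std::vector< std::vector< std::pair<int,int> > > localTodo(nbProcs);
for(std::size_t i=0;i<globalTodo.size();i++)
  {
    int k=globalTodo[i].first, m=globalTodo[i].second;
    if(localTodo[k].size()<=localTodo[m].size())  // ties go to proc#k
      localTodo[k].push_back(globalTodo[i]);
    else
      localTodo[m].push_back(globalTodo[i]);
  }
\endcode

Applied to the global TODO list above, this reproduces the local TODO lists given below.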
In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
\b local TODO lists :

- proc\#0 : (0,0)
- proc\#1 : (0,1),(1,0)
- proc\#2 : (1,2),(2,0),(2,1),(2,2)
The algorithm described here is not perfect for this use case; we hope to enhance it soon.

At this stage each proc knows precisely its \b local TODO list (with regard to interpolation).
The \b local TODO lists of the other procs are kept as well for future computations.
\subsection ParaMEDMEMOverlapDECAlgoStep3 Step 3 : Mesh exchange between procs

Knowing the \b local TODO list, the aim now is to exchange field-templates between procs.
From the TODO list per proc computed in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2", each proc
derives the exchange TODO lists.

In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" the exchange TODO lists are the
following :

Sending TODO list per proc :
- Proc \#0 : Send fieldtemplate A to Proc\#1, Send fieldtemplate B to Proc\#1, Send fieldtemplate
  B to Proc\#2
- Proc \#1 : Send fieldtemplate A to Proc\#2, Send fieldtemplate B to Proc\#2
- Proc \#2 : No send.
Receiving TODO list per proc :

- Proc \#0 : No reception.
- Proc \#1 : receives fieldtemplate A from Proc\#0, receives fieldtemplate B from Proc\#0
- Proc \#2 : receives fieldtemplate B from Proc\#0, receives fieldtemplate A from Proc\#1,
  receives fieldtemplate B from Proc\#1
To avoid as much as possible large volumes of transfers between procs, only the relevant parts of
the meshes are sent. In order for proc\#k to send fieldtemplate A to fieldtemplate B
of proc \#m, proc\#k computes the part of mesh A contained in the bounding box of B of proc\#m. The
cellIds or nodeIds of that part are sent to proc \#m too.
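
The restriction itself can be pictured with the bounding-box helper sketched in
\ref ParaMEDMEMOverlapDECAlgoStep1 "Step 1" (again purely illustrative; \c nbCellsA,
\c cellBBoxA and \c remoteBBoxB are hypothetical names) :

\code
// Keep only the cells of local mesh A whose bounding box intersects the
// bounding box of B received from proc#m; the kept ids travel with the
// sub-mesh so that proc#m can relate it to the original numbering.
std::vector<int> cellIdsToSend;
for(int c=0;c<nbCellsA;c++)
  if(bboxesIntersect(cellBBoxA[c],remoteBBoxB,eps))
    cellIdsToSend.push_back(c);
\endcode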
Let's consider the couple (k,m) in the TODO list. This couple is treated by either proc k or proc m,
as seen in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2".

As will be dealt with below for the final matrix-vector computations, the resulting matrix of the
couple (k,m), wherever it is computed (proc \#k or proc \#m),
will be stored on \b proc\#m.
- If proc \#k is in charge (performs the matrix computation) for this couple (k,m), the target ids
(cells or nodes) of the mesh on proc \#m are renumbered, because proc \#m has selected a sub-mesh
of the target mesh to avoid transferring large amounts of data. In this case, as proc \#m is
ultimately in charge of the matrix, proc \#k must carefully keep the
source ids needed to be sent to proc\#m. No problem will appear for matrix assembly on proc m
for source ids because no restriction was done on them.
Concerning the source ids to be sent for the matrix-vector computation, proc k will know precisely
which source id field values to send to proc \#m.
This is embodied by OverlapMapping::keepTracksOfTargetIds on proc m.
- If proc \#m is in charge (performs the matrix computation) for this couple (k,m), the source ids
(cells or nodes) of the mesh on proc \#k are renumbered, because proc \#k has selected a sub-mesh
of the source mesh to avoid transferring large amounts of data. In this case, as proc \#m is
ultimately in charge of the matrix, it receives the source ids
from the remote proc \#k, and thus the matrix is directly correct; no need for renumbering as
in \ref ParaMEDMEMOverlapDECAlgoStep5 "Step 5". However proc \#k must
keep track of the ids sent to proc \#m for the matrix-vector computation.
This is embodied by OverlapMapping::keepTracksOfSourceIds on proc k.
This step is performed by the ParaMEDMEM::OverlapElementLocator::exchangeMeshes method.
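
The renumbering mentioned above amounts to translating the local ids of a restricted mesh back to
the original ids shipped with it, along these lines (illustrative sketch) :

\code
// 'idsKept' are the original ids shipped with a restricted sub-mesh; a matrix
// row computed against the sub-mesh uses local column ids, which are mapped
// back to the original numbering of the full mesh.
std::map<int,double> renumberRow(const std::map<int,double>& localRow,
                                 const std::vector<int>& idsKept)
{
  std::map<int,double> res;
  for(std::map<int,double>::const_iterator it=localRow.begin();it!=localRow.end();it++)
    res[idsKept[(*it).first]]=(*it).second;
  return res;
}
\endcode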
\subsection ParaMEDMEMOverlapDECAlgoStep4 Step 4 : Computation of the interpolation matrix

After the mesh exchange in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step 3", each processor has all the
required information to treat its \b local TODO list computed in
\ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2". This step is potentially CPU costly, which is why
the \b local TODO list per proc is expected to
be as well balanced as possible.

The interpolation is performed as the \ref ParaMEDMEM::MEDCouplingRemapper "remapper" does.

This operation is performed by the OverlapInterpolationMatrix::addContribution method.
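
For reference, what each local job computes is close to the following sequential remapper
sketch (\c srcMesh, \c trgMesh and \c srcField are assumed to exist, the "P0P0" method is only an
example, and signatures may vary slightly across MEDCoupling versions) :

\code
ParaMEDMEM::MEDCouplingRemapper remap;
remap.prepare(srcMesh,trgMesh,"P0P0");  // computes the interpolation matrix
MEDCouplingFieldDouble *trgField=remap.transferField(srcField,0.);  // 0. : default value
\endcode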
\subsection ParaMEDMEMOverlapDECAlgoStep5 Step 5 : Global matrix construction.

After having processed the \b local TODO list at the end of \ref ParaMEDMEMOverlapDECAlgoStep4
"Step 4", we need to assemble the final matrix.

The final aim is to have a distributed matrix \f$ M_k \f$ on each proc\#k. In order to reduce
data exchange during the matrix product process,
\f$ M_k \f$ is built using sizeof(Proc group) \c std::vector< \c std::map<int,double> \c >.
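
Concretely, each such container holds one sparse row per local target id, which makes the later
matrix-vector product a per-row accumulation (sketch; \c nbLocalTargetIds, \c srcValuesFromProc
and \c trgValues are illustrative names) :

\code
// One such vector per proc of the group; row i maps source ids to coefficients.
std::vector< std::map<int,double> > matrixFromProc(nbLocalTargetIds);
for(std::size_t i=0;i<matrixFromProc.size();i++)
  {
    double sum=0.;
    const std::map<int,double>& row=matrixFromProc[i];
    for(std::map<int,double>::const_iterator it=row.begin();it!=row.end();it++)
      sum+=(*it).second*srcValuesFromProc[(*it).first];
    trgValues[i]+=sum;   // accumulate the contributions of every source proc
  }
\endcode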
For a proc\#k, it is necessary to fetch the info of all the matrices built in
\ref ParaMEDMEMOverlapDECAlgoStep4 "Step 4" whose pair (i,j) has its second element
equal to k (the matrix of a couple (i,j) being stored on proc\#j).

After this step the matrix repartition is the following, after a call to
ParaMEDMEM::OverlapMapping::prepare :
- proc\#0 : (0,0),(1,0),(2,0)
- proc\#1 : (0,1),(2,1)
- proc\#2 : (1,2),(2,2)

The tuple (2,1), computed on proc 2, is stored on proc 1 after the execution of the function
"prepare". This is an example of the first case of \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2".
The tuple (0,1), computed on proc 1, is stored on proc 1 too. This is an example of the second
case of \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2".
In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_send_vector_st will contain :

- Proc\#0 : 0,1
- Proc\#1 : 0,2
- Proc\#2 : 0,1,2
In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_recv_vector_st will contain :

- Proc\#0 : 0,1,2
- Proc\#1 : 0,2
- Proc\#2 : 1,2
The method in charge of performing this is ParaMEDMEM::OverlapMapping::prepare.
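
At \c sendData() time these two vectors drive the exchanges surrounding the local product, roughly
as follows (an illustrative sketch only; \c procIdsToSend, \c procIdsToRecv, \c sendFieldValuesTo
and \c recvFieldValuesFrom are hypothetical stand-ins for the real MPI traffic) :

\code
for(std::size_t i=0;i<procIdsToSend.size();i++)
  sendFieldValuesTo(procIdsToSend[i]);     // restricted source field values
for(std::size_t i=0;i<procIdsToRecv.size();i++)
  recvFieldValuesFrom(procIdsToRecv[i]);
// then M_k is applied to the gathered source values, as sketched above
\endcode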
*/

  OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm):
    _own_group(true),_interpolation_matrix(0), _locator(0),
    _source_field(0),_own_source_field(false),
    _target_field(0),_own_target_field(false),
    _default_field_value(0.0),
    _comm(MPI_COMM_NULL)
  {
    ParaMEDMEM::CommInterface comm;
    int *ranks_world=new int[procIds.size()]; // ranks of sources and targets in world_comm
    std::copy(procIds.begin(),procIds.end(),ranks_world);
    MPI_Group group,world_group;
    comm.commGroup(world_comm,&world_group);
    comm.groupIncl(world_group,procIds.size(),ranks_world,&group);
    delete [] ranks_world;
    comm.commCreate(world_comm,group,&_comm);
    comm.groupFree(&group);
    comm.groupFree(&world_group);
    if(_comm==MPI_COMM_NULL)  // this rank does not belong to the group
      {
        _group=0;
        return ;
      }
    std::set<int> idsUnion;
    for(std::size_t i=0;i<procIds.size();i++)
      idsUnion.insert(i);     // ranks are renumbered 0..n-1 in the new communicator
    _group=new MPIProcessorGroup(comm,idsUnion,_comm);
  }
  OverlapDEC::~OverlapDEC()
  {
    if(_own_group)
      delete _group;
    if(_own_source_field)
      delete _source_field;
    if(_own_target_field)
      delete _target_field;
    delete _interpolation_matrix;
    delete _locator;
    if (_comm != MPI_COMM_NULL)
      {
        ParaMEDMEM::CommInterface comm;
        comm.commFree(&_comm);
      }
  }
  void OverlapDEC::sendRecvData(bool way)
  {
    if(way)
      sendData();
    else
      recvData();
  }
  void OverlapDEC::sendData()
  {
    _interpolation_matrix->multiply(_default_field_value);
  }

  void OverlapDEC::recvData()
  {
    throw INTERP_KERNEL::Exception("Not implemented yet !!!!");
    //_interpolation_matrix->transposeMultiply();
  }
  void OverlapDEC::synchronize()
  {
    if(!isInGroup())
      return;
    // Check the number of components of the field on both sides (for now, a void field/mesh on one proc is not allowed)
    if (!_source_field || !_source_field->getField())
      throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void source field on a proc is not allowed!");
    if (!_target_field || !_target_field->getField())
      throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void target field on a proc is not allowed!");
    if (_target_field->getField()->getNumberOfComponents() != _source_field->getField()->getNumberOfComponents())
      throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): source and target field have different number of components!");
    delete _interpolation_matrix;
    _locator = new OverlapElementLocator(_source_field,_target_field,*_group, getBoundingBoxAdjustmentAbs());
    _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this, *_locator);
    _locator->copyOptions(*this);
    _locator->exchangeMeshes(*_interpolation_matrix);   // Step 3 : exchange of the relevant mesh parts
    std::vector< std::pair<int,int> > jobs=_locator->getToDoList();
    std::string srcMeth=_locator->getSourceMethod();
    std::string trgMeth=_locator->getTargetMethod();
    for(std::vector< std::pair<int,int> >::const_iterator it=jobs.begin();it!=jobs.end();it++)
      {
        const MEDCouplingPointSet *src=_locator->getSourceMesh((*it).first);
        const DataArrayInt *srcIds=_locator->getSourceIds((*it).first);
        const MEDCouplingPointSet *trg=_locator->getTargetMesh((*it).second);
        const DataArrayInt *trgIds=_locator->getTargetIds((*it).second);
        // Step 4 : interpolation of the local (src,trg) couple
        _interpolation_matrix->addContribution(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second);
      }
    _interpolation_matrix->prepare(_locator->getProcsToSendFieldData());  // Step 5 : global matrix assembly
    _interpolation_matrix->computeDeno();
  }
  void OverlapDEC::attachSourceLocalField(ParaFIELD *field, bool ownPt)
  {
    if(!isInGroup())
      return ;
    if(_own_source_field)
      delete _source_field;
    _source_field=field;
    _own_source_field=ownPt;
  }

  void OverlapDEC::attachTargetLocalField(ParaFIELD *field, bool ownPt)
  {
    if(!isInGroup())
      return ;
    if(_own_target_field)
      delete _target_field;
    _target_field=field;
    _own_target_field=ownPt;
  }
  bool OverlapDEC::isInGroup() const
  {
    if(!_group)
      return false;
    return _group->containsMyRank();
  }
}