medtool/src/ParaMEDMEM/OverlapDEC.cxx

   1 // Copyright (C) 2007-2015  CEA/DEN, EDF R&D
   2 //
   3 // This library is free software; you can redistribute it and/or
   4 // modify it under the terms of the GNU Lesser General Public
   5 // License as published by the Free Software Foundation; either
   6 // version 2.1 of the License, or (at your option) any later version.
   7 //
   8 // This library is distributed in the hope that it will be useful,
   9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 // Lesser General Public License for more details.
  12 //
  13 // You should have received a copy of the GNU Lesser General Public
  14 // License along with this library; if not, write to the Free Software
  15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 //
  17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  18 //
  19 // Author : Anthony Geay (CEA/DEN)
  20
  21 #include "OverlapDEC.hxx"
  22 #include "CommInterface.hxx"
  23 #include "ParaMESH.hxx"
  24 #include "ParaFIELD.hxx"
  25 #include "MPIProcessorGroup.hxx"
  26 #include "OverlapElementLocator.hxx"
  27 #include "OverlapInterpolationMatrix.hxx"
  28
  29 namespace ParaMEDMEM
  30 {
  31 /*!
  32     \anchor OverlapDEC-det
  33     \class OverlapDEC
  34
  35     \section OverlapDEC-over Overview
  36
  37     The \c OverlapDEC enables the \ref InterpKerRemapGlobal "conservative remapping" of fields between
  38     two parallel codes. This remapping is based on the computation of intersection volumes on
  39     a \b single \b processor \b group. On this processor group are defined two field-templates called A
  40     and B. The computation is possible for 3D meshes, 2D meshes, 3D-surface meshes, 1D meshes and
  41     2D-curve meshes. Dimensions must be similar for the distribution templates A and B.
  42
  43     The main difference with \ref InterpKernelDEC-det "InterpKernelDEC" is that this
  44     \ref para-dec "DEC" works with a *single* processor group, in which processors will share the work.
  45     Consequently each processor manages two \ref MEDCouplingFieldTemplatesPage "field templates" (A and B)
  46     called source and target.
  47     Furthermore all processors in the processor group cooperate in the global interpolation matrix
  48     computation. In this respect \c InterpKernelDEC is a specialization of \c OverlapDEC.
  49
  50     \section ParaMEDMEMOverlapDECAlgorithmDescription Algorithm description
  51
  52     Let's consider the following use case that is ran in ParaMEDMEMTest_OverlapDEC.cxx to describes
  53     the different steps of the computation. The processor group contains 3 processors.
  54     \anchor ParaMEDMEMOverlapDECImgTest1
  55     \image html OverlapDEC1.png "Example split of the source and target mesh among the 3 procs"
  56
  57     \subsection ParaMEDMEMOverlapDECAlgoStep1 Step 1 : Bounding box exchange and global interaction between procs computation.
  58
  59     In order to reduce as much as possible the amount of communications between distant processors,
  60     every processor computes a bounding box for A and B. Then a AllToAll communication is performed
  61     so that
  62     every processor can compute the \b global interactions between processor.
  63     This computation leads every processor to compute the same global TODO list expressed as a list
  64     of pair. A pair ( x, y ) means that proc \b x fieldtemplate A can interact with fieltemplate B of
  65     proc \b y because the two bounding boxes interact.
  66     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
  67     a \b global TODO list :
  68
  69     \b (0,0),(0,1),(1,0),(1,2),(2,0),(2,1),(2,2)
  70
  71     Here the pair (0,2) does not appear because the bounding box of fieldtemplateA of proc#2 does
  72     not intersect that of fieldtemplate B on proc#0.
  73
  74     Stage performed by ParaMEDMEM::OverlapElementLocator::computeBoundingBoxes.
  75
  76     \subsection ParaMEDMEMOverlapDECAlgoStep2 Step 2 : Computation of local TODO list
  77
  78     Starting from the global interaction previously computed in \ref ParaMEDMEMOverlapDECAlgoStep1
  79     "Step 1", each proc computes the TODO list per proc.
  80     The following rules is chosen : a pair (x,y) can be treated by either proc \#x or proc \#y,
  81     in order to reduce the amount of data transfert among
  82     processors. The algorithm chosen for load balancing is the following : Each processor has
  83     an empty \b local TODO list at the beginning. Then for each pair (k,m) in
  84     \b global TODO list, if proc\#k has less temporary local list than proc\#m pair, (k,m) is added
  85     to temparary local TODO list of proc\#k.
  86     If proc\#m has less temporary local TODO list than proc\#k pair, (k,m) is added to temporary
  87     local TODO list of proc\#m.
  88     If proc\#k and proc\#m have the same amount of temporary local TODO list pair, (k,m) is added to
  89     temporary local TODO list of proc\#k.
  90
  91     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
  92     local TODO list :
  93
  94     - proc\#0 : (0,0)
  95     - proc\#1 : (0,1),(1,0)
  96     - proc\#2 : (1,2),(2,0),(2,1),(2,2)
  97
  98     The algorithm described here is not perfect for this use case, we hope to enhance it soon.
  99
 100     At this stage each proc knows precisely its \b local TODO list (with regard to interpolation).
 101     The \b local TODO list of other procs than local
 102     is kept for future computations.
 103
 104     \subsection ParaMEDMEMOverlapDECAlgoStep3 Step 3 : Matrix echange between procs
 105
 106     Knowing the \b local TODO list, the aim now is to exchange field-templates between procs.
 107     Each proc computes knowing TODO list per
 108     proc computed in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2" the exchange TODO list :
 109
 110     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" the exchange TODO list gives the
 111     following results :
 112
 113     Sending TODO list per proc :
 114
 115     - proc \#0 : Send fieldtemplate A to Proc\#1, Send fieldtemplate B to Proc\#1, Send fieldtemplate
 116     B to Proc\#2
 117     - Proc \#1 : Send fieldtemplate A to Proc\#2, Send fieldtemplate B to Proc\#2
 118     - Proc \#2 : No send.
 119
 120     Receiving TODO list per proc :
 121
 122     - proc \#0 : No receiving
 123     - proc \#1 : receiving fieldtemplate A from Proc\#0,  receiving fieldtemplate B from Proc\#0
 124     - proc \#2 : receiving fieldtemplate B from Proc\#0, receiving fieldtemplate A from Proc\#1,
 125     receiving fieldtemplate B from Proc\#1
 126
 127     To avoid as much as possible large volumes of transfers between procs, only relevant parts of
 128     meshes are sent. In order for proc\#k to send fieldtemplate A to fieldtemplate B
 129     of proc \#m., proc\#k computes the part of mesh A contained in the boundingbox B of proc\#m. It
 130     implies that the corresponding cellIds or nodeIds of the
 131     corresponding part are sent to proc \#m too.
 132
 133     Let's consider the couple (k,m) in the TODO list. This couple is treated by either k or m as
 134     seen in \ref ParaMEDMEMOverlapDECAlgoStep2 "here in Step2".
 135
 136     As will be dealt in Step 6, for final matrix-vector computations, the resulting matrix of the
 137     couple (k,m) whereever it is computed (proc \#k or proc \#m)
 138     will be stored in \b proc\#m.
 139
 140     - If proc \#k is in charge (performs the matrix computation) for this couple (k,m), target ids
 141     (cells or nodes) of the mesh in proc \#m are renumbered, because proc \#m has seelected a sub mesh
 142     of the target mesh to avoid large amounts of data to transfer. In this case as proc \#m is ultimately
 143      in charge of the matrix, proc \#k must keep preciously the
 144     source ids needed to be sent to proc\#m. No problem will appear for matrix assembling in proc m
 145     for source ids because no restriction was done.
 146     Concerning source ids to be sent for the matrix-vector computation, proc k will know precisely
 147     which source ids field values to send to proc \#m.
 148     This is embodied by OverlapMapping::keepTracksOfTargetIds in proc m.
 149
 150     - If proc \#m is in charge (performs matrix computation) for this couple (k,m), source ids (cells
 151     or nodes) of the mesh in proc \#k are renumbered, because proc \#k has selected a sub mesh of the
 152      source mesh to avoid large amounts of data to transfer. In this case as proc \#k is ultimately
 153      in charge of the matrix, proc \#m receives the source ids
 154     from remote proc \#k, and thus the matrix is directly correct, no need for renumbering as
 155      in \ref ParaMEDMEMOverlapDECAlgoStep5 "Step 5". However proc \#k must
 156     keep track of the ids sent to proc \#m for te matrix-vector computation.
 157     This is incarnated by OverlapMapping::keepTracksOfSourceIds in proc k.
 158
 159     This step is performed in ParaMEDMEM::OverlapElementLocator::exchangeMeshes method.
 160
 161     \subsection ParaMEDMEMOverlapDECAlgoStep4 Step 4 : Computation of the interpolation matrix
 162
 163     After mesh exchange in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step3" each processor has all the
 164     required information to treat its \b local TODO list computed in
 165     \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2". This step is potentially CPU costly, which is why
 166     the \b local TODO list per proc is expected to
 167     be as well balanced as possible.
 168
 169     The interpolation is performed as the \ref ParaMEDMEM::MEDCouplingRemapper "remapper" does.
 170
 171     This operation is performed by OverlapInterpolationMatrix::addContribution method.
 172
 173     \subsection ParaMEDMEMOverlapDECAlgoStep5 Step 5 : Global matrix construction.
 174
 175     After having performed the TODO list at the end of \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4"
 176     we need to assemble the final matrix.
 177
 178     The final aim is to have a distributed matrix \f$ M_k \f$ on each proc\#k. In order to reduce
 179     data exchange during the matrix product process,
 180     \f$ M_k \f$ is built using sizeof(Proc group) \c std::vector< \c std::map<int,double> \c >.
 181
 182     For a proc\#k, it is necessary to fetch info of all matrices built in
 183     \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4" where the first element in pair (i,j)
 184     is equal to k.
 185
 186     After this step, the matrix repartition is the following after a call to
 187     ParaMEDMEM::OverlapMapping::prepare :
 188
 189     - proc\#0 : (0,0),(1,0),(2,0)
 190     - proc\#1 : (0,1),(2,1)
 191     - proc\#2 : (1,2),(2,2)
 192
 193     Tuple (2,1) computed on proc 2 is stored in proc 1 after execution of the function
 194     "prepare". This is an example of item 0 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 195     Tuple (0,1) computed on proc 1 is stored in proc 1 too. This is an example of item 1 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 196
 197     In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_send_vector_st will contain :
 198
 199     - Proc\#0 : 0,1
 200     - Proc\#1 : 0,2
 201     - Proc\#2 : 0,1,2
 202
 203     In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_recv_vector_st will contain :
 204
 205     - Proc\#0 : 0,1,2
 206     - Proc\#1 : 0,2
 207     - Proc\#2 : 1,2
 208
 209     The method in charge to perform this is : ParaMEDMEM::OverlapMapping::prepare.
 210 */
 211   OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm):
 212       _load_balancing_algo(1),
 213       _own_group(true),_interpolation_matrix(0), _locator(0),
 214       _source_field(0),_own_source_field(false),
 215       _target_field(0),_own_target_field(false),
 216       _default_field_value(0.0),
 217       _comm(MPI_COMM_NULL)
 218   {
 219     ParaMEDMEM::CommInterface comm;
 220     int *ranks_world=new int[procIds.size()]; // ranks of sources and targets in world_comm
 221     std::copy(procIds.begin(),procIds.end(),ranks_world);
 222     MPI_Group group,world_group;
 223     comm.commGroup(world_comm,&world_group);
 224     comm.groupIncl(world_group,procIds.size(),ranks_world,&group);
 225     delete [] ranks_world;
 226     comm.commCreate(world_comm,group,&_comm);
 227     comm.groupFree(&group);
 228     comm.groupFree(&world_group);
 229     if(_comm==MPI_COMM_NULL)
 230       {
 231         _group=0;
 232         return ;
 233       }
 234     std::set<int> idsUnion;
 235     for(std::size_t i=0;i<procIds.size();i++)
 236       idsUnion.insert(i);
 237     _group=new MPIProcessorGroup(comm,idsUnion,_comm);
 238   }
 239
 240   OverlapDEC::~OverlapDEC()
 241   {
 242     if(_own_group)
 243       delete _group;
 244     if(_own_source_field)
 245       delete _source_field;
 246     if(_own_target_field)
 247       delete _target_field;
 248     delete _interpolation_matrix;
 249     delete _locator;
 250     if (_comm != MPI_COMM_NULL)
 251       {
 252         ParaMEDMEM::CommInterface comm;
 253         comm.commFree(&_comm);
 254       }
 255   }
 256
 257   void OverlapDEC::sendRecvData(bool way)
 258   {
 259     if(way)
 260       sendData();
 261     else
 262       recvData();
 263   }
 264
 265   void OverlapDEC::sendData()
 266   {
 267     _interpolation_matrix->multiply(_default_field_value);
 268   }
 269
 270   void OverlapDEC::recvData()
 271   {
 272     throw INTERP_KERNEL::Exception("Not implemented yet !!!!");
 273     //_interpolation_matrix->transposeMultiply();
 274   }
 275
 276   void OverlapDEC::synchronize()
 277   {
 278     if(!isInGroup())
 279       return ;
 280     // Check number of components of field on both side (for now allowing void field/mesh on one proc is not allowed)
 281     if (!_source_field || !_source_field->getField())
 282       throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void source field on a proc is not allowed!");
 283     if (!_target_field || !_target_field->getField())
 284       throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void target field on a proc is not allowed!");
 285     if (_target_field->getField()->getNumberOfComponents() != _source_field->getField()->getNumberOfComponents())
 286       throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): source and target field have different number of components!");
 287     delete _interpolation_matrix;
 288     _locator = new OverlapElementLocator(_source_field,_target_field,*_group, getBoundingBoxAdjustmentAbs(), _load_balancing_algo);
 289     _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this, *_locator);
 290     _locator->copyOptions(*this);
 291     _locator->exchangeMeshes(*_interpolation_matrix);
 292     std::vector< std::pair<int,int> > jobs=_locator->getToDoList();
 293     std::string srcMeth=_locator->getSourceMethod();
 294     std::string trgMeth=_locator->getTargetMethod();
 295     for(std::vector< std::pair<int,int> >::const_iterator it=jobs.begin();it!=jobs.end();it++)
 296       {
 297         const MEDCouplingPointSet *src=_locator->getSourceMesh((*it).first);
 298         const DataArrayInt *srcIds=_locator->getSourceIds((*it).first);
 299         const MEDCouplingPointSet *trg=_locator->getTargetMesh((*it).second);
 300         const DataArrayInt *trgIds=_locator->getTargetIds((*it).second);
 301         _interpolation_matrix->computeLocalIntersection(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second);
 302       }
 303     _interpolation_matrix->prepare(_locator->getProcsToSendFieldData());
 304     _interpolation_matrix->computeSurfacesAndDeno();
 305   }
 306
 307   void OverlapDEC::attachSourceLocalField(ParaFIELD *field, bool ownPt)
 308   {
 309     if(!isInGroup())
 310       return ;
 311     if(_own_source_field)
 312       delete _source_field;
 313     _source_field=field;
 314     _own_source_field=ownPt;
 315   }
 316
 317   void OverlapDEC::attachTargetLocalField(ParaFIELD *field, bool ownPt)
 318   {
 319     if(!isInGroup())
 320       return ;
 321     if(_own_target_field)
 322       delete _target_field;
 323     _target_field=field;
 324     _own_target_field=ownPt;
 325   }
 326
 327   void OverlapDEC::attachSourceLocalField(MEDCouplingFieldDouble *field)
 328   {
 329     if(!isInGroup())
 330       return ;
 331
 332     ParaMESH *paramesh = new ParaMESH(static_cast<MEDCouplingPointSet *>(const_cast<MEDCouplingMesh *>(field->getMesh())),
 333                                       *_group,field->getMesh()->getName());
 334     ParaFIELD *tmpField=new ParaFIELD(field, paramesh, *_group);
 335     tmpField->setOwnSupport(true);
 336     attachSourceLocalField(tmpField,true);
 337   }
 338
 339   void OverlapDEC::attachTargetLocalField(MEDCouplingFieldDouble *field)
 340   {
 341     if(!isInGroup())
 342       return ;
 343
 344     ParaMESH *paramesh = new ParaMESH(static_cast<MEDCouplingPointSet *>(const_cast<MEDCouplingMesh *>(field->getMesh())),
 345                                       *_group,field->getMesh()->getName());
 346     ParaFIELD *tmpField=new ParaFIELD(field, paramesh, *_group);
 347     tmpField->setOwnSupport(true);
 348     attachTargetLocalField(tmpField,true);
 349   }
 350
 351   bool OverlapDEC::isInGroup() const
 352   {
 353     if(!_group)
 354       return false;
 355     return _group->containsMyRank();
 356   }
 357
 358   void OverlapDEC::debugPrintWorkSharing(std::ostream & ostr) const
 359   {
 360     _locator->debugPrintWorkSharing(ostr);
 361   }
 362 }