src/ParaMEDMEM/OverlapDEC.cxx

   1 // Copyright (C) 2007-2015  CEA/DEN, EDF R&D
   2 //
   3 // This library is free software; you can redistribute it and/or
   4 // modify it under the terms of the GNU Lesser General Public
   5 // License as published by the Free Software Foundation; either
   6 // version 2.1 of the License, or (at your option) any later version.
   7 //
   8 // This library is distributed in the hope that it will be useful,
   9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 // Lesser General Public License for more details.
  12 //
  13 // You should have received a copy of the GNU Lesser General Public
  14 // License along with this library; if not, write to the Free Software
  15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 //
  17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  18 //
  19 // Author : Anthony Geay (CEA/DEN)
  20
  21 #include "OverlapDEC.hxx"
  22 #include "CommInterface.hxx"
  23 #include "ParaFIELD.hxx"
  24 #include "MPIProcessorGroup.hxx"
  25 #include "OverlapElementLocator.hxx"
  26 #include "OverlapInterpolationMatrix.hxx"
  27
  28 namespace ParaMEDMEM
  29 {
  30 /*!
  31     \anchor OverlapDEC-det
  32     \class OverlapDEC
  33
  34     The \c OverlapDEC enables the \ref InterpKerRemapGlobal "conservative remapping" of fields between
  35     two parallel codes. This remapping is based on the computation of intersection volumes on
  36     a \b same \b processor \b group. On this processor group are defined two field-templates called A
  37     and B. The computation is possible for 3D meshes, 2D meshes, 3D-surface meshes, 1D meshes and
  38     2D-curve meshes. Dimensions must be similar for the distribution templates A and B.
  39     The main difference with \ref InterpKernelDEC-det is that this \ref para-dec "DEC" manages 2 field templates
  40     on each processor of the processor group (A and B) called source and target.
  41     Furthermore all processors in processor group cooperates in global interpolation matrix
  42     computation. In this respect \ref InterpKernelDEC is a specialization of \c OverlapDEC.
  43
  44     \section ParaMEDMEMOverlapDECAlgorithmDescription Algorithm Description
  45
  46     Let's consider the following use case that is ran in ParaMEDMEMTest_OverlapDEC.cxx to describes
  47     the different steps of the computation. The processor group contains 3 processors.
  48     \anchor ParaMEDMEMOverlapDECImgTest1
  49     \image html OverlapDEC1.png "Example showing the use case in order to explain the different steps."
  50
  51     \subsection ParaMEDMEMOverlapDECAlgoStep1 Step 1 : Bounding box exchange and global interaction
  52     between procs computation.
  53
  54     In order to reduce as much as possible the amount of communications between distant processors,
  55     every processor computes a bounding box for A and B. Then a AllToAll communication is performed
  56     so that
  57     every processor can compute the \b global interactions between processor.
  58     This computation leads every processor to compute the same global TODO list expressed as a list
  59     of pair. A pair ( x, y ) means that proc \b x fieldtemplate A can interact with fieltemplate B of
  60     proc \b y because the two bounding boxes interact.
  61     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
  62     a \b global TODO list :
  63
  64     \b (0,0),(0,1),(1,0),(1,2),(2,0),(2,1),(2,2)
  65
  66     Here the pair (0,2) does not appear because the bounding box of fieldtemplateA of proc#2 does
  67     not intersect that of fieldtemplate B on proc#0.
  68
  69     Stage performed by ParaMEDMEM::OverlapElementLocator::computeBoundingBoxes.
  70
  71     \subsection ParaMEDMEMOverlapDECAlgoStep2 Step 2 : Computation of local TODO list
  72
  73     Starting from the global interaction previously computed in \ref ParaMEDMEMOverlapDECAlgoStep1
  74     "Step 1", each proc computes the TODO list per proc.
  75     The following rules is chosen : a pair (x,y) can be treated by either proc \#x or proc \#y,
  76     in order to reduce the amount of data transfert among
  77     processors. The algorithm chosen for load balancing is the following : Each processor has
  78     an empty \b local TODO list at the beginning. Then for each pair (k,m) in
  79     \b global TODO list, if proc\#k has less temporary local list than proc\#m pair, (k,m) is added
  80     to temparary local TODO list of proc\#k.
  81     If proc\#m has less temporary local TODO list than proc\#k pair, (k,m) is added to temporary
  82     local TODO list of proc\#m.
  83     If proc\#k and proc\#m have the same amount of temporary local TODO list pair, (k,m) is added to
  84     temporary local TODO list of proc\#k.
  85
  86     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
  87     local TODO list :
  88
  89     - proc\#0 : (0,0)
  90     - proc\#1 : (0,1),(1,0)
  91     - proc\#2 : (1,2),(2,0),(2,1),(2,2)
  92
  93     The algorithm described here is not perfect for this use case, we hope to enhance it soon.
  94
  95     At this stage each proc knows precisely its \b local TODO list (with regard to interpolation).
  96     The \b local TODO list of other procs than local
  97     is kept for future computations.
  98
  99     \subsection ParaMEDMEMOverlapDECAlgoStep3 Step 3 : Matrix echange between procs
 100
 101     Knowing the \b local TODO list, the aim now is to exchange field-templates between procs.
 102     Each proc computes knowing TODO list per
 103     proc computed in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2" the exchange TODO list :
 104
 105     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" the exchange TODO list gives the
 106     following results :
 107
 108     Sending TODO list per proc :
 109
 110     - proc \#0 : Send fieldtemplate A to Proc\#1, Send fieldtemplate B to Proc\#1, Send fieldtemplate
 111     B to Proc\#2
 112     - Proc \#1 : Send fieldtemplate A to Proc\#2, Send fieldtemplate B to Proc\#2
 113     - Proc \#2 : No send.
 114
 115     Receiving TODO list per proc :
 116
 117     - proc \#0 : No receiving
 118     - proc \#1 : receiving fieldtemplate A from Proc\#0,  receiving fieldtemplate B from Proc\#0
 119     - proc \#2 : receiving fieldtemplate B from Proc\#0, receiving fieldtemplate A from Proc\#1,
 120     receiving fieldtemplate B from Proc\#1
 121
 122     To avoid as much as possible large volumes of transfers between procs, only relevant parts of
 123     meshes are sent. In order for proc\#k to send fieldtemplate A to fieldtemplate B
 124     of proc \#m., proc\#k computes the part of mesh A contained in the boundingbox B of proc\#m. It
 125     implies that the corresponding cellIds or nodeIds of the
 126     corresponding part are sent to proc \#m too.
 127
 128     Let's consider the couple (k,m) in the TODO list. This couple is treated by either k or m as
 129     seen in \ref ParaMEDMEMOverlapDECAlgoStep2 "here in Step2".
 130
 131     As will be dealt in Step 6, for final matrix-vector computations, the resulting matrix of the
 132     couple (k,m) whereever it is computed (proc \#k or proc \#m)
 133     will be stored in \b proc\#m.
 134
 135     - If proc \#k is in charge (performs the matrix computation) for this couple (k,m), target ids
 136     (cells or nodes) of the mesh in proc \#m are renumbered, because proc \#m has seelected a sub mesh
 137     of the target mesh to avoid large amounts of data to transfer. In this case as proc \#m is ultimately
 138      in charge of the matrix, proc \#k must keep preciously the
 139     source ids needed to be sent to proc\#m. No problem will appear for matrix assembling in proc m
 140     for source ids because no restriction was done.
 141     Concerning source ids to be sent for the matrix-vector computation, proc k will know precisely
 142     which source ids field values to send to proc \#m.
 143     This is embodied by OverlapMapping::keepTracksOfTargetIds in proc m.
 144
 145     - If proc \#m is in charge (performs matrix computation) for this couple (k,m), source ids (cells
 146     or nodes) of the mesh in proc \#k are renumbered, because proc \#k has selected a sub mesh of the
 147      source mesh to avoid large amounts of data to transfer. In this case as proc \#k is ultimately
 148      in charge of the matrix, proc \#m receives the source ids
 149     from remote proc \#k, and thus the matrix is directly correct, no need for renumbering as
 150      in \ref ParaMEDMEMOverlapDECAlgoStep5 "Step 5". However proc \#k must
 151     keep track of the ids sent to proc \#m for te matrix-vector computation.
 152     This is incarnated by OverlapMapping::keepTracksOfSourceIds in proc k.
 153
 154     This step is performed in ParaMEDMEM::OverlapElementLocator::exchangeMeshes method.
 155
 156     \subsection ParaMEDMEMOverlapDECAlgoStep4 Step 4 : Computation of the interpolation matrix
 157
 158     After mesh exchange in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step3" each processor has all the
 159     required information to treat its \b local TODO list computed in
 160     \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2". This step is potentially CPU costly, which is why
 161     the \b local TODO list per proc is expected to
 162     be as well balanced as possible.
 163
 164     The interpolation is performed as \ref ParaMEDMEM::MEDCouplingRemapper "Remapper" does.
 165
 166     This operation is performed by OverlapInterpolationMatrix::addContribution method.
 167
 168     \subsection ParaMEDMEMOverlapDECAlgoStep5 Step 5 : Global matrix construction.
 169
 170     After having performed the TODO list at the end of \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4"
 171     we need to assemble the final matrix.
 172
 173     The final aim is to have a distributed matrix \f$ M_k \f$ on each proc\#k. In order to reduce
 174     data exchange during the matrix product process,
 175     \f$ M_k \f$ is built using sizeof(Proc group) \c std::vector< \c std::map<int,double> \c >.
 176
 177     For a proc\#k, it is necessary to fetch info of all matrices built in
 178     \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4" where the first element in pair (i,j)
 179     is equal to k.
 180
 181     After this step, the matrix repartition is the following after a call to
 182     ParaMEDMEM::OverlapMapping::prepare :
 183
 184     - proc\#0 : (0,0),(1,0),(2,0)
 185     - proc\#1 : (0,1),(2,1)
 186     - proc\#2 : (1,2),(2,2)
 187
 188     Tuple (2,1) computed on proc 2 is stored in proc 1 after execution of the function
 189     "prepare". This is an example of item 0 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 190     Tuple (0,1) computed on proc 1 is stored in proc 1 too. This is an example of item 1 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 191
 192     In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_send_vector_st will contain :
 193
 194     - Proc\#0 : 0,1
 195     - Proc\#1 : 0,2
 196     - Proc\#2 : 0,1,2
 197
 198     In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_recv_vector_st will contain :
 199
 200     - Proc\#0 : 0,1,2
 201     - Proc\#1 : 0,2
 202     - Proc\#2 : 1,2
 203
 204     The method in charge to perform this is : ParaMEDMEM::OverlapMapping::prepare.
 205 */
 206   OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm):_own_group(true),_interpolation_matrix(0),
 207                                                                                    _source_field(0),_own_source_field(false),
 208                                                                                    _target_field(0),_own_target_field(false)
 209   {
 210     ParaMEDMEM::CommInterface comm;
 211     int *ranks_world=new int[procIds.size()]; // ranks of sources and targets in world_comm
 212     std::copy(procIds.begin(),procIds.end(),ranks_world);
 213     MPI_Group group,world_group;
 214     comm.commGroup(world_comm,&world_group);
 215     comm.groupIncl(world_group,procIds.size(),ranks_world,&group);
 216     delete [] ranks_world;
 217     MPI_Comm theComm;
 218     comm.commCreate(world_comm,group,&theComm);
 219     comm.groupFree(&group);
 220     if(theComm==MPI_COMM_NULL)
 221       {
 222         _group=0;
 223         return ;
 224       }
 225     std::set<int> idsUnion;
 226     for(std::size_t i=0;i<procIds.size();i++)
 227       idsUnion.insert(i);
 228     _group=new MPIProcessorGroup(comm,idsUnion,theComm);
 229   }
 230
 231   OverlapDEC::~OverlapDEC()
 232   {
 233     if(_own_group)
 234       delete _group;
 235     if(_own_source_field)
 236       delete _source_field;
 237     if(_own_target_field)
 238       delete _target_field;
 239     delete _interpolation_matrix;
 240   }
 241
 242   void OverlapDEC::sendRecvData(bool way)
 243   {
 244     if(way)
 245       sendData();
 246     else
 247       recvData();
 248   }
 249
 250   void OverlapDEC::sendData()
 251   {
 252     _interpolation_matrix->multiply();
 253   }
 254
 255   void OverlapDEC::recvData()
 256   {
 257     throw INTERP_KERNEL::Exception("Not implemented yet !!!!");
 258     //_interpolation_matrix->transposeMultiply();
 259   }
 260
 261   void OverlapDEC::synchronize()
 262   {
 263     if(!isInGroup())
 264       return ;
 265     delete _interpolation_matrix;
 266     _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this);
 267     OverlapElementLocator locator(_source_field,_target_field,*_group);
 268     locator.copyOptions(*this);
 269     locator.exchangeMeshes(*_interpolation_matrix);
 270     std::vector< std::pair<int,int> > jobs=locator.getToDoList();
 271     std::string srcMeth=locator.getSourceMethod();
 272     std::string trgMeth=locator.getTargetMethod();
 273     for(std::vector< std::pair<int,int> >::const_iterator it=jobs.begin();it!=jobs.end();it++)
 274       {
 275         const MEDCouplingPointSet *src=locator.getSourceMesh((*it).first);
 276         const DataArrayInt *srcIds=locator.getSourceIds((*it).first);
 277         const MEDCouplingPointSet *trg=locator.getTargetMesh((*it).second);
 278         const DataArrayInt *trgIds=locator.getTargetIds((*it).second);
 279         _interpolation_matrix->addContribution(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second);
 280       }
 281     _interpolation_matrix->prepare(locator.getProcsInInteraction());
 282     _interpolation_matrix->computeDeno();
 283   }
 284
 285   void OverlapDEC::attachSourceLocalField(ParaFIELD *field, bool ownPt)
 286   {
 287     if(!isInGroup())
 288       return ;
 289     if(_own_source_field)
 290       delete _source_field;
 291     _source_field=field;
 292     _own_source_field=ownPt;
 293   }
 294
 295   void OverlapDEC::attachTargetLocalField(ParaFIELD *field, bool ownPt)
 296   {
 297     if(!isInGroup())
 298       return ;
 299     if(_own_target_field)
 300       delete _target_field;
 301     _target_field=field;
 302     _own_target_field=ownPt;
 303   }
 304
 305   bool OverlapDEC::isInGroup() const
 306   {
 307     if(!_group)
 308       return false;
 309     return _group->containsMyRank();
 310   }
 311 }