src/ParaMEDMEM/OverlapDEC.cxx

   1 // Copyright (C) 2007-2014  CEA/DEN, EDF R&D
   2 //
   3 // This library is free software; you can redistribute it and/or
   4 // modify it under the terms of the GNU Lesser General Public
   5 // License as published by the Free Software Foundation; either
   6 // version 2.1 of the License, or (at your option) any later version.
   7 //
   8 // This library is distributed in the hope that it will be useful,
   9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 // Lesser General Public License for more details.
  12 //
  13 // You should have received a copy of the GNU Lesser General Public
  14 // License along with this library; if not, write to the Free Software
  15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 //
  17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  18 //
  19 // Author : Anthony Geay (CEA/DEN)
  20
  21 #include "OverlapDEC.hxx"
  22 #include "CommInterface.hxx"
  23 #include "ParaFIELD.hxx"
  24 #include "MPIProcessorGroup.hxx"
  25 #include "OverlapElementLocator.hxx"
  26 #include "OverlapInterpolationMatrix.hxx"
  27 /*!
  28     \defgroup overlapdec OverlapDEC
  29     The \c OverlapDEC enables the \ref InterpKerRemapGlobal "conservative remapping" of fields between two parallel codes. This remapping is based on the computation of intersection volumes on a \b same \b processor \b group. On this processor group are defined two field-templates called A and B. The computation is possible for 3D meshes, 2D meshes, 3D-surface meshes, 1D meshes and 2D-curve meshes. Dimensions must be similar for the distribution templates A and B.
  30     The main difference with \ref interpkerneldec is that this \ref dec manages 2 field templates on each processor of the processor group (A and B) called source and target.
  31     Furthermore all processors in processor group cooperates in global interpolation matrix computation. In this respect \ref InterpKernelIDEC is a specialization of \c OverlapDEC.
  32
  33     \section ParaMEDMEMOverlapDECAlgorithmDescription Algorithm Description
  34
  35     Let's consider the following use case that is ran in ParaMEDMEMTest_OverlapDEC.cxx to describes the different steps of the computation. The processor group contains 3 processors.
  36     \anchor ParaMEDMEMOverlapDECImgTest1
  37     \image html OverlapDEC1.png "Example showing the use case in order to explain the different steps."
  38
  39     \subsection ParaMEDMEMOverlapDECAlgoStep1 Step 1 : Bounding box exchange and global interaction between procs computation.
  40
  41     In order to reduce as much as possible the amount of communications between distant processors, every processor computes a bounding box for A and B. Then a AllToAll communication is performed so that
  42     every processor can compute the \b global interactions between processor.
  43     This computation leads every processor to compute the same global TODO list expressed as a list of pair. A pair (x,y) means that proc \b x fieldtemplate A can interact with fieltemplate B of proc \b y because the two bounding boxes interact.
  44     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following a \b global TODO list :
  45
  46     \b (0,0),(0,1),(1,0),(1,2),(2,0),(2,1),(2,2)
  47
  48     Here the pair (0,2) does not appear because the bounding box of fieldtemplateA of proc#2 does not intersect that of fieldtemplate B on proc#0.
  49
  50     Stage performed by ParaMEDMEM::OverlapElementLocator::computeBoundingBoxes.
  51
  52     \subsection ParaMEDMEMOverlapDECAlgoStep2 Step 2 : Computation of local TODO list
  53
  54     Starting from the global interaction previously computed in \ref ParaMEDMEMOverlapDECAlgoStep1 "Step 1", each proc computes the TODO list per proc.
  55     The following rules is chosen : a pair (x,y) can be treated by either proc #x or proc #y, in order to reduce the amount of data transfert among
  56     processors. The algorithm chosen for load balancing is the following : Each processor has an empty \b local TODO list at the beginning. Then for each pair (k,m) in
  57     \b global TODO list, if proc#k has less temporary local list than proc#m pair, (k,m) is added to temparary local TODO list of proc#k.
  58     If proc#m has less temporary local TODO list than proc#k pair, (k,m) is added to temporary local TODO list of proc#m.
  59     If proc#k and proc#m have the same amount of temporary local TODO list pair, (k,m) is added to temporary local TODO list of proc#k.
  60
  61     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following local TODO list :
  62
  63     - proc#0 : (0,0)
  64     - proc#1 : (0,1),(1,0)
  65     - proc#2 : (1,2),(2,0),(2,1),(2,2)
  66
  67     The algorithm described here is not perfect for this use case, we hope to enhance it soon.
  68
  69     At this stage each proc knows precisely its \b local TODO list (with regard to interpolation). The \b local TODO list of other procs than local
  70     is kept for future computations.
  71
  72     \subsection ParaMEDMEMOverlapDECAlgoStep3 Step 3 : Matrix echange between procs
  73
  74     Knowing the \b local TODO list, the aim now is to exchange field-templates between procs. Each proc computes knowing TODO list per
  75     proc computed in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2" the exchange TODO list :
  76
  77     In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" the exchange TODO list gives the following results :
  78
  79     Sending TODO list per proc :
  80
  81     - proc #0 : Send fieldtemplate A to Proc#1, Send fieldtemplate B to Proc#1, Send fieldtemplate B to Proc#2
  82     - Proc #1 : Send fieldtemplate A to Proc#2, Send fieldtemplate B to Proc#2
  83     - Proc #2 : No send.
  84
  85     Receiving TODO list per proc :
  86
  87     - proc #0 : No receiving
  88     - proc #1 : receiving fieldtemplate A from Proc#0,  receiving fieldtemplate B from Proc#0
  89     - proc #2 : receiving fieldtemplate B from Proc#0, receiving fieldtemplate A from Proc#1,  receiving fieldtemplate B from Proc#1
  90
  91     To avoid as much as possible large volumes of transfers between procs, only relevant parts of meshes are sent. In order for proc#k to send fieldtemplate A to fieldtemplate B
  92     of proc #m., proc#k computes the part of mesh A contained in the boundingbox B of proc#m. It implies that the corresponding cellIds or nodeIds of the
  93     corresponding part are sent to proc #m too.
  94
  95     Let's consider the couple (k,m) in the TODO list. This couple is treated by either k or m as seen in \ref ParaMEDMEMOverlapDECAlgoStep2 "here in Step2".
  96
  97     As will be dealt in Step 6, for final matrix-vector computations, the resulting matrix of the couple (k,m) whereever it is computed (proc #k or proc #m)
  98     will be stored in \b proc#m.
  99
 100     - If proc #k is in charge (performs the matrix computation) for this couple (k,m), target ids (cells or nodes) of the mesh in proc #m are renumbered, because proc #m has seelected a sub mesh of the target mesh to avoid large amounts of data to transfer. In this case as proc #m is ultimately in charge of the matrix, proc #k must keep preciously the
 101     source ids needed to be sent to proc#m. No problem will appear for matrix assembling in proc m for source ids because no restriction was done.
 102     Concerning source ids to be sent for the matrix-vector computation, proc k will know precisely which source ids field values to send to proc #m.
 103     This is embodied by OverlapMapping::keepTracksOfTargetIds in proc m.
 104
 105     - If proc #m is in charge (performs matrix computation) for this couple (k,m), source ids (cells or nodes) of the mesh in proc #k are renumbered, because proc #k has selected a sub mesh of the source mesh to avoid large amounts of data to transfer. In this case as proc #k is ultimately in charge of the matrix, proc #m receives the source ids
 106     from remote proc #k, and thus the matrix is directly correct, no need for renumbering as in \ref ParaMEDMEMOverlapDECAlgoStep5 "Step 5". However proc #k must
 107     keep track of the ids sent to proc #m for te matrix-vector computation.
 108     This is incarnated by OverlapMapping::keepTracksOfSourceIds in proc k.
 109
 110     This step is performed in ParaMEDMEM::OverlapElementLocator::exchangeMeshes method.
 111
 112     \subsection ParaMEDMEMOverlapDECAlgoStep4 Step 4 : Computation of the interpolation matrix
 113
 114     After mesh exchange in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step3" each processor has all the required information to treat its \b local TODO list computed in
 115     \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2". This step is potentially CPU costly, which is why the \b local TODO list per proc is expected to
 116     be as well balanced as possible.
 117
 118     The interpolation is performed as \ref ParaMEDMEM::MEDCouplingRemapper "Remapper" does.
 119
 120     This operation is performed by OverlapInterpolationMatrix::addContribution method.
 121
 122     \subsection ParaMEDMEMOverlapDECAlgoStep5 Step 5 : Global matrix construction.
 123
 124     After having performed the TODO list at the end of \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4" we need to assemble the final matrix.
 125
 126     The final aim is to have a distributed matrix \f$ M_k \f$ on each proc#k. In order to reduce data exchange during the matrix product process,
 127     \f$ M_k \f$ is built using sizeof(Proc group) \c std::vector< \c std::map<int,double> \c >.
 128
 129     For a proc#k, it is necessary to fetch info of all matrices built in \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4" where the first element in pair (i,j)
 130     is equal to k.
 131
 132     After this step, the matrix repartition is the following after a call to ParaMEDMEM::OverlapMapping::prepare :
 133
 134     - proc#0 : (0,0),(1,0),(2,0)
 135     - proc#1 : (0,1),(2,1)
 136     - proc#2 : (1,2),(2,2)
 137
 138     Tuple (2,1) computed on proc 2 is stored in proc 1 after execution of the function "prepare". This is an example of item 0 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 139     Tuple (0,1) computed on proc 1 is stored in proc 1 too. This is an example of item 1 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
 140
 141     In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_send_vector_st will contain :
 142
 143     - Proc#0 : 0,1
 144     - Proc#1 : 0,2
 145     - Proc#2 : 0,1,2
 146
 147     In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_recv_vector_st will contain :
 148
 149     - Proc#0 : 0,1,2
 150     - Proc#1 : 0,2
 151     - Proc#2 : 1,2
 152
 153     The method in charge to perform this is : ParaMEDMEM::OverlapMapping::prepare.
 154 */
 155 namespace ParaMEDMEM
 156 {
 157   OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm):_own_group(true),_interpolation_matrix(0),
 158                                                                                    _source_field(0),_own_source_field(false),
 159                                                                                    _target_field(0),_own_target_field(false)
 160   {
 161     ParaMEDMEM::CommInterface comm;
 162     int *ranks_world=new int[procIds.size()]; // ranks of sources and targets in world_comm
 163     std::copy(procIds.begin(),procIds.end(),ranks_world);
 164     MPI_Group group,world_group;
 165     comm.commGroup(world_comm,&world_group);
 166     comm.groupIncl(world_group,procIds.size(),ranks_world,&group);
 167     delete [] ranks_world;
 168     MPI_Comm theComm;
 169     comm.commCreate(world_comm,group,&theComm);
 170     comm.groupFree(&group);
 171     if(theComm==MPI_COMM_NULL)
 172       {
 173         _group=0;
 174         return ;
 175       }
 176     std::set<int> idsUnion;
 177     for(std::size_t i=0;i<procIds.size();i++)
 178       idsUnion.insert(i);
 179     _group=new MPIProcessorGroup(comm,idsUnion,theComm);
 180   }
 181
 182   OverlapDEC::~OverlapDEC()
 183   {
 184     if(_own_group)
 185       delete _group;
 186     if(_own_source_field)
 187       delete _source_field;
 188     if(_own_target_field)
 189       delete _target_field;
 190     delete _interpolation_matrix;
 191   }
 192
 193   void OverlapDEC::sendRecvData(bool way)
 194   {
 195     if(way)
 196       sendData();
 197     else
 198       recvData();
 199   }
 200
 201   void OverlapDEC::sendData()
 202   {
 203     _interpolation_matrix->multiply();
 204   }
 205
 206   void OverlapDEC::recvData()
 207   {
 208     throw INTERP_KERNEL::Exception("Not implemented yet !!!!");
 209     //_interpolation_matrix->transposeMultiply();
 210   }
 211
 212   void OverlapDEC::synchronize()
 213   {
 214     if(!isInGroup())
 215       return ;
 216     delete _interpolation_matrix;
 217     _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this);
 218     OverlapElementLocator locator(_source_field,_target_field,*_group);
 219     locator.copyOptions(*this);
 220     locator.exchangeMeshes(*_interpolation_matrix);
 221     std::vector< std::pair<int,int> > jobs=locator.getToDoList();
 222     std::string srcMeth=locator.getSourceMethod();
 223     std::string trgMeth=locator.getTargetMethod();
 224     for(std::vector< std::pair<int,int> >::const_iterator it=jobs.begin();it!=jobs.end();it++)
 225       {
 226         const MEDCouplingPointSet *src=locator.getSourceMesh((*it).first);
 227         const DataArrayInt *srcIds=locator.getSourceIds((*it).first);
 228         const MEDCouplingPointSet *trg=locator.getTargetMesh((*it).second);
 229         const DataArrayInt *trgIds=locator.getTargetIds((*it).second);
 230         _interpolation_matrix->addContribution(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second);
 231       }
 232     _interpolation_matrix->prepare(locator.getProcsInInteraction());
 233     _interpolation_matrix->computeDeno();
 234   }
 235
 236   void OverlapDEC::attachSourceLocalField(ParaFIELD *field, bool ownPt)
 237   {
 238     if(!isInGroup())
 239       return ;
 240     if(_own_source_field)
 241       delete _source_field;
 242     _source_field=field;
 243     _own_source_field=ownPt;
 244   }
 245
 246   void OverlapDEC::attachTargetLocalField(ParaFIELD *field, bool ownPt)
 247   {
 248     if(!isInGroup())
 249       return ;
 250     if(_own_target_field)
 251       delete _target_field;
 252     _target_field=field;
 253     _own_target_field=ownPt;
 254   }
 255
 256   bool OverlapDEC::isInGroup() const
 257   {
 258     if(!_group)
 259       return false;
 260     return _group->containsMyRank();
 261   }
 262 }