X-Git-Url: http://git.salome-platform.org/gitweb/?a=blobdiff_plain;f=src%2FParaMEDMEM%2FOverlapDEC.cxx;h=af842fb9dd08f600d6886b8afa3d109c564d4660;hb=9f966e0ed55cc976b59a17318a7be82445529eda;hp=7d77475725a1025901b911278b7f1752934e93d5;hpb=fb512e2b77325290aaa2b4c9fd8f22d5949b6369;p=tools%2Fmedcoupling.git

diff --git a/src/ParaMEDMEM/OverlapDEC.cxx b/src/ParaMEDMEM/OverlapDEC.cxx
index 7d7747572..af842fb9d 100644
--- a/src/ParaMEDMEM/OverlapDEC.cxx
+++ b/src/ParaMEDMEM/OverlapDEC.cxx
@@ -1,4 +1,4 @@
-// Copyright (C) 2007-2014 CEA/DEN, EDF R&D
+// Copyright (C) 2007-2015 CEA/DEN, EDF R&D
 //
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -20,143 +20,201 @@
 #include "OverlapDEC.hxx"
 #include "CommInterface.hxx"
+#include "ParaMESH.hxx"
 #include "ParaFIELD.hxx"
 #include "MPIProcessorGroup.hxx"
 #include "OverlapElementLocator.hxx"
 #include "OverlapInterpolationMatrix.hxx"
+
+namespace ParaMEDMEM
+{
 /*!
-    \defgroup overlapdec OverlapDEC
-    The \c OverlapDEC enables the \ref InterpKerRemapGlobal "conservative remapping" of fields between two parallel codes. This remapping is based on the computation of intersection volumes on a \b same \b processor \b group. On this processor group are defined two field-templates called A and B. The computation is possible for 3D meshes, 2D meshes, 3D-surface meshes, 1D meshes and 2D-curve meshes. Dimensions must be similar for the distribution templates A and B.
-    The main difference with \ref interpkerneldec is that this \ref dec manages 2 field templates on each processor of the processor group (A and B) called source and target.
-    Furthermore all processors in processor group cooperates in global interpolation matrix computation. In this respect \ref InterpKernelIDEC is a specialization of \c OverlapDEC.
+  \anchor OverlapDEC-det
+  \class OverlapDEC
+
+  \section OverlapDEC-over Overview
+
+  The \c OverlapDEC enables the \ref InterpKerRemapGlobal "conservative remapping" of fields between
+  two parallel codes. This remapping is based on the computation of intersection volumes on
+  a \b single \b processor \b group. On this processor group two field-templates, called A
+  and B, are defined. The computation is possible for 3D meshes, 2D meshes, 3D-surface meshes,
+  1D meshes and 2D-curve meshes. Dimensions must be the same for the distribution templates A and B.
+
+  The main difference with \ref InterpKernelDEC-det "InterpKernelDEC" is that this
+  \ref para-dec "DEC" works with a *single* processor group, in which the processors share the work.
+  Consequently, each processor manages two \ref MEDCouplingFieldTemplatesPage "field templates" (A and B),
+  called source and target.
+  Furthermore, all processors of the processor group cooperate in the computation of the global
+  interpolation matrix. In this respect \c InterpKernelDEC is a specialization of \c OverlapDEC.
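+
+  A typical calling sequence is sketched below for illustration only, assuming \c procs is the
+  \c std::set<int> of ranks forming the group and \c fieldA / \c fieldB are the local source and
+  target fields (the methods are those of this class; see ParaMEDMEMTest_OverlapDEC.cxx for a
+  complete, working example) :
+
+  \code
+  OverlapDEC dec(procs);               // every proc of 'procs' hosts a part of A and of B
+  dec.attachSourceLocalField(fieldA);  // field-template A
+  dec.attachTargetLocalField(fieldB);  // field-template B
+  dec.synchronize();                   // steps 1 to 5 below : builds the interpolation matrix
+  dec.sendRecvData();                  // matrix-vector product and exchange of the results
+  \endcode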
+
+  \section ParaMEDMEMOverlapDECAlgorithmDescription Algorithm description
 
-    \section ParaMEDMEMOverlapDECAlgorithmDescription Algorithm Description
-    Let's consider the following use case that is ran in ParaMEDMEMTest_OverlapDEC.cxx to describes the different steps of the computation. The processor group contains 3 processors.
+  Let's consider the following use case, run in ParaMEDMEMTest_OverlapDEC.cxx, to describe
+  the different steps of the computation. The processor group contains 3 processors.
   \anchor ParaMEDMEMOverlapDECImgTest1
-   \image html OverlapDEC1.png "Example showing the use case in order to explain the different steps."
+  \image html OverlapDEC1.png "Example split of the source and target mesh among the 3 procs"
 
   \subsection ParaMEDMEMOverlapDECAlgoStep1 Step 1 : Bounding box exchange and computation of global interactions between procs.
 
-   In order to reduce as much as possible the amount of communications between distant processors, every processor computes a bounding box for A and B. Then a AllToAll communication is performed so that
+  In order to reduce as much as possible the amount of communication between distant processors,
+  every processor computes a bounding box for A and B. Then an AllToAll communication is performed
+  so that
   every processor can compute the \b global interactions between processors.
-   This computation leads every processor to compute the same global TODO list expressed as a list of pair. A pair (x,y) means that proc \b x fieldtemplate A can interact with fieltemplate B of proc \b y because the two bounding boxes interact.
-   In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following a \b global TODO list :
+  This computation leads every processor to compute the same global TODO list, expressed as a list
+  of pairs. A pair (x,y) means that fieldtemplate A of proc \b x can interact with fieldtemplate B
+  of proc \b y because the two bounding boxes intersect.
+  In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
+  \b global TODO list :
 
   \b (0,0),(0,1),(1,0),(1,2),(2,0),(2,1),(2,2)
 
-   Here the pair (0,2) does not appear because the bounding box of fieldtemplateA of proc#2 does not intersect that of fieldtemplate B on proc#0.
+  Here the pair (0,2) does not appear because the bounding box of fieldtemplate A of proc\#2 does
+  not intersect that of fieldtemplate B on proc\#0.
 
   Stage performed by ParaMEDMEM::OverlapElementLocator::computeBoundingBoxes.
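+
+  Conceptually, once every proc owns the two arrays of bounding boxes, the global TODO list is
+  just the set of intersecting (A,B) box pairs. A minimal illustrative sketch is given below
+  (\c bboxesA, \c bboxesB, \c nbProcs, \c eps and \c boxesIntersect are placeholders, not the
+  actual implementation) :
+
+  \code
+  std::vector< std::pair<int,int> > globalTodo;
+  for(int x=0;x<nbProcs;x++)
+    for(int y=0;y<nbProcs;y++)
+      if(boxesIntersect(bboxesA[x],bboxesB[y],eps))  // eps : bounding box adjustment
+        globalTodo.push_back(std::make_pair(x,y));
+  // every proc builds the same list; here : (0,0),(0,1),(1,0),(1,2),(2,0),(2,1),(2,2)
+  \endcode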
 
   \subsection ParaMEDMEMOverlapDECAlgoStep2 Step 2 : Computation of local TODO list
 
-   Starting from the global interaction previously computed in \ref ParaMEDMEMOverlapDECAlgoStep1 "Step 1", each proc computes the TODO list per proc.
-   The following rules is chosen : a pair (x,y) can be treated by either proc #x or proc #y, in order to reduce the amount of data transfert among
-   processors. The algorithm chosen for load balancing is the following : Each processor has an empty \b local TODO list at the beginning. Then for each pair (k,m) in
-   \b global TODO list, if proc#k has less temporary local list than proc#m pair, (k,m) is added to temparary local TODO list of proc#k.
-   If proc#m has less temporary local TODO list than proc#k pair, (k,m) is added to temporary local TODO list of proc#m.
-   If proc#k and proc#m have the same amount of temporary local TODO list pair, (k,m) is added to temporary local TODO list of proc#k.
-
-   In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following local TODO list :
-
-   - proc#0 : (0,0)
-   - proc#1 : (0,1),(1,0)
-   - proc#2 : (1,2),(2,0),(2,1),(2,2)
+  Starting from the global interactions previously computed in \ref ParaMEDMEMOverlapDECAlgoStep1
+  "Step 1", each proc computes the TODO list per proc.
+  The following rule is chosen : a pair (x,y) can be treated by either proc \#x or proc \#y,
+  in order to reduce the amount of data transfer among
+  processors.
+  The algorithm chosen for load balancing is the following : each processor starts with
+  an empty \b local TODO list. Then, for each pair (k,m) in the
+  \b global TODO list, if proc\#k has a shorter temporary local TODO list than proc\#m, the pair
+  (k,m) is added to the temporary local TODO list of proc\#k.
+  If proc\#m has a shorter temporary local TODO list than proc\#k, (k,m) is added to the temporary
+  local TODO list of proc\#m.
+  If proc\#k and proc\#m have temporary local TODO lists of equal length, (k,m) is added to the
+  temporary local TODO list of proc\#k. A sketch of this rule is given below.
+
+  In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" this computation leads to the following
+  local TODO lists :
+
+  - proc\#0 : (0,0)
+  - proc\#1 : (0,1),(1,0)
+  - proc\#2 : (1,2),(2,0),(2,1),(2,2)
 
   The algorithm described here is not perfect for this use case; we hope to enhance it soon.
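+
+  For illustration, this balancing rule can be sketched as follows (hypothetical code, not the
+  actual implementation, reusing the \c globalTodo list of the Step 1 sketch) :
+
+  \code
+  std::vector<int> load(nbProcs,0);  // current length of each temporary local TODO list
+  std::vector< std::vector< std::pair<int,int> > > localTodo(nbProcs);
+  for(std::size_t i=0;i<globalTodo.size();i++)
+    {
+      int k=globalTodo[i].first, m=globalTodo[i].second;
+      int winner = load[m]<load[k] ? m : k;  // ties are assigned to proc #k
+      localTodo[winner].push_back(globalTodo[i]);
+      load[winner]++;
+    }
+  // on the example : proc#0 gets (0,0), proc#1 gets (0,1),(1,0), proc#2 gets the remaining four
+  \endcode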
 
-   At this stage each proc knows precisely its \b local TODO list (with regard to interpolation). The \b local TODO list of other procs than local
+  At this stage each proc knows precisely its \b local TODO list (with regard to interpolation).
+  The \b local TODO list of each of the other procs
   is kept for future computations.
 
   \subsection ParaMEDMEMOverlapDECAlgoStep3 Step 3 : Matrix exchange between procs
 
-   Knowing the \b local TODO list, the aim now is to exchange field-templates between procs. Each proc computes knowing TODO list per
-   proc computed in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2" the exchange TODO list :
+  Knowing the \b local TODO list, the aim now is to exchange field-templates between procs.
+  Knowing the TODO list per proc computed in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2", each
+  proc computes the exchange TODO list :
 
-   In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" the exchange TODO list gives the following results :
+  In the \ref ParaMEDMEMOverlapDECImgTest1 "example above" the exchange TODO list gives the
+  following results :
 
   Sending TODO list per proc :
 
-   - proc #0 : Send fieldtemplate A to Proc#1, Send fieldtemplate B to Proc#1, Send fieldtemplate B to Proc#2
-   - Proc #1 : Send fieldtemplate A to Proc#2, Send fieldtemplate B to Proc#2
-   - Proc #2 : No send.
+  - proc \#0 : send fieldtemplate A to proc\#1, send fieldtemplate B to proc\#1, send fieldtemplate
+  B to proc\#2
+  - proc \#1 : send fieldtemplate A to proc\#2, send fieldtemplate B to proc\#2
+  - proc \#2 : no send.
 
   Receiving TODO list per proc :
 
-   - proc #0 : No receiving
-   - proc #1 : receiving fieldtemplate A from Proc#0, receiving fieldtemplate B from Proc#0
-   - proc #2 : receiving fieldtemplate B from Proc#0, receiving fieldtemplate A from Proc#1, receiving fieldtemplate B from Proc#1
+  - proc \#0 : no reception
+  - proc \#1 : receive fieldtemplate A from proc\#0, receive fieldtemplate B from proc\#0
+  - proc \#2 : receive fieldtemplate B from proc\#0, receive fieldtemplate A from proc\#1,
+  receive fieldtemplate B from proc\#1
 
-   To avoid as much as possible large volumes of transfers between procs, only relevant parts of meshes are sent. In order for proc#k to send fieldtemplate A to fieldtemplate B
-   of proc #m., proc#k computes the part of mesh A contained in the boundingbox B of proc#m. It implies that the corresponding cellIds or nodeIds of the
-   corresponding part are sent to proc #m too.
+  To avoid as much as possible large volumes of transfers between procs, only the relevant parts
+  of the meshes are sent. In order for proc\#k to send fieldtemplate A to fieldtemplate B
+  of proc\#m, proc\#k computes the part of mesh A contained in the bounding box of B on proc\#m
+  (a sketch is given at the end of this step). The cellIds or nodeIds of that
+  part are sent to proc\#m too.
 
-   Let's consider the couple (k,m) in the TODO list. This couple is treated by either k or m as seen in \ref ParaMEDMEMOverlapDECAlgoStep2 "here in Step2".
+  Let's consider the couple (k,m) in the TODO list. This couple is treated by either k or m, as
+  seen in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2" above.
 
-   As will be dealt in Step 6, for final matrix-vector computations, the resulting matrix of the couple (k,m) whereever it is computed (proc #k or proc #m)
-   will be stored in \b proc#m.
-
-   - If proc #k is in charge (performs the matrix computation) for this couple (k,m), target ids (cells or nodes) of the mesh in proc #m are renumbered, because proc #m has seelected a sub mesh of the target mesh to avoid large amounts of data to transfer. In this case as proc #m is ultimately in charge of the matrix, proc #k must keep preciously the
-   source ids needed to be sent to proc#m. No problem will appear for matrix assembling in proc m for source ids because no restriction was done.
-   Concerning source ids to be sent for the matrix-vector computation, proc k will know precisely which source ids field values to send to proc #m.
+  As will be dealt with in Step 6, for the final matrix-vector computations, the resulting matrix
+  of the couple (k,m), wherever it is computed (proc \#k or proc \#m),
+  will be stored on \b proc\#m.
+
+  - If proc \#k is in charge of (performs the matrix computation for) this couple (k,m), the target
+  ids (cells or nodes) of the mesh on proc \#m are renumbered, because proc \#m has selected a
+  sub-mesh of the target mesh to avoid transferring large amounts of data. In this case, as proc \#m
+  is ultimately in charge of the matrix, proc \#k must carefully keep the
+  source ids that will have to be sent to proc\#m. Matrix assembly on proc\#m raises no issue for
+  the source ids because no restriction was performed on them.
+  Concerning the source ids to be sent for the matrix-vector computation, proc\#k knows precisely
+  which source field values to send to proc \#m.
   This is embodied by OverlapMapping::keepTracksOfTargetIds in proc m.
 
-   - If proc #m is in charge (performs matrix computation) for this couple (k,m), source ids (cells or nodes) of the mesh in proc #k are renumbered, because proc #k has selected a sub mesh of the source mesh to avoid large amounts of data to transfer. In this case as proc #k is ultimately in charge of the matrix, proc #m receives the source ids
-   from remote proc #k, and thus the matrix is directly correct, no need for renumbering as in \ref ParaMEDMEMOverlapDECAlgoStep5 "Step 5". However proc #k must
-   keep track of the ids sent to proc #m for te matrix-vector computation.
+  - If proc \#m is in charge of (performs the matrix computation for) this couple (k,m), the source
+  ids (cells or nodes) of the mesh on proc \#k are renumbered, because proc \#k has selected a
+  sub-mesh of the source mesh to avoid transferring large amounts of data. In this case, as proc \#k
+  is ultimately in charge of the matrix, proc \#m receives the source ids
+  from the remote proc \#k, and thus the matrix is directly correct; no need for renumbering as
+  in \ref ParaMEDMEMOverlapDECAlgoStep5 "Step 5". However proc \#k must
+  keep track of the ids sent to proc \#m for the matrix-vector computation.
   This is incarnated by OverlapMapping::keepTracksOfSourceIds in proc k.
 
   This step is performed in ParaMEDMEM::OverlapElementLocator::exchangeMeshes method.
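+
+  For illustration only, the restriction of mesh A to the bounding box of B could look as follows
+  with the MEDCoupling API (variable names and the bounding box layout are assumptions of this
+  sketch, not the actual implementation) :
+
+  \code
+  // proc #k : keep only the cells of its source mesh A lying in the bounding box
+  // 'bboxOfBOnM' received from proc #m in Step 1, and remember their original ids.
+  DataArrayInt *cellIds=srcMeshA->getCellsInBoundingBox(bboxOfBOnM,eps);
+  MEDCouplingPointSet *part=srcMeshA->buildPartOfMySelf(cellIds->begin(),cellIds->end(),true);
+  // 'part' and 'cellIds' (the original cell numbers) are then both sent to proc #m.
+  \endcode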
 
   \subsection ParaMEDMEMOverlapDECAlgoStep4 Step 4 : Computation of the interpolation matrix
 
-   After mesh exchange in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step3" each processor has all the required information to treat its \b local TODO list computed in
-   \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2". This step is potentially CPU costly, which is why the \b local TODO list per proc is expected to
+  After the mesh exchange in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step 3", each processor has all
+  the required information to treat its \b local TODO list computed in
+  \ref ParaMEDMEMOverlapDECAlgoStep2 "Step 2". This step is potentially CPU costly, which is why
+  the \b local TODO list per proc is expected to
   be as well balanced as possible.
 
-   The interpolation is performed as \ref ParaMEDMEM::MEDCouplingRemapper "Remapper" does.
+  The interpolation is performed as the \ref ParaMEDMEM::MEDCouplingRemapper "remapper" does.
 
   This operation is performed by the OverlapInterpolationMatrix::computeLocalIntersection method.
 
   \subsection ParaMEDMEMOverlapDECAlgoStep5 Step 5 : Global matrix construction.
 
-   After having performed the TODO list at the end of \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4" we need to assemble the final matrix.
+  After having processed the local TODO list at the end of \ref ParaMEDMEMOverlapDECAlgoStep4
+  "Step 4", we need to assemble the final matrix.
 
-   The final aim is to have a distributed matrix \f$ M_k \f$ on each proc#k. In order to reduce data exchange during the matrix product process,
+  The final aim is to have a distributed matrix \f$ M_k \f$ on each proc\#k. In order to reduce
+  data exchange during the matrix-vector product,
   \f$ M_k \f$ is built using sizeof(Proc group) \c std::vector< \c std::map<int,double> \c > (a sketch of this storage is given at the end of this section).
 
-   For a proc#k, it is necessary to fetch info of all matrices built in \ref ParaMEDMEMOverlapDECAlgoStep4 "Step4" where the first element in pair (i,j)
+  For a proc\#k, it is necessary to fetch the info of all the matrices built in
+  \ref ParaMEDMEMOverlapDECAlgoStep4 "Step 4" whose first element in the pair (i,j)
   is equal to k.
 
-   After this step, the matrix repartition is the following after a call to ParaMEDMEM::OverlapMapping::prepare :
+  After a call to ParaMEDMEM::OverlapMapping::prepare, the matrix distribution is the following :
 
-   - proc#0 : (0,0),(1,0),(2,0)
-   - proc#1 : (0,1),(2,1)
-   - proc#2 : (1,2),(2,2)
+  - proc\#0 : (0,0),(1,0),(2,0)
+  - proc\#1 : (0,1),(2,1)
+  - proc\#2 : (1,2),(2,2)
 
-   Tuple (2,1) computed on proc 2 is stored in proc 1 after execution of the function "prepare". This is an example of item 0 in \ref ParaMEDMEMOverlapDECAlgoStep2 "Step2".
+  Tuple (2,1), computed on proc\#2, is stored on proc\#1 after execution of the function
+  "prepare". This is an example of the first case described in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step 3".
   Tuple (0,1), computed on proc\#1, is stored on proc\#1 too. This is an example of the second case described in \ref ParaMEDMEMOverlapDECAlgoStep3 "Step 3".
 
   In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_send_vector_st will contain :
 
-   - Proc#0 : 0,1
-   - Proc#1 : 0,2
-   - Proc#2 : 0,1,2
+  - proc\#0 : 0,1
+  - proc\#1 : 0,2
+  - proc\#2 : 0,1,2
 
   In the end ParaMEDMEM::OverlapMapping::_proc_ids_to_recv_vector_st will contain :
 
-   - Proc#0 : 0,1,2
-   - Proc#1 : 0,2
-   - Proc#2 : 1,2
+  - proc\#0 : 0,1,2
+  - proc\#1 : 0,2
+  - proc\#2 : 1,2
 
   The method in charge of performing this is ParaMEDMEM::OverlapMapping::prepare.
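+
+  To picture the storage described above, here is an illustrative sketch (with hypothetical
+  variable names) of the local matrix and of its use in the final matrix-vector product :
+
+  \code
+  // On proc #k : one sparse row per local target id; each row maps a source id to
+  // its interpolation coefficient (hypothetical sizes nbTrg / nbSrc).
+  std::vector< std::map<int,double> > Mk(nbTrg);
+  std::vector<double> x(nbSrc,1.), y(nbTrg,0.);  // source field values / target result
+  for(std::size_t i=0;i<Mk.size();i++)           // matrix-vector product y = Mk * x
+    for(std::map<int,double>::const_iterator it=Mk[i].begin();it!=Mk[i].end();it++)
+      y[i] += (*it).second * x[(*it).first];
+  \endcode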
 */
-namespace ParaMEDMEM
-{
-  OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm):_own_group(true),_interpolation_matrix(0),
-                                                                                   _source_field(0),_own_source_field(false),
-                                                                                   _target_field(0),_own_target_field(false)
+  OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm):
+      _load_balancing_algo(1),
+      _own_group(true),_interpolation_matrix(0), _locator(0),
+      _source_field(0),_own_source_field(false),
+      _target_field(0),_own_target_field(false),
+      _default_field_value(0.0),
+      _comm(MPI_COMM_NULL)
   {
     ParaMEDMEM::CommInterface comm;
     int *ranks_world=new int[procIds.size()]; // ranks of sources and targets in world_comm
@@ -165,10 +223,10 @@ namespace ParaMEDMEM
     comm.commGroup(world_comm,&world_group);
     comm.groupIncl(world_group,procIds.size(),ranks_world,&group);
     delete [] ranks_world;
-    MPI_Comm theComm;
-    comm.commCreate(world_comm,group,&theComm);
+    comm.commCreate(world_comm,group,&_comm);
     comm.groupFree(&group);
-    if(theComm==MPI_COMM_NULL)
+    comm.groupFree(&world_group);
+    if(_comm==MPI_COMM_NULL)
       {
         _group=0;
         return ;
@@ -176,7 +234,7 @@ namespace ParaMEDMEM
     std::set<int> idsUnion;
     for(std::size_t i=0;i<procIds.size();i++)

-      _interpolation_matrix->multiply();
+      _interpolation_matrix->multiply(_default_field_value);
   }
 
   void OverlapDEC::recvData()
@@ -213,24 +277,31 @@
   {
     if(!isInGroup())
       return ;
+    // Check number of components of field on both side (for now allowing void field/mesh on one proc is not allowed)
+    if (!_source_field || !_source_field->getField())
+      throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void source field on a proc is not allowed!");
+    if (!_target_field || !_target_field->getField())
+      throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): currently, having a void target field on a proc is not allowed!");
+    if (_target_field->getField()->getNumberOfComponents() != _source_field->getField()->getNumberOfComponents())
+      throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): source and target field have different number of components!");
     delete _interpolation_matrix;
-    _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this);
-    OverlapElementLocator locator(_source_field,_target_field,*_group);
-    locator.copyOptions(*this);
-    locator.exchangeMeshes(*_interpolation_matrix);
-    std::vector< std::pair<int,int> > jobs=locator.getToDoList();
-    std::string srcMeth=locator.getSourceMethod();
-    std::string trgMeth=locator.getTargetMethod();
+    _locator = new OverlapElementLocator(_source_field,_target_field,*_group, getBoundingBoxAdjustmentAbs(), _load_balancing_algo);
+    _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this, *_locator);
+    _locator->copyOptions(*this);
+    _locator->exchangeMeshes(*_interpolation_matrix);
+    std::vector< std::pair<int,int> > jobs=_locator->getToDoList();
+    std::string srcMeth=_locator->getSourceMethod();
+    std::string trgMeth=_locator->getTargetMethod();
     for(std::vector< std::pair<int,int> >::const_iterator it=jobs.begin();it!=jobs.end();it++)
       {
-        const MEDCouplingPointSet *src=locator.getSourceMesh((*it).first);
-        const DataArrayInt *srcIds=locator.getSourceIds((*it).first);
-        const MEDCouplingPointSet *trg=locator.getTargetMesh((*it).second);
-        const DataArrayInt *trgIds=locator.getTargetIds((*it).second);
-        _interpolation_matrix->addContribution(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second);
+        const MEDCouplingPointSet *src=_locator->getSourceMesh((*it).first);
+        const DataArrayInt *srcIds=_locator->getSourceIds((*it).first);
+        const MEDCouplingPointSet *trg=_locator->getTargetMesh((*it).second);
+        const DataArrayInt *trgIds=_locator->getTargetIds((*it).second);
+        _interpolation_matrix->computeLocalIntersection(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second);
       }
-    _interpolation_matrix->prepare(locator.getProcsInInteraction());
-    _interpolation_matrix->computeDeno();
+    _interpolation_matrix->prepare(_locator->getProcsToSendFieldData());
+    _interpolation_matrix->computeSurfacesAndDeno();
   }
 
   void OverlapDEC::attachSourceLocalField(ParaFIELD *field, bool ownPt)
@@ -253,10 +324,39 @@ namespace ParaMEDMEM
     _own_target_field=ownPt;
   }
 
+  void OverlapDEC::attachSourceLocalField(MEDCouplingFieldDouble *field)
+  {
+    if(!isInGroup())
+      return ;
+
+    ParaMESH *paramesh = new ParaMESH(static_cast<MEDCouplingPointSet *>(const_cast<MEDCouplingMesh *>(field->getMesh())),
+                                      *_group,field->getMesh()->getName());
+    ParaFIELD *tmpField=new ParaFIELD(field, paramesh, *_group);
+    tmpField->setOwnSupport(true);
+    attachSourceLocalField(tmpField,true);
+  }
+
+  void OverlapDEC::attachTargetLocalField(MEDCouplingFieldDouble *field)
+  {
+    if(!isInGroup())
+      return ;
+
+    ParaMESH *paramesh = new ParaMESH(static_cast<MEDCouplingPointSet *>(const_cast<MEDCouplingMesh *>(field->getMesh())),
+                                      *_group,field->getMesh()->getName());
+    ParaFIELD *tmpField=new ParaFIELD(field, paramesh, *_group);
+    tmpField->setOwnSupport(true);
+    attachTargetLocalField(tmpField,true);
+  }
+
   bool OverlapDEC::isInGroup() const
   {
     if(!_group)
       return false;
     return _group->containsMyRank();
   }
+
+  void OverlapDEC::debugPrintWorkSharing(std::ostream & ostr) const
+  {
+    _locator->debugPrintWorkSharing(ostr);
+  }
 }