From 93aa2b4d6b1873e76a0226143dcc7f7a577489bf Mon Sep 17 00:00:00 2001 From: abn Date: Mon, 30 Nov 2015 09:20:28 +0100 Subject: [PATCH] OverlapDEC: many improvements/fixes: + fixed incorrect cell mapping in multiply() + local field data not sent anymore + introduced default field value for non-overlapping parts (not tested yet) + enhanced algorithm readability --- src/ParaMEDMEM/OverlapDEC.cxx | 30 +- src/ParaMEDMEM/OverlapDEC.hxx | 6 + src/ParaMEDMEM/OverlapElementLocator.cxx | 18 +- src/ParaMEDMEM/OverlapElementLocator.hxx | 9 +- src/ParaMEDMEM/OverlapInterpolationMatrix.cxx | 9 +- src/ParaMEDMEM/OverlapInterpolationMatrix.hxx | 6 +- src/ParaMEDMEM/OverlapMapping.cxx | 581 +++++++++++------- src/ParaMEDMEM/OverlapMapping.hxx | 53 +- src/ParaMEDMEMTest/ParaMEDMEMTest.hxx | 89 +-- .../ParaMEDMEMTest_OverlapDEC.cxx | 237 ++++++- 10 files changed, 706 insertions(+), 332 deletions(-) diff --git a/src/ParaMEDMEM/OverlapDEC.cxx b/src/ParaMEDMEM/OverlapDEC.cxx index 7093dac9b..6c2cfcd1b 100644 --- a/src/ParaMEDMEM/OverlapDEC.cxx +++ b/src/ParaMEDMEM/OverlapDEC.cxx @@ -208,9 +208,10 @@ namespace ParaMEDMEM The method in charge to perform this is : ParaMEDMEM::OverlapMapping::prepare. */ OverlapDEC::OverlapDEC(const std::set<int>& procIds, const MPI_Comm& world_comm): - _own_group(true),_interpolation_matrix(0), + _own_group(true),_interpolation_matrix(0), _locator(0), _source_field(0),_own_source_field(false), _target_field(0),_own_target_field(false), + _default_field_value(0.0), _comm(MPI_COMM_NULL) { ParaMEDMEM::CommInterface comm; @@ -243,6 +244,7 @@ if(_own_target_field) delete _target_field; delete _interpolation_matrix; + delete _locator; if (_comm != MPI_COMM_NULL) { ParaMEDMEM::CommInterface comm; @@ -260,7 +262,7 @@ void OverlapDEC::sendData() { - _interpolation_matrix->multiply(); + _interpolation_matrix->multiply(_default_field_value); } void OverlapDEC::recvData() @@ -281,22 +283,22 @@ if (_target_field->getField()->getNumberOfComponents() != _source_field->getField()->getNumberOfComponents()) throw INTERP_KERNEL::Exception("OverlapDEC::synchronize(): source and target field have different number of components!"); delete _interpolation_matrix; - _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this); - OverlapElementLocator locator(_source_field,_target_field,*_group, getBoundingBoxAdjustmentAbs()); - locator.copyOptions(*this); - locator.exchangeMeshes(*_interpolation_matrix); - std::vector< std::pair<int,int> > jobs=locator.getToDoList(); - std::string srcMeth=locator.getSourceMethod(); - std::string trgMeth=locator.getTargetMethod(); + _locator = new OverlapElementLocator(_source_field,_target_field,*_group, getBoundingBoxAdjustmentAbs()); + _interpolation_matrix=new OverlapInterpolationMatrix(_source_field,_target_field,*_group,*this,*this, *_locator); + _locator->copyOptions(*this); + _locator->exchangeMeshes(*_interpolation_matrix); + std::vector< std::pair<int,int> > jobs=_locator->getToDoList(); + std::string srcMeth=_locator->getSourceMethod(); + std::string trgMeth=_locator->getTargetMethod(); for(std::vector< std::pair<int,int> >::const_iterator it=jobs.begin();it!=jobs.end();it++) { - const MEDCouplingPointSet *src=locator.getSourceMesh((*it).first); - const DataArrayInt *srcIds=locator.getSourceIds((*it).first); - const MEDCouplingPointSet *trg=locator.getTargetMesh((*it).second); - const DataArrayInt *trgIds=locator.getTargetIds((*it).second); + const MEDCouplingPointSet 
*src=_locator->getSourceMesh((*it).first); + const DataArrayInt *srcIds=_locator->getSourceIds((*it).first); + const MEDCouplingPointSet *trg=_locator->getTargetMesh((*it).second); + const DataArrayInt *trgIds=_locator->getTargetIds((*it).second); _interpolation_matrix->addContribution(src,srcIds,srcMeth,(*it).first,trg,trgIds,trgMeth,(*it).second); } - _interpolation_matrix->prepare(locator.getProcsToSendFieldData()); + _interpolation_matrix->prepare(_locator->getProcsToSendFieldData()); _interpolation_matrix->computeDeno(); } diff --git a/src/ParaMEDMEM/OverlapDEC.hxx b/src/ParaMEDMEM/OverlapDEC.hxx index 0bf57b2ea..871cdc6e6 100644 --- a/src/ParaMEDMEM/OverlapDEC.hxx +++ b/src/ParaMEDMEM/OverlapDEC.hxx @@ -29,6 +29,7 @@ namespace ParaMEDMEM { class OverlapInterpolationMatrix; + class OverlapElementLocator; class ProcessorGroup; class ParaFIELD; @@ -45,11 +46,16 @@ namespace ParaMEDMEM void attachTargetLocalField(ParaFIELD *field, bool ownPt=false); ProcessorGroup *getGroup() { return _group; } bool isInGroup() const; + + void setDefaultValue(double val) {_default_field_value = val;} private: bool _own_group; OverlapInterpolationMatrix* _interpolation_matrix; + OverlapElementLocator* _locator; ProcessorGroup *_group; + double _default_field_value; + ParaFIELD *_source_field; bool _own_source_field; ParaFIELD *_target_field; diff --git a/src/ParaMEDMEM/OverlapElementLocator.cxx b/src/ParaMEDMEM/OverlapElementLocator.cxx index 134fc4008..0410526bf 100644 --- a/src/ParaMEDMEM/OverlapElementLocator.cxx +++ b/src/ParaMEDMEM/OverlapElementLocator.cxx @@ -119,7 +119,6 @@ namespace ParaMEDMEM if(intersectsBoundingBox(i,j)) { _proc_pairs[i].push_back(j); -// std::cout << "(" << _group.myRank() << ") PROC pair: " << i << "," << j << std::endl; } } // OK now let's assigning as balanced as possible, job to each proc of group @@ -141,11 +140,13 @@ namespace ParaMEDMEM int myProcId=_group.myRank(); _to_do_list=pairsToBeDonePerProc[myProcId]; -// std::stringstream scout; -// scout << "(" << myProcId << ") my TODO list is: "; -// for (std::vector< ProcCouple >::const_iterator itdbg=_to_do_list.begin(); itdbg!=_to_do_list.end(); itdbg++) -// scout << "(" << (*itdbg).first << "," << (*itdbg).second << ")"; -// std::cout << scout.str() << "\n"; +#ifdef DEC_DEBUG + std::stringstream scout; + scout << "(" << myProcId << ") my TODO list is: "; + for (std::vector< ProcCouple >::const_iterator itdbg=_to_do_list.begin(); itdbg!=_to_do_list.end(); itdbg++) + scout << "(" << (*itdbg).first << "," << (*itdbg).second << ")"; + std::cout << scout.str() << "\n"; +#endif // Feeding now '_procs_to_send*'. A same id can appears twice. 
The second parameter in pair means what // to send true=source, false=target @@ -265,6 +266,11 @@ namespace ParaMEDMEM return (*it).second; } + bool OverlapElementLocator::isInMyTodoList(int i, int j) const + { + ProcCouple cpl = std::make_pair(i, j); + return std::find(_to_do_list.begin(), _to_do_list.end(), cpl)!=_to_do_list.end(); + } bool OverlapElementLocator::intersectsBoundingBox(int isource, int itarget) const { diff --git a/src/ParaMEDMEM/OverlapElementLocator.hxx b/src/ParaMEDMEM/OverlapElementLocator.hxx index 566395f9a..493cd7ee4 100644 --- a/src/ParaMEDMEM/OverlapElementLocator.hxx +++ b/src/ParaMEDMEM/OverlapElementLocator.hxx @@ -48,13 +48,14 @@ namespace ParaMEDMEM const MPI_Comm *getCommunicator() const; void exchangeMeshes(OverlapInterpolationMatrix& matrix); std::vector< std::pair > getToDoList() const { return _to_do_list; } - std::vector< int > getProcsToSendFieldData() const { return _procs_to_send_field; } + std::vector< int > getProcsToSendFieldData() const { return _procs_to_send_field; } // same set as the set of procs we sent mesh data to std::string getSourceMethod() const; std::string getTargetMethod() const; const MEDCouplingPointSet *getSourceMesh(int procId) const; const DataArrayInt *getSourceIds(int procId) const; const MEDCouplingPointSet *getTargetMesh(int procId) const; const DataArrayInt *getTargetIds(int procId) const; + bool isInMyTodoList(int i, int j) const; private: void computeBoundingBoxesAndTodoList(); bool intersectsBoundingBox(int i, int j) const; @@ -83,9 +84,9 @@ namespace ParaMEDMEM std::vector< ProcCouple > _to_do_list; //! list of procs the local proc will have to send mesh data to: std::vector< Proc_SrcOrTgt > _procs_to_send_mesh; - /*! list of procs the local proc will have to send field data to for the final matrix-vector computation: - * This can be different from _procs_to_send_mesh because interpolation matrix bits are computed on a potentially - * different proc than the target one. */ +// /*! list of procs the local proc will have to send field data to for the final matrix-vector computation: +// * This can be different from _procs_to_send_mesh (restricted to Source) because interpolation matrix bits are computed on a potentially +// * different proc than the target one. */ std::vector< int > _procs_to_send_field; //! 
Set of distant meshes std::map< Proc_SrcOrTgt, AutoMCPointSet > _remote_meshes; diff --git a/src/ParaMEDMEM/OverlapInterpolationMatrix.cxx b/src/ParaMEDMEM/OverlapInterpolationMatrix.cxx index 95b7e1b94..52f89e47e 100644 --- a/src/ParaMEDMEM/OverlapInterpolationMatrix.cxx +++ b/src/ParaMEDMEM/OverlapInterpolationMatrix.cxx @@ -48,14 +48,15 @@ namespace ParaMEDMEM ParaFIELD *target_field, const ProcessorGroup& group, const DECOptions& dec_options, - const INTERP_KERNEL::InterpolationOptions& i_opt): + const INTERP_KERNEL::InterpolationOptions& i_opt, + const OverlapElementLocator & locator): INTERP_KERNEL::InterpolationOptions(i_opt), DECOptions(dec_options), _source_field(source_field), _target_field(target_field), _source_support(source_field->getSupport()->getCellMesh()), _target_support(target_field->getSupport()->getCellMesh()), - _mapping(group), + _mapping(group, locator), _group(group) { } @@ -261,9 +262,9 @@ namespace ParaMEDMEM throw INTERP_KERNEL::Exception("Policy Not implemented yet : only ConservativeVolumic defined !"); } - void OverlapInterpolationMatrix::multiply() + void OverlapInterpolationMatrix::multiply(double default_val) { - _mapping.multiply(_source_field->getField(),_target_field->getField()); + _mapping.multiply(_source_field->getField(),_target_field->getField(), default_val); } void OverlapInterpolationMatrix::transposeMultiply() diff --git a/src/ParaMEDMEM/OverlapInterpolationMatrix.hxx b/src/ParaMEDMEM/OverlapInterpolationMatrix.hxx index 2447079ee..30319196a 100644 --- a/src/ParaMEDMEM/OverlapInterpolationMatrix.hxx +++ b/src/ParaMEDMEM/OverlapInterpolationMatrix.hxx @@ -45,7 +45,8 @@ namespace ParaMEDMEM ParaFIELD *target_field, const ProcessorGroup& group, const DECOptions& dec_opt, - const InterpolationOptions& i_opt); + const InterpolationOptions& i_opt, + const OverlapElementLocator & loc); void keepTracksOfSourceIds(int procId, DataArrayInt *ids); @@ -58,7 +59,7 @@ namespace ParaMEDMEM void computeDeno(); - void multiply(); + void multiply(double default_val); void transposeMultiply(); @@ -67,7 +68,6 @@ namespace ParaMEDMEM static void TransposeMatrix(const std::vector& matIn, int nbColsMatIn, std::vector& matOut); -// bool isSurfaceComputationNeeded(const std::string& method) const; private: ParaFIELD *_source_field; ParaFIELD *_target_field; diff --git a/src/ParaMEDMEM/OverlapMapping.cxx b/src/ParaMEDMEM/OverlapMapping.cxx index 6871f1f35..28e1f4b3e 100644 --- a/src/ParaMEDMEM/OverlapMapping.cxx +++ b/src/ParaMEDMEM/OverlapMapping.cxx @@ -31,7 +31,8 @@ using namespace ParaMEDMEM; -OverlapMapping::OverlapMapping(const ProcessorGroup& group):_group(group) +OverlapMapping::OverlapMapping(const ProcessorGroup& group, const OverlapElementLocator & loc): + _group(group),_locator(loc) { } @@ -70,21 +71,21 @@ void OverlapMapping::addContributionST(const std::vector< SparseDoubleVec >& mat _matrixes_st.push_back(matrixST); _source_proc_id_st.push_back(srcProcId); _target_proc_id_st.push_back(trgProcId); - if(srcIds) // source mesh part is remote - {//item#1 of step2 algorithm in proc m. 
Only to know in advanced nb of recv ids [ (0,1) computed on proc1 and Matrix-Vector on proc1 ] - _nb_of_src_ids_proc_st2.push_back(srcIds->getNumberOfTuples()); - _src_ids_proc_st2.push_back(srcProcId); + if(srcIds) // source mesh part is remote <=> srcProcId != myRank + { + _rcv_src_ids_proc_st2.push_back(srcProcId); + _nb_of_rcv_src_ids_proc_st2.push_back(srcIds->getNumberOfTuples()); + } else // source mesh part is local - {//item#0 of step2 algorithm in proc k + { std::set<int> s; // For all source IDs (=col indices) in the sparse matrix: for(std::vector< SparseDoubleVec >::const_iterator it1=matrixST.begin();it1!=matrixST.end();it1++) for(SparseDoubleVec::const_iterator it2=(*it1).begin();it2!=(*it1).end();it2++) s.insert((*it2).first); + _src_ids_zip_proc_st2.push_back(trgProcId); _src_ids_zip_st2.resize(_src_ids_zip_st2.size()+1); _src_ids_zip_st2.back().insert(_src_ids_zip_st2.back().end(),s.begin(),s.end()); - _src_ids_zip_proc_st2.push_back(trgProcId); } } @@ -98,7 +99,9 @@ */ void OverlapMapping::prepare(const std::vector< int >& procsToSendField, int nbOfTrgElems) { -// printMatrixesST(); +#ifdef DEC_DEBUG + printMatrixesST(); +#endif CommInterface commInterface=_group.getCommInterface(); const MPIProcessorGroup *group=static_cast<const MPIProcessorGroup *>(&_group); @@ -150,51 +153,45 @@ unserializationST(nbOfTrgElems,nbrecv,bigArrRecv,nbrecv1,nbrecv2, bigArrRecv2,bigArrDRecv2,nbrecv3,nbrecv4); //updating _src_ids_zip_st2 and _src_ids_zip_proc_st2 with received matrix. - updateZipSourceIdsForFuture(); + updateZipSourceIdsForMultiply(); //finish to fill _the_matrix_st with already in place matrix in _matrixes_st (local computation) finishToFillFinalMatrixST(); // Prepare proc lists for future field data exchange (multiply()): - fillProcToSendRcvForMultiply(procsToSendField); + _proc_ids_to_recv_vector_st = _the_matrix_st_source_proc_id; + _proc_ids_to_send_vector_st = procsToSendField; // Make some space on local proc: _matrixes_st.clear(); -// std::stringstream scout; -// scout << "\n(" << myProcId << ") to send:"; -// for (std::vector< int >::const_iterator itdbg=_proc_ids_to_send_vector_st.begin(); itdbg!=_proc_ids_to_send_vector_st.end(); itdbg++) -// scout << "," << *itdbg; -// scout << "\n(" << myProcId << ") to recv:"; -// for (std::vector< int >::const_iterator itdbg=_proc_ids_to_recv_vector_st.begin(); itdbg!=_proc_ids_to_recv_vector_st.end(); itdbg++) -// scout << "," << *itdbg; -// std::cout << scout.str() << "\n"; -// -// printTheMatrix(); +#ifdef DEC_DEBUG + printTheMatrix(); +#endif } -/** - * Fill the members _proc_ids_to_send_vector_st and _proc_ids_to_recv_vector_st - * indicating for each proc to which proc it should send its source field data - * and from which proc it should receive source field data. - * - * Should be called once finishToFillFinalMatrixST() has been executed, i.e. when the - * local matrices are completly ready.
- */ -void OverlapMapping::fillProcToSendRcvForMultiply(const std::vector< int >& procsToSendField) -{ -// _proc_ids_to_send_vector_st.clear(); - _proc_ids_to_recv_vector_st.clear(); - // Receiving side is easy - just inspect non-void terms in the final matrix - int i=0; - std::vector< std::vector< SparseDoubleVec > >::const_iterator it; - std::vector< SparseDoubleVec >::const_iterator it2; - for(it=_the_matrix_st.begin(); it!=_the_matrix_st.end(); it++, i++) - _proc_ids_to_recv_vector_st.push_back(_the_matrix_st_source_proc_id[i]); - - // Sending side was computed in OverlapElementLocator::computeBoundingBoxesAndTodoList() - _proc_ids_to_send_vector_st = procsToSendField; -} +///** +// * Fill the members _proc_ids_to_send_vector_st and _proc_ids_to_recv_vector_st +// * indicating for each proc to which proc it should send its source field data +// * and from which proc it should receive source field data. +// * +// * Should be called once finishToFillFinalMatrixST() has been executed, i.e. when the +// * local matrices are completely ready. +// */ +//void OverlapMapping::fillProcToSendRcvForMultiply(const std::vector< int >& procsToSendField) +//{ +//// _proc_ids_to_send_vector_st.clear(); +// _proc_ids_to_recv_vector_st.clear(); +// // Receiving side is easy - just inspect non-void terms in the final matrix +// int i=0; +// std::vector< std::vector< SparseDoubleVec > >::const_iterator it; +// std::vector< SparseDoubleVec >::const_iterator it2; +// for(it=_the_matrix_st.begin(); it!=_the_matrix_st.end(); it++, i++) +// _proc_ids_to_recv_vector_st.push_back(_the_matrix_st_source_proc_id[i]); +// +// // Sending side was computed in OverlapElementLocator::computeBoundingBoxesAndTodoList() +// _proc_ids_to_send_vector_st = procsToSendField; +//} /*! - * Compute denominators. + * Compute denominators for IntegralGlobConstraint interp. */ void OverlapMapping::computeDenoGlobConstraint() { @@ -219,7 +216,7 @@ } /*! - * Compute denominators. + * Compute denominators for ConservativeVolumic interp. */ void OverlapMapping::computeDenoConservativeVolumic(int nbOfTuplesTrg) { @@ -279,6 +276,7 @@ denoM[rowId][(*it2).first]=deno[trgIds2[rowId]]; } } +// printDenoMatrix(); } /*! @@ -468,77 +466,105 @@ * This method performs a transpose multiply of 'fieldInput' and put the result into 'fieldOutput'. * 'fieldInput' is expected to be the sourcefield and 'fieldOutput' the targetfield. 
*/ -void OverlapMapping::multiply(const MEDCouplingFieldDouble *fieldInput, MEDCouplingFieldDouble *fieldOutput) const +void OverlapMapping::multiply(const MEDCouplingFieldDouble *fieldInput, MEDCouplingFieldDouble *fieldOutput, double default_val) const { + using namespace std; + int nbOfCompo=fieldInput->getNumberOfComponents();//to improve same number of components to test CommInterface commInterface=_group.getCommInterface(); const MPIProcessorGroup *group=static_cast(&_group); const MPI_Comm *comm=group->getComm(); int grpSize=_group.size(); - int myProcId=_group.myRank(); + int myProcID=_group.myRank(); // INTERP_KERNEL::AutoPtr nbsend=new int[grpSize]; INTERP_KERNEL::AutoPtr nbsend2=new int[grpSize]; INTERP_KERNEL::AutoPtr nbrecv=new int[grpSize]; INTERP_KERNEL::AutoPtr nbrecv2=new int[grpSize]; - std::fill(nbsend,nbsend+grpSize,0); - std::fill(nbrecv,nbrecv+grpSize,0); + fill(nbsend,nbsend+grpSize,0); + fill(nbrecv,nbrecv+grpSize,0); nbsend2[0]=0; nbrecv2[0]=0; - std::vector valsToSend; + vector valsToSend; /* - * FIELD VALUE XCHGE + * FIELD VALUE XCHGE: + * We call the 'BB source IDs' (bounding box source IDs) the set of source cell IDs transmitted just based on the bounding box information. + * This is potentially bigger than what is finally in the interp matrix and this is stored in _sent_src_ids_st2. + * We call 'interp source IDs' the set of source cell IDs with non null entries in the interp matrix. This is a sub-set of the above. */ - for(int i=0;i::const_iterator isItem1=std::find(_sent_src_proc_st2.begin(),_sent_src_proc_st2.end(),i); - MEDCouplingAutoRefCountObjectPtr vals; - if(isItem1!=_sent_src_proc_st2.end()) // source data was sent to proc 'i' - { - // Prepare local field data to send to proc i - int id=std::distance(_sent_src_proc_st2.begin(),isItem1); - vals=fieldInput->getArray()->selectByTupleId(*_sent_src_ids_st2[id]); - } - else // no source data was sent to proc 'i' - {//item0 of step2 main algo - std::vector::const_iterator isItem22 = std::find(_src_ids_zip_proc_st2.begin(),_src_ids_zip_proc_st2.end(),i ); - int id=std::distance(_src_ids_zip_proc_st2.begin(),isItem22); - if (isItem22 == _src_ids_zip_proc_st2.end()) - std::cout << "(" << myProcId << ") has end iterator!!!!!\n"; - vals=fieldInput->getArray()->selectByTupleId(&(_src_ids_zip_st2[id])[0],&(_src_ids_zip_st2[id])[0]+_src_ids_zip_st2[id].size()); - } - nbsend[i]=vals->getNbOfElems(); - valsToSend.insert(valsToSend.end(),vals->getConstPointer(),vals->getConstPointer()+nbsend[i]); - } - if(std::find(_proc_ids_to_recv_vector_st.begin(),_proc_ids_to_recv_vector_st.end(),i)!=_proc_ids_to_recv_vector_st.end()) - { - std::vector::const_iterator isItem0=std::find(_sent_trg_proc_st2.begin(),_sent_trg_proc_st2.end(),i); - if(isItem0==_sent_trg_proc_st2.end())//item1 of step2 main algo [ (0,1) computed on proc1 and Matrix-Vector on proc1 ] - { - std::vector::const_iterator it1=std::find(_src_ids_proc_st2.begin(),_src_ids_proc_st2.end(),i); - if(it1!=_src_ids_proc_st2.end()) - { - int id=std::distance(_src_ids_proc_st2.begin(),it1); - nbrecv[i]=_nb_of_src_ids_proc_st2[id]*nbOfCompo; - } - else if(i==myProcId) // diagonal element (i.e. 
proc #i talking to same proc #i) - { - nbrecv[i] = nbsend[i]; - } - else - throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): Internal error at field values exchange!"); - } - else - {//item0 of step2 main algo [ (2,1) computed on proc2 but Matrix-Vector on proc1 ] [(1,0) computed on proc1 but Matrix-Vector on proc0] - int id=std::distance(_src_ids_zip_proc_st2.begin(),std::find(_src_ids_zip_proc_st2.begin(),_src_ids_zip_proc_st2.end(),i)); - nbrecv[i]=_src_ids_zip_st2[id].size()*nbOfCompo; - } - } + /* SENDING part: compute field values to be SENT (and how many of them) + * - for all proc 'procID' in group + * * if procID == myProcID, send nothing + * * elif 'procID' in _proc_ids_to_send_vector_st (computed from the BB intersection) + * % if myProcID computed the job (myProcID, procID) + * => send only 'interp source IDs' field values (i.e. IDs stored in _src_ids_zip_proc_st2) + * % else (=we just sent mesh data to procID, but have never seen the matrix, i.e. matrix was computed remotely by procID) + * => send 'BB source IDs' set of field values (i.e. IDs stored in _sent_src_ids_st2) + */ + if (procID == myProcID) + nbsend[procID] = 0; + else + if(find(_proc_ids_to_send_vector_st.begin(),_proc_ids_to_send_vector_st.end(),procID)!=_proc_ids_to_send_vector_st.end()) + { + MEDCouplingAutoRefCountObjectPtr vals; + if(_locator.isInMyTodoList(myProcID, procID)) + { + vector::const_iterator isItem11 = find(_src_ids_zip_proc_st2.begin(),_src_ids_zip_proc_st2.end(),procID); + if (isItem11 == _src_ids_zip_proc_st2.end()) + throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): internal error: SEND: unexpected end iterator in _src_ids_zip_proc_st2!"); + int id=distance(_src_ids_zip_proc_st2.begin(),isItem11); + int sz=_src_ids_zip_st2[id].size(); + vals=fieldInput->getArray()->selectByTupleId(&(_src_ids_zip_st2[id])[0],&(_src_ids_zip_st2[id])[0]+sz); + } + else + { + vector::const_iterator isItem11 = find(_sent_src_proc_st2.begin(),_sent_src_proc_st2.end(),procID ); + if (isItem11 == _sent_src_proc_st2.end()) + throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): internal error: SEND: unexpected end iterator in _sent_src_proc_st2!"); + int id=distance(_sent_src_proc_st2.begin(),isItem11); + vals=fieldInput->getArray()->selectByTupleId(*_sent_src_ids_st2[id]); + } + nbsend[procID] = vals->getNbOfElems(); + valsToSend.insert(valsToSend.end(),vals->getConstPointer(),vals->getConstPointer()+nbsend[procID]); + } + + /* RECEIVE: compute number of field values to be RECEIVED + * - for all proc 'procID' in group + * * if procID == myProcID, rcv nothing + * * elif 'procID' in _proc_ids_to_recv_vector_st (computed from BB intersec) + * % if myProcID computed the job (procID, myProcID) + * => receive full set ('BB source IDs') of field data from proc #procID which has never seen the matrix + * i.e. 
prepare to receive the numb in _nb_of_rcv_src_ids_proc_st2 + * % else (=we did NOT compute the job, hence procID has, and knows the matrix) + * => receive 'interp source IDs' set of field values + */ + if (procID == myProcID) + nbrecv[procID] = 0; + else + if(find(_proc_ids_to_recv_vector_st.begin(),_proc_ids_to_recv_vector_st.end(),procID)!=_proc_ids_to_recv_vector_st.end()) + { + if(_locator.isInMyTodoList(procID, myProcID)) + { + vector::const_iterator isItem11 = find(_rcv_src_ids_proc_st2.begin(),_rcv_src_ids_proc_st2.end(),procID); + if (isItem11 == _rcv_src_ids_proc_st2.end()) + throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): internal error: RCV: unexpected end iterator in _rcv_src_ids_proc_st2!"); + int id=distance(_rcv_src_ids_proc_st2.begin(),isItem11); + nbrecv[procID] = _nb_of_rcv_src_ids_proc_st2[id]; + } + else + { + vector::const_iterator isItem11 = find(_src_ids_zip_proc_st2.begin(),_src_ids_zip_proc_st2.end(),procID); + if (isItem11 == _src_ids_zip_proc_st2.end()) + throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): internal error: RCV: unexpected end iterator in _src_ids_zip_proc_st2!"); + int id=distance(_src_ids_zip_proc_st2.begin(),isItem11); + nbrecv[procID] = _src_ids_zip_st2[id].size()*nbOfCompo; + } + } } + // Compute offsets in the sending/receiving array. for(int i=1;i bigArr=new double[nbrecv2[grpSize-1]+nbrecv[grpSize-1]]; -// scout << "(" << myProcId << ") nbsend :" << nbsend[0] << "," << nbsend[1] << "," << nbsend[2] << "\n"; -// scout << "(" << myProcId << ") nbrecv :" << nbrecv[0] << "," << nbrecv[1] << "," << nbrecv[2] << "\n"; -// std::cout << scout.str() << "\n"; +#ifdef DEC_DEBUG + stringstream scout; + scout << "(" << myProcID << ") nbsend :" << nbsend[0] << "," << nbsend[1] << "," << nbsend[2] << "\n"; + scout << "(" << myProcID << ") nbrecv :" << nbrecv[0] << "," << nbrecv[1] << "," << nbrecv[2] << "\n"; + scout << "(" << myProcID << ") valsToSend: "; + for (int iii=0; iiigetArray()->fillWithZero(); INTERP_KERNEL::AutoPtr tmp=new double[nbOfCompo]; - for(int i=0;i hit_cells = new bool[fieldOutput->getNumberOfTuples()]; + + for(vector::const_iterator itProc=_the_matrix_st_source_proc_id.begin(); itProc != _the_matrix_st_source_proc_id.end();itProc++) + // For each source processor corresponding to a locally held matrix: { - if(nbrecv[i]>0) + int srcProcID = *itProc; + int id = distance(_the_matrix_st_source_proc_id.begin(),itProc); + const vector< SparseDoubleVec >& mat =_the_matrix_st[id]; + const vector< SparseDoubleVec >& deno = _the_deno_st[id]; + + /* FINAL MULTIPLICATION + * * if srcProcID == myProcID, local multiplication without any mapping + * => for all target cell ID 'tgtCellID' + * => for all src cell ID 'srcCellID' in the sparse vector + * => tgtFieldLocal[tgtCellID] += srcFieldLocal[srcCellID] * matrix[tgtCellID][srcCellID] / deno[tgtCellID][srcCellID] + */ + if (srcProcID == myProcID) { - double *pt=fieldOutput->getArray()->getPointer(); - std::vector::const_iterator it=std::find(_the_matrix_st_source_proc_id.begin(),_the_matrix_st_source_proc_id.end(),i); - if(it==_the_matrix_st_source_proc_id.end()) - throw INTERP_KERNEL::Exception("Big problem !"); - int id=std::distance(_the_matrix_st_source_proc_id.begin(),it); - const std::vector< SparseDoubleVec >& mat=_the_matrix_st[id]; - const std::vector< SparseDoubleVec >& deno=_the_deno_st[id]; - std::vector::const_iterator isItem0=std::find(_sent_trg_proc_st2.begin(),_sent_trg_proc_st2.end(),i); - if(isItem0==_sent_trg_proc_st2.end())//item1 of step2 main algo [ (0,1) 
computed on proc1 and Matrix-Vector on proc1 ] + int nbOfTrgTuples=mat.size(); + double * targetBase = fieldOutput->getArray()->getPointer(); + for(int j=0; jgetArray()->getConstPointer(); + double * targetPt = targetBase+j*nbOfCompo; + for(SparseDoubleVec::const_iterator it3=mat1.begin();it3!=mat1.end();it3++,it5++) { - const SparseDoubleVec& mat1=mat[j]; - const SparseDoubleVec& deno1=deno[j]; - SparseDoubleVec::const_iterator it4=deno1.begin(); - for(SparseDoubleVec::const_iterator it3=mat1.begin();it3!=mat1.end();it3++,it4++) - { - std::transform(bigArr+nbrecv2[i]+((*it3).first)*nbOfCompo, - bigArr+nbrecv2[i]+((*it3).first+1)*(nbOfCompo), - (double *)tmp, - std::bind2nd(std::multiplies(),(*it3).second/(*it4).second)); - std::transform((double *)tmp,(double *)tmp+nbOfCompo,pt,pt,std::plus()); - } + // Apply the multiplication for all components: + double ratio = (*it3).second/(*it5).second; + transform(localSrcField+((*it3).first)*nbOfCompo, + localSrcField+((*it3).first+1)*nbOfCompo, + (double *)tmp, + bind2nd(multiplies(),ratio) ); + // Accumulate with current value: + transform((double *)tmp,(double *)tmp+nbOfCompo,targetPt,targetPt,plus()); + hit_cells[j] = true; } } - else - {//item0 of step2 main algo [ (2,1) computed on proc2 but Matrix-Vector on proc1 ] - double *pt=fieldOutput->getArray()->getPointer(); - std::map zipCor; - int id=std::distance(_src_ids_zip_proc_st2.begin(),std::find(_src_ids_zip_proc_st2.begin(),_src_ids_zip_proc_st2.end(),i)); - const std::vector zipIds=_src_ids_zip_st2[id]; - int newId=0; - for(std::vector::const_iterator it=zipIds.begin();it!=zipIds.end();it++,newId++) - zipCor[*it]=newId; - int id2=std::distance(_sent_trg_proc_st2.begin(),std::find(_sent_trg_proc_st2.begin(),_sent_trg_proc_st2.end(),i)); - const DataArrayInt *tgrIds=_sent_trg_ids_st2[id2]; - const int *tgrIds2=tgrIds->getConstPointer(); - int nbOfTrgTuples=mat.size(); - for(int j=0;j invert _src_ids_zip_st2 -> 'revert_zip' + * => for all target cell ID 'tgtCellID' + * => mappedTgtID = _sent_trg_ids_st2[srcProcID][tgtCellID] + * => for all src cell ID 'srcCellID' in the sparse vector + * => idx = revert_zip[srcCellID] + * => tgtFieldLocal[mappedTgtID] += rcvValue[srcProcID][idx] * matrix[tgtCellID][srcCellID] / deno[tgtCellID][srcCellID] + */ + if(!_locator.isInMyTodoList(srcProcID, myProcID)) + { + // invert _src_ids_zip_proc_st2 + map revert_zip; + vector< int >::const_iterator it11= find(_src_ids_zip_proc_st2.begin(),_src_ids_zip_proc_st2.end(),srcProcID); + if (it11 == _src_ids_zip_proc_st2.end()) + throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): internal error: MULTIPLY: unexpected end iterator in _src_ids_zip_proc_st2!"); + int id1=distance(_src_ids_zip_proc_st2.begin(),it11); + int newId=0; + for(vector::const_iterator it=_src_ids_zip_st2[id1].begin();it!=_src_ids_zip_st2[id1].end();it++,newId++) + revert_zip[*it]=newId; + vector::const_iterator isItem24 = find(_sent_trg_proc_st2.begin(),_sent_trg_proc_st2.end(),srcProcID); + if (isItem24 == _sent_trg_proc_st2.end()) + throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): internal error: MULTIPLY: unexpected end iterator in _sent_trg_proc_st2!"); + int id2 = distance(_sent_trg_proc_st2.begin(),isItem24); + const DataArrayInt *tgrIdsDA=_sent_trg_ids_st2[id2]; + const int *tgrIds = tgrIdsDA->getConstPointer(); + + int nbOfTrgTuples=mat.size(); + double * targetBase = fieldOutput->getArray()->getPointer(); + for(int j=0;j::const_iterator it4=revert_zip.find((*it3).first); + if(it4==revert_zip.end()) + throw 
INTERP_KERNEL::Exception("OverlapMapping::multiply(): internal error: MULTIPLY: unexpected end iterator in revert_zip!"); + double ratio = (*it3).second/(*it5).second; + transform(bigArr+nbrecv2[srcProcID]+((*it4).second)*nbOfCompo, + bigArr+nbrecv2[srcProcID]+((*it4).second+1)*nbOfCompo, + (double *)tmp, + bind2nd(multiplies(),ratio) ); + transform((double *)tmp,(double *)tmp+nbOfCompo,targetPt,targetPt,plus()); + hit_cells[tgrIds[j]] = true; + } + } + } + else + /* % else (=we computed the job and we received the 'BB source IDs' set of source field values) + * => for all target cell ID 'tgtCellID' + * => for all src cell ID 'srcCellID' in the sparse vector + * => tgtFieldLocal[tgtCellID] += rcvValue[srcProcID][srcCellID] * matrix[tgtCellID][srcCellID] / deno[tgtCellID][srcCellID] + */ + { + // Same loop as in the case srcProcID == myProcID, except that instead of working on local field data, we work on bigArr + int nbOfTrgTuples=mat.size(); + double * targetBase = fieldOutput->getArray()->getPointer(); + for(int j=0;j::const_iterator it4=zipCor.find((*it3).first); - if(it4==zipCor.end()) - throw INTERP_KERNEL::Exception("OverlapMapping::multiply(): Internal error in final value computation!"); - std::transform(bigArr+nbrecv2[i]+((*it4).second)*nbOfCompo, - bigArr+nbrecv2[i]+((*it4).second+1)*(nbOfCompo), - (double *)tmp, - std::bind2nd(std::multiplies(),(*it3).second/(*it5).second)); - std::transform((double *)tmp,(double *)tmp+nbOfCompo,pt+tgrIds2[j]*nbOfCompo,pt+tgrIds2[j]*nbOfCompo,std::plus()); - } + // Apply the multiplication for all components: + double ratio = (*it3).second/(*it5).second; + transform(bigArr+nbrecv2[srcProcID]+((*it3).first)*nbOfCompo, + bigArr+nbrecv2[srcProcID]+((*it3).first+1)*nbOfCompo, + (double *)tmp, + bind2nd(multiplies(),ratio)); + // Accumulate with current value: + transform((double *)tmp,(double *)tmp+nbOfCompo,targetPt,targetPt,plus()); + hit_cells[j] = true; } } } } + + // Fill in default values for cells which haven't been hit: + int i = 0; + for(bool * hit_cells_ptr=hit_cells; i< fieldOutput->getNumberOfTuples(); hit_cells_ptr++,i++) + if (!(*hit_cells_ptr)) + { + double * targetPt=fieldOutput->getArray()->getPointer(); + fill(targetPt+i*nbOfCompo, targetPt+(i+1)*nbOfCompo, default_val); + } } /*! @@ -636,10 +751,9 @@ void OverlapMapping::transposeMultiply(const MEDCouplingFieldDouble *fieldInput, /*! * This method should be called immediately after _the_matrix_st has been filled with remote computed matrix * put in this proc for Matrix-Vector. - * This method computes for these matrix the minimal set of source ids corresponding to the source proc id. - * + * It finishes the filling _src_ids_zip_st2 and _src_ids_zip_proc_st2 (see member doc) */ -void OverlapMapping::updateZipSourceIdsForFuture() +void OverlapMapping::updateZipSourceIdsForMultiply() { /* When it is called, only the bits received from other processors (i.e. the remotely executed jobs) are in the big matrix _the_matrix_st. 
*/ @@ -650,7 +764,7 @@ void OverlapMapping::updateZipSourceIdsForFuture() for(int i=0;i& mat=_the_matrix_st[i]; _src_ids_zip_proc_st2.push_back(curSrcProcId); @@ -660,70 +774,97 @@ void OverlapMapping::updateZipSourceIdsForFuture() for(SparseDoubleVec::const_iterator it2=(*it1).begin();it2!=(*it1).end();it2++) s.insert((*it2).first); _src_ids_zip_st2.back().insert(_src_ids_zip_st2.back().end(),s.begin(),s.end()); - -// std::stringstream scout; -// scout << "(" << myProcId << ") pushed " << _src_ids_zip_st2.back().size() << " values for tgt proc " << curSrcProcId; -// std::cout << scout.str() << "\n"; } } } -// void OverlapMapping::printTheMatrix() const -// { -// CommInterface commInterface=_group.getCommInterface(); -// const MPIProcessorGroup *group=static_cast(&_group); -// const MPI_Comm *comm=group->getComm(); -// int grpSize=_group.size(); -// int myProcId=_group.myRank(); -// std::stringstream oscerr; -// int nbOfMat=_the_matrix_st.size(); -// oscerr << "(" << myProcId << ") I hold " << nbOfMat << " matrix(ces) : "<< std::endl; -// for(int i=0;i& locMat=_the_matrix_st[i]; -// int j = 0; -// for(std::vector< SparseDoubleVec >::const_iterator it1=locMat.begin();it1!=locMat.end();it1++, j++) -// { -// oscerr << " Target Cell #" << j; -// for(SparseDoubleVec::const_iterator it2=(*it1).begin();it2!=(*it1).end();it2++) -// oscerr << " (" << (*it2).first << "," << (*it2).second << "), "; -// oscerr << std::endl; -// } -// } -// oscerr << "*********" << std::endl; -// -// // Hope this will be flushed in one go: -// std::cerr << oscerr.str() << std::endl; -//// if(myProcId != 0) -//// MPI_Barrier(MPI_COMM_WORLD); -// } -// -// void OverlapMapping::printMatrixesST() const -// { -// CommInterface commInterface=_group.getCommInterface(); -// const MPIProcessorGroup *group=static_cast(&_group); -// const MPI_Comm *comm=group->getComm(); -// int grpSize=_group.size(); -// int myProcId=_group.myRank(); -// std::stringstream oscerr; -// int nbOfMat=_matrixes_st.size(); -// oscerr << "(" << myProcId << ") I hold " << nbOfMat << " LOCAL matrix(ces) : "<< std::endl; -// for(int i=0;i& locMat=_matrixes_st[i]; -// int j = 0; -// for(std::vector< SparseDoubleVec >::const_iterator it1=locMat.begin();it1!=locMat.end();it1++, j++) -// { -// oscerr << " Target Cell #" << j; -// for(SparseDoubleVec::const_iterator it2=(*it1).begin();it2!=(*it1).end();it2++) -// oscerr << " (" << (*it2).first << "," << (*it2).second << "), "; -// oscerr << std::endl; -// } -// } -// oscerr << "*********" << std::endl; -// -// // Hope this will be flushed in one go: -// std::cerr << oscerr.str() << std::endl; -// } +#ifdef DEC_DEBUG + void OverlapMapping::printTheMatrix() const + { + CommInterface commInterface=_group.getCommInterface(); + const MPIProcessorGroup *group=static_cast(&_group); + const MPI_Comm *comm=group->getComm(); + int grpSize=_group.size(); + int myProcId=_group.myRank(); + std::stringstream oscerr; + int nbOfMat=_the_matrix_st.size(); + oscerr << "(" << myProcId << ") I hold " << nbOfMat << " matrix(ces) : "<< std::endl; + for(int i=0;i& locMat=_the_matrix_st[i]; + int j = 0; + for(std::vector< SparseDoubleVec >::const_iterator it1=locMat.begin();it1!=locMat.end();it1++, j++) + { + oscerr << " Target Cell #" << j; + for(SparseDoubleVec::const_iterator it2=(*it1).begin();it2!=(*it1).end();it2++) + oscerr << " (" << (*it2).first << "," << (*it2).second << "), "; + oscerr << std::endl; + } + } + oscerr << "*********" << std::endl; + + // Hope this will be flushed in one go: + std::cerr << oscerr.str() << 
std::endl; +// if(myProcId != 0) +// MPI_Barrier(MPI_COMM_WORLD); + } + + void OverlapMapping::printMatrixesST() const + { + CommInterface commInterface=_group.getCommInterface(); + const MPIProcessorGroup *group=static_cast(&_group); + const MPI_Comm *comm=group->getComm(); + int grpSize=_group.size(); + int myProcId=_group.myRank(); + std::stringstream oscerr; + int nbOfMat=_matrixes_st.size(); + oscerr << "(" << myProcId << ") I hold " << nbOfMat << " LOCAL matrix(ces) : "<< std::endl; + for(int i=0;i& locMat=_matrixes_st[i]; + int j = 0; + for(std::vector< SparseDoubleVec >::const_iterator it1=locMat.begin();it1!=locMat.end();it1++, j++) + { + oscerr << " Target Cell #" << j; + for(SparseDoubleVec::const_iterator it2=(*it1).begin();it2!=(*it1).end();it2++) + oscerr << " (" << (*it2).first << "," << (*it2).second << "), "; + oscerr << std::endl; + } + } + oscerr << "*********" << std::endl; + + // Hope this will be flushed in one go: + std::cerr << oscerr.str() << std::endl; + } + + void OverlapMapping::printDenoMatrix() const + { + CommInterface commInterface=_group.getCommInterface(); + const MPIProcessorGroup *group=static_cast(&_group); + const MPI_Comm *comm=group->getComm(); + int grpSize=_group.size(); + int myProcId=_group.myRank(); + std::stringstream oscerr; + int nbOfMat=_the_deno_st.size(); + oscerr << "(" << myProcId << ") I hold " << nbOfMat << " DENOMINATOR matrix(ces) : "<< std::endl; + for(int i=0;i& locMat=_the_deno_st[i]; + int j = 0; + for(std::vector< SparseDoubleVec >::const_iterator it1=locMat.begin();it1!=locMat.end();it1++, j++) + { + oscerr << " Target Cell #" << j; + for(SparseDoubleVec::const_iterator it2=(*it1).begin();it2!=(*it1).end();it2++) + oscerr << " (" << (*it2).first << "," << (*it2).second << "), "; + oscerr << std::endl; + } + } + oscerr << "*********" << std::endl; + + // Hope this will be flushed in one go: + std::cerr << oscerr.str() << std::endl; + } +#endif diff --git a/src/ParaMEDMEM/OverlapMapping.hxx b/src/ParaMEDMEM/OverlapMapping.hxx index 2a5b32e4b..b30afe418 100644 --- a/src/ParaMEDMEM/OverlapMapping.hxx +++ b/src/ParaMEDMEM/OverlapMapping.hxx @@ -22,6 +22,7 @@ #define __OVERLAPMAPPING_HXX__ #include "MEDCouplingAutoRefCountObjectPtr.hxx" +#include "OverlapElementLocator.hxx" #include #include @@ -45,7 +46,7 @@ namespace ParaMEDMEM { public: - OverlapMapping(const ProcessorGroup& group); + OverlapMapping(const ProcessorGroup& group, const OverlapElementLocator& locator); void keepTracksOfSourceIds(int procId, DataArrayInt *ids); void keepTracksOfTargetIds(int procId, DataArrayInt *ids); void addContributionST(const std::vector< SparseDoubleVec >& matrixST, const DataArrayInt *srcIds, int srcProcId, const DataArrayInt *trgIds, int trgProcId); @@ -53,10 +54,10 @@ namespace ParaMEDMEM void computeDenoConservativeVolumic(int nbOfTuplesTrg); void computeDenoGlobConstraint(); // - void multiply(const MEDCouplingFieldDouble *fieldInput, MEDCouplingFieldDouble *fieldOutput) const; + void multiply(const MEDCouplingFieldDouble *fieldInput, MEDCouplingFieldDouble *fieldOutput, double default_val) const; void transposeMultiply(const MEDCouplingFieldDouble *fieldInput, MEDCouplingFieldDouble *fieldOutput); private: - void fillProcToSendRcvForMultiply(const std::vector< int >& procsToSendField); +// void fillProcToSendRcvForMultiply(const std::vector< int >& procsToSendField); void serializeMatrixStep0ST(const int *nbOfElemsSrc, int *&bigArr, int *count, int *offsets, int *countForRecv, int *offsetsForRecv) const; int serializeMatrixStep1ST(const 
int *nbOfElemsSrc, const int *recvStep0, const int *countStep0, const int *offsStep0, @@ -65,15 +66,20 @@ namespace ParaMEDMEM void unserializationST(int nbOfTrgElems, const int *nbOfElemsSrcPerProc, const int *bigArrRecv, const int *bigArrRecvCounts, const int *bigArrRecvOffs, const int *bigArrRecv2, const double *bigArrDRecv2, const int *bigArrRecv2Count, const int *bigArrRecv2Offs); void finishToFillFinalMatrixST(); - void updateZipSourceIdsForFuture(); + void updateZipSourceIdsForMultiply(); - // Debug -// void printMatrixesST() const; -// void printTheMatrix() const; +#ifdef DEC_DEBUG + void printMatrixesST() const; + void printTheMatrix() const; + void printDenoMatrix() const; +#endif private: const ProcessorGroup &_group; + const OverlapElementLocator& _locator; + /**! Vector of DAInt of cell identifiers. The 2 following class members work in pair. For a proc ID i, - * first member gives an old2new map for the local part of the source mesh that has been sent. + * first member gives an old2new map for the local part of the source mesh that has been sent to proc#i, just based on the + * bounding box computation (this is potentially a larger set than what is finally in the interp matrix). * Second member gives proc ID. */ std::vector< MEDCouplingAutoRefCountObjectPtr<DataArrayInt> > _sent_src_ids_st2; //! see above _sent_src_ids_st2 @@ -85,7 +91,7 @@ std::vector< int > _sent_trg_proc_st2; - /**! Vector of matrixes (partial interpolation ratios), result of the local interpolator run. + /**! Vector of matrixes (partial interpolation ratios), result of the LOCAL interpolator run. * Indexing shared with _source_proc_id_st, and _target_proc_id_st. */ std::vector< std::vector< SparseDoubleVec > > _matrixes_st; //! See _matrixes_st - vec of source proc IDs @@ -93,21 +99,24 @@ //! See _matrixes_st - vec of target proc IDs std::vector< int > _target_proc_id_st; - //! Vector of remote remote proc IDs for source mesh. Indexing shared with _nb_of_src_ids_proc_st2 - std::vector< int > _src_ids_proc_st2; - //! Number of cells in the mesh/mapping received from the remote proc i for source mesh. See _src_ids_proc_st2 above - std::vector< int > _nb_of_src_ids_proc_st2; - - /**! Specifies for each remote proc ID (given in _src_ids_zip_proc_st2 below) the corresponding local - * source cell IDs to use/send. Same indexing as _src_ids_zip_proc_st2. Sorted. - * On a given proc, those two members contain exactly the same set of cell identifiers as what is given - * in the locally held interpolation matrices. */ + /**! Vector of remote proc IDs from which this proc received cell IDs of the source mesh. + * Indexing shared with _nb_of_rcv_src_ids_proc_st2 */ + std::vector< int > _rcv_src_ids_proc_st2; + /**! Number of received source mesh IDs at mesh data exchange. See _rcv_src_ids_proc_st2 above. + Counting the number of IDs suffices, as we just need this to prepare the receive when doing the final vector matrix multiplication */ + std::vector< int > _nb_of_rcv_src_ids_proc_st2; + + /**! Specifies for each (target) remote proc ID (given in _src_ids_zip_proc_st2 below) the corresponding + * source cell IDs to use. Same indexing as _src_ids_zip_proc_st2. Sorted. + * On a given proc, and after updateZipSourceIdsForMultiply(), this member contains exactly the same set of source cell IDs as what is given + * in the locally held interpolation matrices. + * IMPORTANT: as a consequence cell IDs in _src_ids_zip_st2 are *remote* identifiers. */ std::vector< std::vector<int> > _src_ids_zip_st2; //! 
Vector of remote proc ID to which the local source mapping above corresponds. See _src_ids_zip_st2 above. std::vector< int > _src_ids_zip_proc_st2; /**! THE matrix for matrix-vector product. The first dimension is indexed in the set of target procs - * that interacts with local source mesh. The second dim is the pseudo id of source proc. + * that interacts with local source mesh. The second dim is the target cell ID. * Same indexing as _the_matrix_st_source_proc_id */ std::vector< std::vector< SparseDoubleVec > > _the_matrix_st; //! See _the_matrix_st above. List of source proc IDs contributing to _the_matrix_st @@ -118,12 +127,8 @@ namespace ParaMEDMEM //! Proc IDs from which data will be received (on this current proc) for matrix-vector computation std::vector< int > _proc_ids_to_recv_vector_st; - // Denominators (computed from the numerator matrix) + // Denominators (computed from the numerator matrix). As for _the_matrix_st it is paired with _the_matrix_st_source_proc_id std::vector< std::vector< SparseDoubleVec > > _the_deno_st; - -// std::vector< std::vector > _the_matrix_st_source_ids; -// //! this attribute is of size _group.size(); for each procId in _group _source_ids_to_send_st[procId] contains tupleId to send abroad -// std::vector< std::vector > _source_ids_to_send_st; }; } diff --git a/src/ParaMEDMEMTest/ParaMEDMEMTest.hxx b/src/ParaMEDMEMTest/ParaMEDMEMTest.hxx index 54db5e2e0..1273da391 100644 --- a/src/ParaMEDMEMTest/ParaMEDMEMTest.hxx +++ b/src/ParaMEDMEMTest/ParaMEDMEMTest.hxx @@ -31,56 +31,59 @@ class ParaMEDMEMTest : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE( ParaMEDMEMTest ); - CPPUNIT_TEST(testMPIProcessorGroup_constructor); - CPPUNIT_TEST(testMPIProcessorGroup_boolean); - CPPUNIT_TEST(testMPIProcessorGroup_rank); - CPPUNIT_TEST(testBlockTopology_constructor); - CPPUNIT_TEST(testBlockTopology_serialize); - CPPUNIT_TEST(testInterpKernelDEC_1D); - CPPUNIT_TEST(testInterpKernelDEC_2DCurve); - CPPUNIT_TEST(testInterpKernelDEC_2D); - CPPUNIT_TEST(testInterpKernelDEC2_2D); - CPPUNIT_TEST(testInterpKernelDEC_2DP0P1); - CPPUNIT_TEST(testInterpKernelDEC_3D); - CPPUNIT_TEST(testInterpKernelDECNonOverlapp_2D_P0P0); - CPPUNIT_TEST(testInterpKernelDECNonOverlapp_2D_P0P1P1P0); - CPPUNIT_TEST(testInterpKernelDEC2DM1D_P0P0); - CPPUNIT_TEST(testInterpKernelDECPartialProcs); - CPPUNIT_TEST(testInterpKernelDEC3DSurfEmptyBBox); - CPPUNIT_TEST(testOverlapDEC1); - CPPUNIT_TEST(testOverlapDEC2); -// CPPUNIT_TEST(testOverlapDEC_LMEC_seq); -// CPPUNIT_TEST(testOverlapDEC_LMEC_para); -// - CPPUNIT_TEST(testSynchronousEqualInterpKernelWithoutInterpNativeDEC_2D); - CPPUNIT_TEST(testSynchronousEqualInterpKernelWithoutInterpDEC_2D); - CPPUNIT_TEST(testSynchronousEqualInterpKernelDEC_2D); - CPPUNIT_TEST(testSynchronousFasterSourceInterpKernelDEC_2D); - CPPUNIT_TEST(testSynchronousSlowerSourceInterpKernelDEC_2D); - CPPUNIT_TEST(testSynchronousSlowSourceInterpKernelDEC_2D); - CPPUNIT_TEST(testSynchronousFastSourceInterpKernelDEC_2D); - CPPUNIT_TEST(testAsynchronousEqualInterpKernelDEC_2D); - CPPUNIT_TEST(testAsynchronousFasterSourceInterpKernelDEC_2D); - CPPUNIT_TEST(testAsynchronousSlowerSourceInterpKernelDEC_2D); - CPPUNIT_TEST(testAsynchronousSlowSourceInterpKernelDEC_2D); - CPPUNIT_TEST(testAsynchronousFastSourceInterpKernelDEC_2D); + CPPUNIT_TEST(testMPIProcessorGroup_constructor); // 1 and 2 procs + CPPUNIT_TEST(testMPIProcessorGroup_boolean); // 1 and 2 procs + CPPUNIT_TEST(testMPIProcessorGroup_rank); // >=2 procs + CPPUNIT_TEST(testBlockTopology_constructor); // >=2 procs + 
CPPUNIT_TEST(testBlockTopology_serialize); // 1 proc + CPPUNIT_TEST(testInterpKernelDEC_1D); // 5 procs + CPPUNIT_TEST(testInterpKernelDEC_2DCurve); // 5 procs + CPPUNIT_TEST(testInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testInterpKernelDEC2_2D); // 5 procs + CPPUNIT_TEST(testInterpKernelDEC_2DP0P1); // Not impl. + CPPUNIT_TEST(testInterpKernelDEC_3D); // 3 procs + CPPUNIT_TEST(testInterpKernelDECNonOverlapp_2D_P0P0); // 5 procs + CPPUNIT_TEST(testInterpKernelDECNonOverlapp_2D_P0P1P1P0); // 5 procs + CPPUNIT_TEST(testInterpKernelDEC2DM1D_P0P0); // 3 procs + CPPUNIT_TEST(testInterpKernelDECPartialProcs); // 3 procs + CPPUNIT_TEST(testInterpKernelDEC3DSurfEmptyBBox); // 3 procs + CPPUNIT_TEST(testOverlapDEC1); // 3 procs + CPPUNIT_TEST(testOverlapDEC2); // 3 procs + CPPUNIT_TEST(testOverlapDEC3); // 2 procs +// CPPUNIT_TEST(testOverlapDEC4); + + + CPPUNIT_TEST(testSynchronousEqualInterpKernelWithoutInterpNativeDEC_2D);// 5 procs + CPPUNIT_TEST(testSynchronousEqualInterpKernelWithoutInterpDEC_2D); // 5 procs + CPPUNIT_TEST(testSynchronousEqualInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testSynchronousFasterSourceInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testSynchronousSlowerSourceInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testSynchronousSlowSourceInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testSynchronousFastSourceInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testAsynchronousEqualInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testAsynchronousFasterSourceInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testAsynchronousSlowerSourceInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testAsynchronousSlowSourceInterpKernelDEC_2D); // 5 procs + CPPUNIT_TEST(testAsynchronousFastSourceInterpKernelDEC_2D); // 5 procs //#ifdef MED_ENABLE_FVM // //can be added again after FVM correction for 2D // // CPPUNIT_TEST(testNonCoincidentDEC_2D); // CPPUNIT_TEST(testNonCoincidentDEC_3D); //#endif - CPPUNIT_TEST(testStructuredCoincidentDEC); - CPPUNIT_TEST(testStructuredCoincidentDEC); - CPPUNIT_TEST(testICoco1); - CPPUNIT_TEST(testGauthier1); - CPPUNIT_TEST(testGauthier2); - CPPUNIT_TEST(testGauthier3); - CPPUNIT_TEST(testGauthier4); - CPPUNIT_TEST(testFabienAPI1); - CPPUNIT_TEST(testFabienAPI2); + CPPUNIT_TEST(testStructuredCoincidentDEC); // 5 procs + CPPUNIT_TEST(testICoco1); // 2 procs + CPPUNIT_TEST(testGauthier1); // 4 procs + CPPUNIT_TEST(testGauthier2); // >= 2 procs + CPPUNIT_TEST(testGauthier3); // 4 procs + CPPUNIT_TEST(testGauthier4); // 3 procs + CPPUNIT_TEST(testFabienAPI1); // 3 procs + CPPUNIT_TEST(testFabienAPI2); // 3 procs + + // CPPUNIT_TEST(testMEDLoaderRead1); CPPUNIT_TEST(testMEDLoaderPolygonRead); CPPUNIT_TEST(testMEDLoaderPolyhedronRead); + CPPUNIT_TEST_SUITE_END(); @@ -108,6 +111,8 @@ public: void testInterpKernelDEC3DSurfEmptyBBox(); void testOverlapDEC1(); void testOverlapDEC2(); + void testOverlapDEC3(); + void testOverlapDEC4(); void testOverlapDEC_LMEC_seq(); void testOverlapDEC_LMEC_para(); #ifdef MED_ENABLE_FVM diff --git a/src/ParaMEDMEMTest/ParaMEDMEMTest_OverlapDEC.cxx b/src/ParaMEDMEMTest/ParaMEDMEMTest_OverlapDEC.cxx index c1f5bb92f..d0f41d12c 100644 --- a/src/ParaMEDMEMTest/ParaMEDMEMTest_OverlapDEC.cxx +++ b/src/ParaMEDMEMTest/ParaMEDMEMTest_OverlapDEC.cxx @@ -181,7 +181,7 @@ typedef MEDCouplingAutoRefCountObjectPtr MFDouble; // MPI_Barrier(MPI_COMM_WORLD); //} -void prepareData1(int rank, ProcessorGroup * grp, +void prepareData1(int rank, ProcessorGroup * grp, NatureOfField nature, MEDCouplingUMesh *& meshS, MEDCouplingUMesh *& meshT, ParaMESH*& parameshS, 
ParaMESH*& parameshT, ParaFIELD*& parafieldS, ParaFIELD*& parafieldT) @@ -205,7 +205,7 @@ void prepareData1(int rank, ProcessorGroup * grp, ComponentTopology comptopo; parameshS=new ParaMESH(meshS, *grp,"source mesh"); parafieldS=new ParaFIELD(ON_CELLS,NO_TIME,parameshS,comptopo); - parafieldS->getField()->setNature(ConservativeVolumic);//IntegralGlobConstraint + parafieldS->getField()->setNature(nature); double *valsS=parafieldS->getField()->getArray()->getPointer(); valsS[0]=7.; valsS[1]=8.; // @@ -222,7 +222,7 @@ void prepareData1(int rank, ProcessorGroup * grp, meshT->finishInsertingCells(); parameshT=new ParaMESH(meshT,*grp,"target mesh"); parafieldT=new ParaFIELD(ON_CELLS,NO_TIME,parameshT,comptopo); - parafieldT->getField()->setNature(ConservativeVolumic);//IntegralGlobConstraint + parafieldT->getField()->setNature(nature); double *valsT=parafieldT->getField()->getArray()->getPointer(); valsT[0]=7.; } @@ -246,7 +246,7 @@ void prepareData1(int rank, ProcessorGroup * grp, ComponentTopology comptopo; parameshS=new ParaMESH(meshS,*grp,"source mesh"); parafieldS=new ParaFIELD(ON_CELLS,NO_TIME,parameshS,comptopo); - parafieldS->getField()->setNature(ConservativeVolumic);//IntegralGlobConstraint + parafieldS->getField()->setNature(nature); double *valsS=parafieldS->getField()->getArray()->getPointer(); valsS[0]=9.; valsS[1]=11.; @@ -264,7 +264,7 @@ void prepareData1(int rank, ProcessorGroup * grp, meshT->finishInsertingCells(); parameshT=new ParaMESH(meshT,*grp,"target mesh"); parafieldT=new ParaFIELD(ON_CELLS,NO_TIME,parameshT,comptopo); - parafieldT->getField()->setNature(ConservativeVolumic);//IntegralGlobConstraint + parafieldT->getField()->setNature(nature); double *valsT=parafieldT->getField()->getArray()->getPointer(); valsT[0]=8.; } @@ -287,7 +287,7 @@ void prepareData1(int rank, ProcessorGroup * grp, ComponentTopology comptopo; parameshS=new ParaMESH(meshS,*grp,"source mesh"); parafieldS=new ParaFIELD(ON_CELLS,NO_TIME,parameshS,comptopo); - parafieldS->getField()->setNature(ConservativeVolumic);//IntegralGlobConstraint + parafieldS->getField()->setNature(nature); double *valsS=parafieldS->getField()->getArray()->getPointer(); valsS[0]=10.; // @@ -304,11 +304,10 @@ void prepareData1(int rank, ProcessorGroup * grp, meshT->finishInsertingCells(); parameshT=new ParaMESH(meshT,*grp,"target mesh"); parafieldT=new ParaFIELD(ON_CELLS,NO_TIME,parameshT,comptopo); - parafieldT->getField()->setNature(ConservativeVolumic);//IntegralGlobConstraint + parafieldT->getField()->setNature(nature); double *valsT=parafieldT->getField()->getArray()->getPointer(); valsT[0]=9.; } - } /*! Test case from the official doc of the OverlapDEC. 
@@ -324,11 +323,16 @@ void ParaMEDMEMTest::testOverlapDEC1() // printf("(%d) PID %d on localhost ready for attach\n", rank, getpid()); // fflush(stdout); - if (size != 3) return ; +// if (rank == 1) +// { +// int i=1, j=0; +// while (i!=0) +// j=2; +// } + if (size != 3) return ; int nproc = 3; std::set procs; - for (int i=0; igetField()->getArray()->getIJ(0,0),1e-12); @@ -367,6 +373,7 @@ void ParaMEDMEMTest::testOverlapDEC1() { CPPUNIT_ASSERT_DOUBLES_EQUAL(10.5,parafieldT->getField()->getArray()->getIJ(0,0),1e-12); } + delete parafieldS; delete parafieldT; delete parameshS; @@ -388,13 +395,19 @@ void ParaMEDMEMTest::testOverlapDEC2() MPI_Comm_rank(MPI_COMM_WORLD,&rank); if (size != 3) return ; - int nproc = 3; std::set procs; - for (int i=0; ialloc(5,2); + std::copy(coords,coords+10,myCoords->getPointer()); + meshS_0->setCoords(myCoords); myCoords->decrRef(); + int connS[4]={0,1,2,3}; + meshS_0->allocateCells(2); + meshS_0->insertNextCell(INTERP_KERNEL::NORM_QUAD4,4,connS); + // + meshT_0 = MEDCouplingUMesh::New("target", 2); + myCoords=DataArrayDouble::New(); + myCoords->alloc(5,2); + std::copy(coords,coords+10,myCoords->getPointer()); + meshT_0->setCoords(myCoords); + myCoords->decrRef(); + int connT[12]={0,1,4, 1,2,4, 2,3,4, 3,0,4}; + meshT_0->allocateCells(4); + meshT_0->insertNextCell(INTERP_KERNEL::NORM_TRI3,3,connT); + meshT_0->insertNextCell(INTERP_KERNEL::NORM_TRI3,3,connT+3); + meshT_0->insertNextCell(INTERP_KERNEL::NORM_TRI3,3,connT+6); + meshT_0->insertNextCell(INTERP_KERNEL::NORM_TRI3,3,connT+9); +} + +/** + * Prepare five (detached) QUAD4 disposed like this: + * (0) (1) (2) + * (3) (4) + * + * On the target side the global mesh is identical except that each QUAD4 is split in 4 TRI3 (along the diagonals). + * This is a case for two procs: + * - proc #0 has source squares 0,1,2 and target squares 0,3 (well, sets of TRI3s actually) + * - proc #1 has source squares 3,4 and target squares 1,2,4 + */ +void prepareData2(int rank, ProcessorGroup * grp, NatureOfField nature, + MEDCouplingUMesh *& meshS, MEDCouplingUMesh *& meshT, + ParaMESH*& parameshS, ParaMESH*& parameshT, + ParaFIELD*& parafieldS, ParaFIELD*& parafieldT) +{ + MEDCouplingUMesh *meshS_0 = 0, *meshT_0 = 0; + prepareData2_buildOneSquare(meshS_0, meshT_0); + + if(rank==0) + { + const double tr1[] = {1.5, 0.0}; + MEDCouplingUMesh *meshS_1 = static_cast(meshS_0->deepCpy()); + meshS_1->translate(tr1); + const double tr2[] = {3.0, 0.0}; + MEDCouplingUMesh *meshS_2 = static_cast(meshS_0->deepCpy()); + meshS_2->translate(tr2); + + std::vector vec; + vec.push_back(meshS_0);vec.push_back(meshS_1);vec.push_back(meshS_2); + meshS = MEDCouplingUMesh::MergeUMeshes(vec); + meshS_1->decrRef(); meshS_2->decrRef(); + + ComponentTopology comptopo; + parameshS=new ParaMESH(meshS, *grp,"source mesh"); + parafieldS=new ParaFIELD(ON_CELLS,ONE_TIME,parameshS,comptopo); + parafieldS->getField()->setNature(nature); + double *valsS=parafieldS->getField()->getArray()->getPointer(); + valsS[0]=1.; valsS[1]=2.; valsS[2]=3.; + + // + const double tr3[] = {0.0, -1.5}; + MEDCouplingUMesh *meshT_3 = static_cast(meshT_0->deepCpy()); + meshT_3->translate(tr3); + vec.clear(); + vec.push_back(meshT_0);vec.push_back(meshT_3); + meshT = MEDCouplingUMesh::MergeUMeshes(vec); + meshT_3->decrRef(); + + parameshT=new ParaMESH(meshT,*grp,"target mesh"); + parafieldT=new ParaFIELD(ON_CELLS,ONE_TIME,parameshT,comptopo); + parafieldT->getField()->setNature(nature); + } + // + if(rank==1) + { + const double tr3[] = {0.0, -1.5}; + MEDCouplingUMesh *meshS_3 = 
static_cast<MEDCouplingUMesh *>(meshS_0->deepCpy()); + meshS_3->translate(tr3); + const double tr4[] = {1.5, -1.5}; + MEDCouplingUMesh *meshS_4 = static_cast<MEDCouplingUMesh *>(meshS_0->deepCpy()); + meshS_4->translate(tr4); + + std::vector<const MEDCouplingUMesh *> vec; + vec.push_back(meshS_3);vec.push_back(meshS_4); + meshS = MEDCouplingUMesh::MergeUMeshes(vec); + meshS_3->decrRef(); meshS_4->decrRef(); + + ComponentTopology comptopo; + parameshS=new ParaMESH(meshS, *grp,"source mesh"); + parafieldS=new ParaFIELD(ON_CELLS,ONE_TIME,parameshS,comptopo); + parafieldS->getField()->setNature(nature); + double *valsS=parafieldS->getField()->getArray()->getPointer(); + valsS[0]=4.; valsS[1]=5.; + + // + const double tr5[] = {1.5, 0.0}; + MEDCouplingUMesh *meshT_1 = static_cast<MEDCouplingUMesh *>(meshT_0->deepCpy()); + meshT_1->translate(tr5); + const double tr6[] = {3.0, 0.0}; + MEDCouplingUMesh *meshT_2 = static_cast<MEDCouplingUMesh *>(meshT_0->deepCpy()); + meshT_2->translate(tr6); + const double tr7[] = {1.5, -1.5}; + MEDCouplingUMesh *meshT_4 = static_cast<MEDCouplingUMesh *>(meshT_0->deepCpy()); + meshT_4->translate(tr7); + + vec.clear(); + vec.push_back(meshT_1);vec.push_back(meshT_2);vec.push_back(meshT_4); + meshT = MEDCouplingUMesh::MergeUMeshes(vec); + meshT_1->decrRef(); meshT_2->decrRef(); meshT_4->decrRef(); + + parameshT=new ParaMESH(meshT,*grp,"target mesh"); + parafieldT=new ParaFIELD(ON_CELLS,ONE_TIME,parameshT,comptopo); + parafieldT->getField()->setNature(nature); + } + meshS_0->decrRef(); + meshT_0->decrRef(); +} + +/*! Test focused on the mapping of cell IDs. + * (i.e. when only part of the source/target mesh is transmitted) + */ +void ParaMEDMEMTest::testOverlapDEC3() +{ + int size, rank; + MPI_Comm_size(MPI_COMM_WORLD,&size); + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + + int nproc = 2; + if (size != nproc) return ; + std::set<int> procs; + for (int i=0; i<nproc; i++) + procs.insert(i); + + OverlapDEC dec(procs); + ProcessorGroup * grp = dec.getGroup(); + MEDCouplingUMesh *meshS = 0, *meshT = 0; + ParaMESH *parameshS = 0, *parameshT = 0; + ParaFIELD *parafieldS = 0, *parafieldT = 0; + prepareData2(rank, grp, ConservativeVolumic, meshS, meshT, parameshS, parameshT, parafieldS, parafieldT); + + dec.attachSourceLocalField(parafieldS); + dec.attachTargetLocalField(parafieldT); + dec.synchronize(); + dec.sendRecvData(true); + + MEDCouplingFieldDouble * resField = parafieldT->getField(); + if(rank==0) + { + CPPUNIT_ASSERT_EQUAL(8, resField->getNumberOfTuples()); + for(int i=0;i<4;i++) + CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0,resField->getArray()->getIJ(i,0),1e-12); + for(int i=4;i<8;i++) + CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0,resField->getArray()->getIJ(i,0),1e-12); + } + if(rank==1) + { + CPPUNIT_ASSERT_EQUAL(12, resField->getNumberOfTuples()); + for(int i=0;i<4;i++) + CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0,resField->getArray()->getIJ(i,0),1e-12); + for(int i=4;i<8;i++) + CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0,resField->getArray()->getIJ(i,0),1e-12); + for(int i=8;i<12;i++) + CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0,resField->getArray()->getIJ(i,0),1e-12); + } + delete parafieldS; + delete parafieldT; + delete parameshS; + delete parameshT; + meshS->decrRef(); + meshT->decrRef(); + + MPI_Barrier(MPI_COMM_WORLD); +} + +/*! + * Test default values and multi-component fields + */ +void ParaMEDMEMTest::testOverlapDEC4() +{ +} -- 2.39.2
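Note on usage: the sequence below is a minimal sketch of how a caller is expected to drive the API touched by this patch. The two ParaFIELD arguments are assumed to be built beforehand (e.g. as in prepareData2() above); the DEC calls themselves are the public API as it stands after this patch.

    #include <set>
    #include "OverlapDEC.hxx"
    #include "ParaFIELD.hxx"

    using namespace ParaMEDMEM;

    // srcField/trgField: cell fields built as in prepareData2() above (assumed).
    void runCoupling(ParaFIELD *srcField, ParaFIELD *trgField)
    {
      std::set<int> procs;
      procs.insert(0); procs.insert(1);      // ranks taking part in the coupling
      OverlapDEC dec(procs);
      if (!dec.isInGroup())
        return;                              // this rank is not part of the DEC
      dec.attachSourceLocalField(srcField);
      dec.attachTargetLocalField(trgField);
      dec.setDefaultValue(-1.0);             // new in this patch: value for target cells
                                             // receiving no source contribution
      dec.synchronize();                     // builds the locator and the interpolation matrix
      dec.sendData();                        // multiply(): field exchange + sparse mat-vec, fills trgField
    }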
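The final matrix-vector stage performed by multiply() is easiest to follow on a single-proc, single-component reduction of it. The sketch below mirrors the srcProcID == myProcID branch plus the closing default-value sweep over unhit cells; SparseDoubleVec is the same typedef as in OverlapMapping.hxx, but the free function itself is only illustrative, not part of the patch.

    #include <cstddef>
    #include <map>
    #include <vector>

    typedef std::map<int,double> SparseDoubleVec;

    // One row of 'mat'/'deno' per target cell; the key of each entry is a source cell ID.
    void localMatVec(const std::vector<SparseDoubleVec>& mat,
                     const std::vector<SparseDoubleVec>& deno,
                     const std::vector<double>& srcField,
                     std::vector<double>& trgField,  // sized to the number of target cells
                     double defaultValue)
    {
      std::vector<bool> hit(trgField.size(), false);
      for (std::size_t tgtCellID = 0; tgtCellID != mat.size(); tgtCellID++)
        {
          SparseDoubleVec::const_iterator it5 = deno[tgtCellID].begin();
          for (SparseDoubleVec::const_iterator it3 = mat[tgtCellID].begin();
               it3 != mat[tgtCellID].end(); ++it3, ++it5)
            {
              // trgField[tgt] += srcField[src] * mat[tgt][src] / deno[tgt][src]
              trgField[tgtCellID] += srcField[(*it3).first] * (*it3).second / (*it5).second;
              hit[tgtCellID] = true;
            }
        }
      // Target cells never reached by any source contribution get the default value:
      for (std::size_t i = 0; i != trgField.size(); i++)
        if (!hit[i])
          trgField[i] = defaultValue;
    }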
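Likewise, the branch where the local proc holds a matrix computed for a remote source proc (the "matrix computed here, data owned there" case) boils down to the remapping below: received values arrive ordered by the zipped source-ID list, and rows must be redirected to local target cells through the IDs kept in _sent_trg_ids_st2. Names mirror the members discussed above, but again this is only an illustrative sketch, not the class code.

    #include <cstddef>
    #include <map>
    #include <vector>

    typedef std::map<int,double> SparseDoubleVec;

    // mat/deno rows are indexed like the *sent* target IDs, not like local cells.
    void remappedMatVec(const std::vector<SparseDoubleVec>& mat,
                        const std::vector<SparseDoubleVec>& deno,
                        const std::vector<int>& zipSrcIds,   // sorted 'interp source IDs' of the remote owner
                        const std::vector<int>& sentTrgIds,  // row index -> local target cell ID
                        const std::vector<double>& rcvVals,  // received field values, ordered like zipSrcIds
                        std::vector<double>& trgField)
    {
      // Invert the zip list: global source cell ID -> position in rcvVals (the 'revert_zip' of the patch)
      std::map<int,int> revert_zip;
      for (std::size_t k = 0; k != zipSrcIds.size(); k++)
        revert_zip[zipSrcIds[k]] = (int)k;
      for (std::size_t row = 0; row != mat.size(); row++)
        {
          SparseDoubleVec::const_iterator it5 = deno[row].begin();
          for (SparseDoubleVec::const_iterator it3 = mat[row].begin();
               it3 != mat[row].end(); ++it3, ++it5)
            {
              int idx = revert_zip.find((*it3).first)->second;  // where this source ID sits in rcvVals
              trgField[sentTrgIds[row]] += rcvVals[idx] * (*it3).second / (*it5).second;
            }
        }
    }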