Reduce the number of collective calls, which kill performance, even if the memory peak is higher

author Anthony Geay <anthony.geay@edf.fr>

Wed, 4 Nov 2020 15:30:45 +0000 (16:30 +0100)

committer Anthony Geay <anthony.geay@edf.fr>

Mon, 9 Nov 2020 08:00:58 +0000 (09:00 +0100)
author Anthony Geay <anthony.geay@edf.fr>
Wed, 4 Nov 2020 15:30:45 +0000 (16:30 +0100)
committer Anthony Geay <anthony.geay@edf.fr>
Mon, 9 Nov 2020 08:00:58 +0000 (09:00 +0100)
diff --git a/src/ParaMEDMEM/ParaUMesh.cxx b/src/ParaMEDMEM/ParaUMesh.cxx

index 678c5ea9a0824adb1babc3a760f4725dd65239e2..7503a36aaa720b2144585da5741a183b00436450 100644 (file)
--- a/src/ParaMEDMEM/ParaUMesh.cxx
+++ b/src/ParaMEDMEM/ParaUMesh.cxx
@@ -88,19 +88,20 @@ MCAuto<DataArrayIdType> ParaUMesh::getCellIdsLyingOnNodesTrue(const DataArrayIdT
    std::unique_ptr<mcIdType[]> nbOfElems(new mcIdType[size]),nbOfElems2(new mcIdType[size]),nbOfElems3(new mcIdType[size]);
    mcIdType nbOfNodeIdsLoc(globalNodeIds->getNumberOfTuples());
    ci.allGather(&nbOfNodeIdsLoc,1,MPI_ID_TYPE,nbOfElems.get(),1,MPI_ID_TYPE,comm);
-  //store for each proc the local nodeids intercepted by current proc
    std::vector< MCAuto<DataArrayIdType> > tabs(size);
+  //store for each proc the local nodeids intercepted by current proc
+  int nbOfCollectiveCalls = 1;// this parameter controls the memory peak
    // loop to avoid to all procs to have all the nodes per proc
-  for(int subDiv = 0 ; subDiv < size ; ++subDiv)
+  for(int subDiv = 0 ; subDiv < nbOfCollectiveCalls ; ++subDiv)
    {
-    std::unique_ptr<mcIdType[]> nbOfElemsSp(CommInterface::SplitArrayOfLength(nbOfElems,size,subDiv,size));
+    std::unique_ptr<mcIdType[]> nbOfElemsSp(CommInterface::SplitArrayOfLength(nbOfElems,size,subDiv,nbOfCollectiveCalls));
      mcIdType nbOfNodeIdsSum(std::accumulate(nbOfElemsSp.get(),nbOfElemsSp.get()+size,0));
      std::unique_ptr<mcIdType[]> allGlobalNodeIds(new mcIdType[nbOfNodeIdsSum]);
      std::unique_ptr<int[]> nbOfElemsInt( CommInterface::ToIntArray<mcIdType>(nbOfElemsSp,size) );
      std::unique_ptr<int[]> offsetsIn( CommInterface::ComputeOffset(nbOfElemsInt,size) );
      mcIdType startGlobalNodeIds,endGlobalNodeIds;
-    DataArray::GetSlice(0,globalNodeIds->getNumberOfTuples(),1,subDiv,size,startGlobalNodeIds,endGlobalNodeIds);
-    ci.allGatherV(globalNodeIds->begin()+startGlobalNodeIds,endGlobalNodeIds-startGlobalNodeIds,MPI_ID_TYPE,allGlobalNodeIds.get(),nbOfElemsInt.get(),offsetsIn.get(),MPI_ID_TYPE,comm);
+    DataArray::GetSlice(0,globalNodeIds->getNumberOfTuples(),1,subDiv,nbOfCollectiveCalls,startGlobalNodeIds,endGlobalNodeIds);
+    ci.allGatherV(globalNodeIds->begin()+startGlobalNodeIds,FromIdType<int>(endGlobalNodeIds-startGlobalNodeIds),MPI_ID_TYPE,allGlobalNodeIds.get(),nbOfElemsInt.get(),offsetsIn.get(),MPI_ID_TYPE,comm);
      mcIdType offset(0);
      for(int curRk = 0 ; curRk < size ; ++curRk)
      {
@@ -156,18 +157,19 @@ MCAuto<DataArrayIdType> ParaUMesh::getCellIdsLyingOnNodesFalse(const DataArrayId
    std::unique_ptr<mcIdType[]> nbOfElems(new mcIdType[size]),nbOfElems2(new mcIdType[size]),nbOfElems3(new mcIdType[size]);
    mcIdType nbOfNodeIdsLoc(globalNodeIds->getNumberOfTuples());
    ci.allGather(&nbOfNodeIdsLoc,1,MPI_ID_TYPE,nbOfElems.get(),1,MPI_ID_TYPE,comm);
-  std::vector< MCAuto<DataArrayIdType> > tabs(size);
    // loop to avoid to all procs to have all the nodes per proc
-  for(int subDiv = 0 ; subDiv < size ; ++subDiv)
+  int nbOfCollectiveCalls = 1;// this parameter controls the memory peak
+  std::vector< MCAuto<DataArrayIdType> > tabs(size);
+  for(int subDiv = 0 ; subDiv < nbOfCollectiveCalls ; ++subDiv)
    {
-    std::unique_ptr<mcIdType[]> nbOfElemsSp(CommInterface::SplitArrayOfLength(nbOfElems,size,subDiv,size));
+    std::unique_ptr<mcIdType[]> nbOfElemsSp(CommInterface::SplitArrayOfLength(nbOfElems,size,subDiv,nbOfCollectiveCalls));
      mcIdType nbOfNodeIdsSum(std::accumulate(nbOfElemsSp.get(),nbOfElemsSp.get()+size,0));
      std::unique_ptr<mcIdType[]> allGlobalNodeIds(new mcIdType[nbOfNodeIdsSum]);
      std::unique_ptr<int[]> nbOfElemsInt( CommInterface::ToIntArray<mcIdType>(nbOfElemsSp,size) );
      std::unique_ptr<int[]> offsetsIn( CommInterface::ComputeOffset(nbOfElemsInt,size) );
      mcIdType startGlobalNodeIds,endGlobalNodeIds;
-    DataArray::GetSlice(0,globalNodeIds->getNumberOfTuples(),1,subDiv,size,startGlobalNodeIds,endGlobalNodeIds);
-    ci.allGatherV(globalNodeIds->begin()+startGlobalNodeIds,endGlobalNodeIds-startGlobalNodeIds,MPI_ID_TYPE,allGlobalNodeIds.get(),nbOfElemsInt.get(),offsetsIn.get(),MPI_ID_TYPE,comm);
+    DataArray::GetSlice(0,globalNodeIds->getNumberOfTuples(),1,subDiv,nbOfCollectiveCalls,startGlobalNodeIds,endGlobalNodeIds);
+    ci.allGatherV(globalNodeIds->begin()+startGlobalNodeIds,FromIdType<int>(endGlobalNodeIds-startGlobalNodeIds),MPI_ID_TYPE,allGlobalNodeIds.get(),nbOfElemsInt.get(),offsetsIn.get(),MPI_ID_TYPE,comm);
      mcIdType offset(0);
      for(int curRk = 0 ; curRk < size ; ++curRk)
      {
author	Anthony Geay <anthony.geay@edf.fr>
	Wed, 4 Nov 2020 15:30:45 +0000 (16:30 +0100)
committer	Anthony Geay <anthony.geay@edf.fr>
	Mon, 9 Nov 2020 08:00:58 +0000 (09:00 +0100)