src/Basics/HeatMarcel.cxx

   1 // Copyright (C) 2023-2024  CEA/DEN, EDF R&D
   2 //
   3 // This library is free software; you can redistribute it and/or
   4 // modify it under the terms of the GNU Lesser General Public
   5 // License as published by the Free Software Foundation; either
   6 // version 2.1 of the License, or (at your option) any later version.
   7 //
   8 // This library is distributed in the hope that it will be useful,
   9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 // Lesser General Public License for more details.
  12 //
  13 // You should have received a copy of the GNU Lesser General Public
  14 // License along with this library; if not, write to the Free Software
  15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 //
  17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  18 //
  19
  20 #include "HeatMarcel.hxx"
  21
  22 #include "baseutilities.h"
  23
  24 #include <thread>
  25 #include <cstdint>
  26 #include <cmath>
  27 #include <vector>
  28 #include <numeric>
  29 #include <chrono>
  30 #include <iomanip>
  31
  32 template<class T>
  33 static void GetSlice(T start, T stop, const unsigned int sliceId, const unsigned int nbOfSlices, T& startSlice, T& stopSlice)
  34 {
  35   T nbElems=stop - start;
  36   T minNbOfElemsPerSlice=nbElems/nbOfSlices;
  37   startSlice=start+minNbOfElemsPerSlice*sliceId;
  38   if(sliceId<nbOfSlices-1)
  39     stopSlice=start+minNbOfElemsPerSlice*(sliceId+1);
  40   else
  41     stopSlice=stop;
  42 }
  43
  44 /*!
  45  * Compute integration of 1/(1+x^2) function (atan function) between start/nbTurn and end/nbTurn.
  46  */
  47 static void SimulateOneCoreOfComputationNode(std::uint64_t start, std::uint64_t end, std::uint64_t nbTurn, long double* ret)
  48 {
  49   long double retLoc = 0.0;
  50   long double delta = 1.0 / ((long double) nbTurn);
  51   for(std::uint64_t i = start ; i < end ; ++i)
  52   {
  53     long double x = i*delta;
  54     retLoc += delta/(1+x*x);
  55   }
  56   *ret = retLoc;
  57 }
  58
  59 static long double SimulateOnAllCoresOfComputationNodeInternal(std::uint64_t nbTurn, unsigned int nbThreads)
  60 {
  61   SIMPLE_MESSAGE( "Number of turn = " << std::scientific << std::setprecision(12) << (double)nbTurn );
  62   std::vector< std::thread > threads(nbThreads);
  63   std::vector<long double> res(nbThreads);
  64   for(auto iCore = 0 ; iCore < nbThreads ; ++iCore)
  65   {
  66     std::uint64_t startSlice,stopSlice;
  67     GetSlice<std::uint64_t>(0,nbTurn,iCore,nbThreads,startSlice,stopSlice);
  68     long double *resLoc = res.data()+iCore;
  69     threads[iCore] = std::thread([nbTurn,startSlice,stopSlice,resLoc]{ SimulateOneCoreOfComputationNode(startSlice,stopSlice,nbTurn,resLoc);});
  70   }
  71   SIMPLE_MESSAGE( "Number of processors " << nbThreads );
  72   for(auto& th : threads)
  73     th.join();
  74   long double ret = std::accumulate(res.begin(),res.end(),0.0);
  75   return ret;
  76 }
  77
  78 constexpr long double CST = 161000000; // CST - estabished with gcc8.5.0 in debug mode
  79
  80 static std::uint64_t GetNbTurnFromTimeControler(double timeControler)
  81 {
  82   return static_cast<std::uint64_t>( timeControler * CST );
  83 }
  84
  85 /*!
  86 * long double& ret let it in output to force computation
  87  */
  88 static std::int64_t GetNbOfNsFor(double timeControler, long double& ret)
  89 {
  90   std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeControler );
  91   auto start = std::chrono::high_resolution_clock::now();
  92   SimulateOneCoreOfComputationNode(0,nbTurn,nbTurn,&ret);
  93   auto end = std::chrono::high_resolution_clock::now();
  94   return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
  95 }
  96
  97 /*!
  98 * long double& ret let it in output to force computation
  99  */
 100 static double FunctionToSolve(double timeControler, long double& fake)
 101 {
 102   std::int64_t timeInNs = GetNbOfNsFor(timeControler,fake);
 103   long double ret = (long double )timeInNs / ((long double) 1000000000);
 104   return ret;
 105 }
 106
 107 /*!
 108 * long double& ret let it in output to force computation
 109  */
 110 static double Derivative(double timeControler, long double& fake)
 111 {
 112   long double fake1,fake2;
 113   double a = FunctionToSolve(timeControler,fake1);
 114   double b = FunctionToSolve(2*timeControler,fake2);
 115   fake = fake1 + fake2;
 116   return (b-a)/(timeControler);
 117 }
 118
 119 /*
 120  * The returned value is those to pass to ActiveSleepOfComputationNode to
 121  * have an active sleep of 1s on one core.
 122  *
 123  * Higher is the returned value most performant is your processor and/or the generated binary
 124  *
 125  * Some exemple on core i7-6700 of 2020 on gcc 6.3 and 8.5
 126  * In debug mode : ~ 1
 127  * In O2 mode : ~ 4.3
 128  */
 129 double SALOME::GetTimeAdjustmentCst()
 130 {
 131   long double fake;
 132   double ret = Derivative(0.5,fake);
 133   // Do not remove fake variable because if you do so compiler is too smart and stop computing :)
 134   if(fake > 0.0)
 135     return 1.0/ret;
 136   return 1.0;
 137 }
 138
 139 /*
 140  * This method can be seen as a active sleep that occupies cores (piloted by \a nbThreads) of hosting computation node.
 141  * Of course the time spend by this method cannot be controled finely due to dependancy of the activity on hosted machine / OS.
 142  * This method only occupies cores without any stress of the memory.
 143  * This method can be used to assess good usage of computation nodes on a massive YACS graph.
 144  *
 145  * /param [in] timeAjustment : pilot time spend for the active sleep. The duration is linearly controled by this input and inversly linearly to the \a nbThreads parameter.
 146  * \param [out] timeInS : return the time in second of the active sleep
 147  * \param [in] nbThreads : Specify the number of thread on which the computation will be launched. If 0 (the default) all cores of hosting machine will be used.
 148  * \return is pi value computed in a naive way :) This return value is necessary to prevent aggressive compiler to not compute.
 149  *
 150  * \sa GetTimeAdjustmentCstOnYourSystem : to help you to fine tune \a timeAjustment parameter
 151  */
 152 long double SALOME::HeatMarcel(double timeAjustment, double& timeInS, unsigned int nbThreads)
 153 {
 154   std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeAjustment );
 155   unsigned int nbThreadsEff = nbThreads == 0?std::thread::hardware_concurrency():nbThreads;
 156   auto start = std::chrono::high_resolution_clock::now();
 157   long double ret = SimulateOnAllCoresOfComputationNodeInternal(nbTurn, nbThreadsEff);
 158   auto end = std::chrono::high_resolution_clock::now();
 159   timeInS = ((double)(std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()))/1000000000.0;
 160   return 4.0 * ret;
 161 }