src/Basics/HeatMarcel.cxx

   1 // Copyright (C) 2023  CEA/DEN, EDF R&D
   2 //
   3 // This library is free software; you can redistribute it and/or
   4 // modify it under the terms of the GNU Lesser General Public
   5 // License as published by the Free Software Foundation; either
   6 // version 2.1 of the License, or (at your option) any later version.
   7 //
   8 // This library is distributed in the hope that it will be useful,
   9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 // Lesser General Public License for more details.
  12 //
  13 // You should have received a copy of the GNU Lesser General Public
  14 // License along with this library; if not, write to the Free Software
  15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16 //
  17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
  18 //
  19
  20 #include "HeatMarcel.hxx"
  21
  22 #include "baseutilities.h"
  23
  24 #include <thread>
  25 #include <cstdint>
  26 #include <cmath>
  27 #include <vector>
  28 #include <numeric>
  29 #include <chrono>
  30
  31 #include <iostream>
  32
  33 template<class T>
  34 static void GetSlice(T start, T stop, const unsigned int sliceId, const unsigned int nbOfSlices, T& startSlice, T& stopSlice)
  35 {
  36   T nbElems=stop - start;
  37   T minNbOfElemsPerSlice=nbElems/nbOfSlices;
  38   startSlice=start+minNbOfElemsPerSlice*sliceId;
  39   if(sliceId<nbOfSlices-1)
  40     stopSlice=start+minNbOfElemsPerSlice*(sliceId+1);
  41   else
  42     stopSlice=stop;
  43 }
  44
  45 /*!
  46  * Compute integration of 1/(1+x^2) function (atan function) between start/nbTurn and end/nbTurn.
  47  */
  48 static void SimulateOneCoreOfComputationNode(std::uint64_t start, std::uint64_t end, std::uint64_t nbTurn, long double* ret)
  49 {
  50   long double retLoc = 0.0;
  51   long double delta = 1.0 / ((long double) nbTurn);
  52   for(std::uint64_t i = start ; i < end ; ++i)
  53   {
  54     long double x = i*delta;
  55     retLoc += delta/(1+x*x);
  56   }
  57   *ret = retLoc;
  58 }
  59
  60 static long double SimulateOnAllCoresOfComputationNodeInternal(std::uint64_t nbTurn, unsigned int nbThreads)
  61 {
  62   SIMPLE_MESSAGE( "Number of turn = 10**" << std::log10((double)nbTurn) );
  63   std::vector< std::thread > threads(nbThreads);
  64   std::vector<long double> res(nbThreads);
  65   for(auto iCore = 0 ; iCore < nbThreads ; ++iCore)
  66   {
  67     std::uint64_t startSlice,stopSlice;
  68     GetSlice<std::uint64_t>(0,nbTurn,iCore,nbThreads,startSlice,stopSlice);
  69     long double *resLoc = res.data()+iCore;
  70     threads[iCore] = std::thread([nbTurn,startSlice,stopSlice,resLoc]{ SimulateOneCoreOfComputationNode(startSlice,stopSlice,nbTurn,resLoc);});
  71   }
  72   SIMPLE_MESSAGE( "Number of processors " << nbThreads );
  73   for(auto& th : threads)
  74     th.join();
  75   long double ret = std::accumulate(res.begin(),res.end(),0.0);
  76   return ret;
  77 }
  78
  79 constexpr long double CST = 161000000; // CST - estabished with gcc8.5.0 in debug mode
  80
  81 static std::uint64_t GetNbTurnFromTimeControler(double timeControler)
  82 {
  83   return static_cast<std::uint64_t>( timeControler * CST );
  84 }
  85
  86 /*!
  87 * long double& ret let it in output to force computation
  88  */
  89 static std::int64_t GetNbOfNsFor(double timeControler, long double& ret)
  90 {
  91   std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeControler );
  92   auto start = std::chrono::high_resolution_clock::now();
  93   SimulateOneCoreOfComputationNode(0,nbTurn,nbTurn,&ret);
  94   auto end = std::chrono::high_resolution_clock::now();
  95   return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
  96 }
  97
  98 /*!
  99 * long double& ret let it in output to force computation
 100  */
 101 static double FunctionToSolve(double timeControler, long double& fake)
 102 {
 103   std::int64_t timeInNs = GetNbOfNsFor(timeControler,fake);
 104   long double ret = (long double )timeInNs / ((long double) 1000000000);
 105   return ret;
 106 }
 107
 108 /*!
 109 * long double& ret let it in output to force computation
 110  */
 111 static double Derivative(double timeControler, long double& fake)
 112 {
 113   long double fake1,fake2;
 114   double a = FunctionToSolve(timeControler,fake1);
 115   double b = FunctionToSolve(2*timeControler,fake2);
 116   fake = fake1 + fake2;
 117   return (b-a)/(timeControler);
 118 }
 119
 120 /*
 121  * The returned value is those to pass to ActiveSleepOfComputationNode to
 122  * have an active sleep of 1s on one core.
 123  *
 124  * Higher is the returned value most performant is your processor and/or the generated binary
 125  *
 126  * Some exemple on core i7-6700 of 2020 on gcc 6.3 and 8.5
 127  * In debug mode : ~ 1
 128  * In O2 mode : ~ 4.3
 129  */
 130 double SALOME::GetTimeAdjustmentCst()
 131 {
 132   long double fake;
 133   double ret = Derivative(0.5,fake);
 134   // Do not remove fake variable because if you do so compiler is too smart and stop computing :)
 135   if(fake > 0.0)
 136     return 1.0/ret;
 137   return 1.0;
 138 }
 139
 140 /*
 141  * This method can be seen as a active sleep that occupies cores (piloted by \a nbThreads) of hosting computation node.
 142  * Of course the time spend by this method cannot be controled finely due to dependancy of the activity on hosted machine / OS.
 143  * This method only occupies cores without any stress of the memory.
 144  * This method can be used to assess good usage of computation nodes on a massive YACS graph.
 145  *
 146  * /param [in] timeAjustment : pilot time spend for the active sleep. The duration is linearly controled by this input and inversly linearly to the \a nbThreads parameter.
 147  * \param [out] timeInS : return the time in second of the active sleep
 148  * \param [in] nbThreads : Specify the number of thread on which the computation will be launched. If 0 (the default) all cores of hosting machine will be used.
 149  * \return is pi value computed in a naive way :) This return value is necessary to prevent aggressive compiler to not compute.
 150  *
 151  * \sa GetTimeAdjustmentCstOnYourSystem : to help you to fine tune \a timeAjustment parameter
 152  */
 153 long double SALOME::HeatMarcel(double timeAjustment, double& timeInS, unsigned int nbThreads)
 154 {
 155   std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeAjustment );
 156   unsigned int nbThreadsEff = nbThreads == 0?std::thread::hardware_concurrency():nbThreads;
 157   auto start = std::chrono::high_resolution_clock::now();
 158   long double ret = SimulateOnAllCoresOfComputationNodeInternal(nbTurn, nbThreadsEff);
 159   auto end = std::chrono::high_resolution_clock::now();
 160   timeInS = ((double)(std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()))/1000000000.0;
 161   return 4.0 * ret;
 162 }