1 // Copyright (C) 2023 CEA/DEN, EDF R&D
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License, or (at your option) any later version.
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "HeatMarcel.hxx"
22 #include "baseutilities.h"
/*!
 * Cut the half-open range [start,stop) into \a nbOfSlices consecutive slices and
 * give back the bounds of the slice number \a sliceId.
 *
 * Every slice holds (stop-start)/nbOfSlices elements (integer division), except the
 * last one whose upper bound is \a stop, so it also takes the remainder.
 *
 * \param [in] start : first element of the range to split
 * \param [in] stop : past-the-end element of the range to split
 * \param [in] sliceId : rank of the requested slice
 * \param [in] nbOfSlices : number of slices the range is split into
 * \param [out] startSlice : first element of the requested slice
 * \param [out] stopSlice : past-the-end element of the requested slice
 */
template<class T>
static void GetSlice(T start, T stop, const unsigned int sliceId, const unsigned int nbOfSlices, T& startSlice, T& stopSlice)
{
  const T chunk = (stop - start) / nbOfSlices;
  startSlice = start + chunk * sliceId;
  // Default to the end of the whole range : this is the answer for the last slice.
  stopSlice = stop;
  if(sliceId < nbOfSlices - 1)
    stopSlice = start + chunk * (sliceId + 1);
}
/*!
 * Integrate the 1/(1+x^2) function (derivative of atan) with a left rectangle rule of
 * step 1/nbTurn, for x going from start/nbTurn to end/nbTurn.
 * The sum is scaled by 4, so sweeping the whole [0,nbTurn) range approximates pi.
 *
 * \param [in] start : index of the first rectangle
 * \param [in] end : past-the-end index of the rectangles
 * \param [in] nbTurn : number of rectangles covering [0,1]
 * \param [out] ret : location receiving the result of the integration
 */
static void SimulateOneCoreOfComputationNode(std::uint64_t start, std::uint64_t end, std::uint64_t nbTurn, long double* ret)
{
  const long double step = 1.0 / static_cast<long double>(nbTurn);
  long double acc = 0.0;
  std::uint64_t idx = start;
  while(idx < end)
  {
    const long double abscissa = idx*step;
    acc += step/(1+abscissa*abscissa);
    ++idx;
  }
  *ret = 4.0 * acc;
}
60 static long double SimulateOnAllCoresOfComputationNodeInternal(std::uint64_t nbTurn, unsigned int nbThreads)
62 SIMPLE_MESSAGE( "Number of turn = 10**" << std::log10((double)nbTurn) );
63 std::vector< std::thread > threads(nbThreads);
64 std::vector<long double> res(nbThreads);
65 for(auto iCore = 0 ; iCore < nbThreads ; ++iCore)
67 std::uint64_t startSlice,stopSlice;
68 GetSlice<std::uint64_t>(0,nbTurn,iCore,nbThreads,startSlice,stopSlice);
69 long double *resLoc = res.data()+iCore;
70 threads[iCore] = std::thread([nbTurn,startSlice,stopSlice,resLoc]{ SimulateOneCoreOfComputationNode(startSlice,stopSlice,nbTurn,resLoc);});
72 SIMPLE_MESSAGE( "Number of processors " << nbThreads );
73 for(auto& th : threads)
75 long double ret = std::accumulate(res.begin(),res.end(),0.0);
constexpr long double CST = 161000000; // CST - established with gcc8.5.0 in debug mode

/*!
 * Convert a time controler into a number of integration steps : timeControler * CST,
 * truncated toward zero.
 *
 * Converting a floating point value that does not fit the destination integer type is
 * undefined behavior, so the out of range inputs are handled explicitly :
 * - negative, null or NaN product : 0 is returned
 * - product greater or equal to 2^64 : the biggest std::uint64_t is returned
 *
 * \param [in] timeControler : value piloting the duration of the computation
 * \return number of integration steps to perform
 */
static std::uint64_t GetNbTurnFromTimeControler(double timeControler)
{
  const long double nbTurn = timeControler * CST;
  // Written as a negated comparison so that NaN ends up here too.
  if( !(nbTurn > 0.0L) )
    return 0;
  constexpr long double TWO_POW_64 = 18446744073709551616.0L;
  if( nbTurn >= TWO_POW_64 )
    return ~std::uint64_t{0};
  return static_cast<std::uint64_t>( nbTurn );
}
87 * long double& ret let it in output to force computation
89 static std::int64_t GetNbOfNsFor(double timeControler, long double& ret)
91 std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeControler );
92 auto start = std::chrono::high_resolution_clock::now();
93 SimulateOneCoreOfComputationNode(0,nbTurn,nbTurn,&ret);
94 auto end = std::chrono::high_resolution_clock::now();
95 return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
99 * long double& ret let it in output to force computation
101 static double FunctionToSolve(double timeControler, long double& fake)
103 std::int64_t timeInNs = GetNbOfNsFor(timeControler,fake);
104 long double ret = (long double )timeInNs / ((long double) 1000000000);
109 * long double& ret let it in output to force computation
111 static double Derivative(double timeControler, long double& fake)
113 long double fake1,fake2;
114 double a = FunctionToSolve(timeControler,fake1);
115 double b = FunctionToSolve(2*timeControler,fake2);
116 fake = fake1 + fake2;
117 return (b-a)/(timeControler);
121 * The returned value is those to pass to ActiveSleepOfComputationNode to
122 * have an active sleep of 1s on one core.
124 * Higher is the returned value most performant is your processor and/or the generated binary
126 * Some exemple on core i7-6700 of 2020 on gcc 6.3 and 8.5
127 * In debug mode : ~ 1
130 double SALOME::GetTimeAdjustmentCst()
133 double ret = Derivative(0.5,fake);
134 // Do not remove fake variable because if you do so compiler is too smart and stop computing :)
141 * This method can be seen as a active sleep that occupies cores (piloted by \a nbThreads) of hosting computation node.
142 * Of course the time spend by this method cannot be controled finely due to dependancy of the activity on hosted machine / OS.
143 * This method only occupies cores without any stress of the memory.
144 * This method can be used to assess good usage of computation nodes on a massive YACS graph.
146 * /param [in] timeAjustment : pilot time spend for the active sleep. The duration is linearly controled by this input and inversly linearly to the \a nbThreads parameter.
147 * \param [out] timeInS : return the time in second of the active sleep
148 * \param [in] nbThreads : Specify the number of thread on which the computation will be launched. If 0 (the default) all cores of hosting machine will be used.
149 * \return is pi value computed in a naive way :) This return value is necessary to prevent aggressive compiler to not compute.
151 * \sa GetTimeAdjustmentCstOnYourSystem : to help you to fine tune \a timeAjustment parameter
153 long double SALOME::HeatMarcel(double timeAjustment, double& timeInS, unsigned int nbThreads)
155 std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeAjustment );
156 unsigned int nbThreadsEff = nbThreads == 0?std::thread::hardware_concurrency():nbThreads;
157 auto start = std::chrono::high_resolution_clock::now();
158 long double ret = SimulateOnAllCoresOfComputationNodeInternal(nbTurn, nbThreadsEff);
159 auto end = std::chrono::high_resolution_clock::now();
160 timeInS = ((double)(std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()))/1000000000.0;