1 // Copyright (C) 2023-2024 CEA/DEN, EDF R&D
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License, or (at your option) any later version.
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
20 #include "HeatMarcel.hxx"
22 #include "baseutilities.h"
/*!
 * Split the half-open range [start, stop) into \a nbOfSlices contiguous slices and return
 * the bounds of the slice of rank \a sliceId.
 *
 * Every slice holds (stop-start)/nbOfSlices elements (integer division), except the last one
 * which extends up to \a stop and therefore also receives the remainder.
 *
 * \param [in] start : first element of the range to split
 * \param [in] stop : one past the last element of the range to split
 * \param [in] sliceId : zero-based rank of the requested slice
 * \param [in] nbOfSlices : total number of slices. If 0, an empty slice [start, start) is returned.
 * \param [out] startSlice : first element of the requested slice
 * \param [out] stopSlice : one past the last element of the requested slice
 */
template<class T>
static void GetSlice(T start, T stop, const unsigned int sliceId, const unsigned int nbOfSlices, T& startSlice, T& stopSlice)
{
  if(nbOfSlices == 0)
  {
    // Nothing to split : return an empty slice instead of dividing by zero
    // (and instead of letting nbOfSlices-1 wrap around).
    startSlice = start;
    stopSlice = start;
    return ;
  }
  const T nbElems = stop - start;
  const T minNbOfElemsPerSlice = nbElems / nbOfSlices;
  startSlice = start + minNbOfElemsPerSlice * sliceId;
  if(sliceId < nbOfSlices - 1)
    stopSlice = start + minNbOfElemsPerSlice * (sliceId + 1);
  else
    stopSlice = stop; // last slice : take everything that is left
}
/*!
 * Compute the integral of the 1/(1+x^2) function (whose primitive is atan) between
 * start/nbTurn and end/nbTurn, using one rectangle of width 1/nbTurn per iteration.
 *
 * \param [in] start : rank of the first rectangle to evaluate
 * \param [in] end : one past the rank of the last rectangle to evaluate
 * \param [in] nbTurn : number of rectangles per unit of x
 * \param [out] ret : receives the value of the partial integral
 */
static void SimulateOneCoreOfComputationNode(std::uint64_t start, std::uint64_t end, std::uint64_t nbTurn, long double* ret)
{
  long double retLoc = 0.0L;
  const long double delta = 1.0L / static_cast<long double>(nbTurn);
  for(std::uint64_t i = start ; i < end ; ++i)
  {
    const long double x = i*delta;
    retLoc += delta/(1+x*x);
  }
  // Hand the result back to the caller : without this store the output is never written.
  *ret = retLoc;
}
59 static long double SimulateOnAllCoresOfComputationNodeInternal(std::uint64_t nbTurn, unsigned int nbThreads)
61 SIMPLE_MESSAGE( "Number of turn = " << std::scientific << std::setprecision(12) << (double)nbTurn );
62 std::vector< std::thread > threads(nbThreads);
63 std::vector<long double> res(nbThreads);
64 for(auto iCore = 0 ; iCore < nbThreads ; ++iCore)
66 std::uint64_t startSlice,stopSlice;
67 GetSlice<std::uint64_t>(0,nbTurn,iCore,nbThreads,startSlice,stopSlice);
68 long double *resLoc = res.data()+iCore;
69 threads[iCore] = std::thread([nbTurn,startSlice,stopSlice,resLoc]{ SimulateOneCoreOfComputationNode(startSlice,stopSlice,nbTurn,resLoc);});
71 SIMPLE_MESSAGE( "Number of processors " << nbThreads );
72 for(auto& th : threads)
74 long double ret = std::accumulate(res.begin(),res.end(),0.0);
// Number of integration steps per unit of time controller.
constexpr long double CST = 161000000; // CST - established with gcc8.5.0 in debug mode

/*!
 * Convert a time controller into a number of integration steps (timeControler * CST).
 *
 * \param [in] timeControler : value piloting the amount of computation
 * \return the number of steps. 0 if \a timeControler is negative, null or NaN. The result is
 *         saturated to the maximum value of std::uint64_t when the product does not fit.
 */
static std::uint64_t GetNbTurnFromTimeControler(double timeControler)
{
  // Converting a negative, NaN or out of range floating point value to an unsigned integer
  // is undefined behaviour : filter these cases before the cast.
  if( !(timeControler > 0.0) )
    return 0;
  constexpr std::uint64_t MAX_NB_TURN = ~std::uint64_t{0};
  const long double nbTurn = timeControler * CST;
  if( nbTurn >= static_cast<long double>(MAX_NB_TURN) )
    return MAX_NB_TURN;
  return static_cast<std::uint64_t>( nbTurn );
}
86 * long double& ret let it in output to force computation
88 static std::int64_t GetNbOfNsFor(double timeControler, long double& ret)
90 std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeControler );
91 auto start = std::chrono::high_resolution_clock::now();
92 SimulateOneCoreOfComputationNode(0,nbTurn,nbTurn,&ret);
93 auto end = std::chrono::high_resolution_clock::now();
94 return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
98 * long double& ret let it in output to force computation
100 static double FunctionToSolve(double timeControler, long double& fake)
102 std::int64_t timeInNs = GetNbOfNsFor(timeControler,fake);
103 long double ret = (long double )timeInNs / ((long double) 1000000000);
108 * long double& ret let it in output to force computation
110 static double Derivative(double timeControler, long double& fake)
112 long double fake1,fake2;
113 double a = FunctionToSolve(timeControler,fake1);
114 double b = FunctionToSolve(2*timeControler,fake2);
115 fake = fake1 + fake2;
116 return (b-a)/(timeControler);
120 * The returned value is the one to pass to ActiveSleepOfComputationNode to
121 * have an active sleep of 1s on one core.
123 * The higher the returned value, the more performant your processor and/or the generated binary.
125 * Some examples on a Core i7-6700 of 2020 with gcc 6.3 and 8.5
126 * In debug mode : ~ 1
// Runs Derivative(0.5, fake) and stores its result in ret.
// NOTE(review): the declaration of `fake` and the statement(s) returning the value are not
// visible in this view of the file - confirm how `ret` and `fake` are turned into the returned value.
129 double SALOME::GetTimeAdjustmentCst()
132 double ret = Derivative(0.5,fake);
133 // Do not remove fake variable because if you do so compiler is too smart and stop computing :)
140 * This method can be seen as a active sleep that occupies cores (piloted by \a nbThreads) of hosting computation node.
141 * Of course the time spend by this method cannot be controled finely due to dependancy of the activity on hosted machine / OS.
142 * This method only occupies cores without any stress of the memory.
143 * This method can be used to assess good usage of computation nodes on a massive YACS graph.
145 * /param [in] timeAjustment : pilot time spend for the active sleep. The duration is linearly controled by this input and inversly linearly to the \a nbThreads parameter.
146 * \param [out] timeInS : return the time in second of the active sleep
147 * \param [in] nbThreads : Specify the number of thread on which the computation will be launched. If 0 (the default) all cores of hosting machine will be used.
148 * \return is pi value computed in a naive way :) This return value is necessary to prevent aggressive compiler to not compute.
150 * \sa GetTimeAdjustmentCstOnYourSystem : to help you to fine tune \a timeAjustment parameter
152 long double SALOME::HeatMarcel(double timeAjustment, double& timeInS, unsigned int nbThreads)
154 std::uint64_t nbTurn = GetNbTurnFromTimeControler( timeAjustment );
155 unsigned int nbThreadsEff = nbThreads == 0?std::thread::hardware_concurrency():nbThreads;
156 auto start = std::chrono::high_resolution_clock::now();
157 long double ret = SimulateOnAllCoresOfComputationNodeInternal(nbTurn, nbThreadsEff);
158 auto end = std::chrono::high_resolution_clock::now();
159 timeInS = ((double)(std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()))/1000000000.0;