From: Julien Schueller Date: Fri, 12 Jul 2024 15:57:57 +0000 (+0200) Subject: Add MEDCouplingIterativeStatistics X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=479fa0796cefe1513bb672bbe509fcb258aac6ed;p=tools%2Fmedcoupling.git Add MEDCouplingIterativeStatistics --- diff --git a/doc/user/input/data_analysis.rst b/doc/user/input/data_analysis.rst index 1ac5b2b66..a7c5f3bc1 100644 --- a/doc/user/input/data_analysis.rst +++ b/doc/user/input/data_analysis.rst @@ -571,3 +571,22 @@ To get value of a *field* at certain *points* call :start-after: UG_MEDCouplingFieldDouble_8 :end-before: UG_MEDCouplingFieldDouble_8 +Iterative statistics on fields +------------------------------ + +Statistical moments accross several *field* instances can be computed without having to store all fields in memory. +The mean, standard-deviation, variance, and the variance-covariance matrix of field components +on each field tuple can be computed by calling + +.. literalinclude:: ../../../src/Stat/Test/TestMEDCouplingIterativeStatistics.py + :start-after: UG_MEDCouplingIterativeStatistics_1 + :end-before: UG_MEDCouplingIterativeStatistics_1 + +In a similar fashion Sobol indices from *field* instances can be computed without having to store all fields in memory. +The parameters of the fields must be sampled in a *pick-freeze* manner and the fields must be generated +on the fly according to these generated parameters combinations. + +.. literalinclude:: ../../../src/Stat/Test/TestMEDCouplingIterativeStatistics.py + :start-after: UG_MEDCouplingIterativeStatistics_2 + :end-before: UG_MEDCouplingIterativeStatistics_2 + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b2514fb0f..2f70e4d86 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,6 +34,9 @@ ADD_SUBDIRECTORY(ICoCo) IF(MEDCOUPLING_ENABLE_PYTHON) ADD_SUBDIRECTORY(MEDCoupling_Swig) ADD_SUBDIRECTORY(PyWrapping) + + find_package(SalomeBasicIterativeStatistics) + add_subdirectory(Stat) ENDIF(MEDCOUPLING_ENABLE_PYTHON) IF(NOT MEDCOUPLING_MICROMED) diff --git a/src/PyWrapping/medcoupling_pycode b/src/PyWrapping/medcoupling_pycode index 728e536d5..be8527b8e 100644 --- a/src/PyWrapping/medcoupling_pycode +++ b/src/PyWrapping/medcoupling_pycode @@ -53,6 +53,14 @@ def AdvancedExtensionsStr(sz=60): SubExtension(MEDPartitioner.AllAlgorithms(),MEDPartitioner.AvailableAlgorithms(),pad,tab, sts) pass pass + + try: + import iterative_stats + isOK = 1 + except ImportError: + isOk = 0 + sts.append(f"Iterative statistics {tab[isOK].rjust(3):.>{sz-21}}") + return "\n".join(sts) def ShowAdvancedExtensions(sz=60): diff --git a/src/Stat/CMakeLists.txt b/src/Stat/CMakeLists.txt new file mode 100644 index 000000000..fe35a9b67 --- /dev/null +++ b/src/Stat/CMakeLists.txt @@ -0,0 +1,8 @@ + +if (BASICITERATIVESTATISTICS_FOUND) + install(FILES MEDCouplingIterativeStatistics.py DESTINATION ${MEDCOUPLING_INSTALL_PYTHON}) + + if (MEDCOUPLING_BUILD_PY_TESTS) + add_subdirectory(Test) + endif () +endif () diff --git a/src/Stat/MEDCouplingIterativeStatistics.py b/src/Stat/MEDCouplingIterativeStatistics.py new file mode 100644 index 000000000..59d306886 --- /dev/null +++ b/src/Stat/MEDCouplingIterativeStatistics.py @@ -0,0 +1,231 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2021-2024 CEA, EDF +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# +# Author : Anthony GEAY (EDF R&D) + +from iterative_stats.iterative_mean import IterativeMean +from iterative_stats.iterative_variance import IterativeVariance +from iterative_stats.iterative_covariance import IterativeCovariance +from iterative_stats.sensitivity.sensitivity_saltelli import IterativeSensitivitySaltelli + +import numpy as np +try: + import openturns as ot + have_ot = True +except ImportError: + have_ot = False + + +class IterativeFieldMoments: + """ + Compute statistics over fields. + + Parameters + ---------- + enable_mean : bool, optional + Whether to aggregate mean + Default is True + enable_variance : bool, optional + Whether to aggregate variance + Default is True + enable_covariance : bool, optional + Whether to aggregate covariance + Default is True + """ + + def __init__(self, enable_mean=True, enable_variance=True, enable_covariance=True): + self._dim = None + self._size = None + self._enable_mean = enable_mean + self._enable_variance = enable_variance + self._enable_covariance = enable_covariance + + def increment(self, field): + """ + Increment state. + + Parameters + ---------- + field : medcoupling.MEDCouplingFieldDouble + Field to compute statistics from + """ + values = field.getArray() + + if self._dim is None: + self._dim = values.getNumberOfComponents() + self._size = values.getNumberOfTuples() + if have_ot: + self._agg_mean = [ot.IterativeMoments(1, self._dim) for k in range(self._size)] if self._enable_mean else None + self._agg_variance = [ot.IterativeMoments(2, self._dim) for k in range(self._size)] if self._enable_variance else None + else: + self._agg_mean = [IterativeMean(dim=self._dim) for k in range(self._size)] if self._enable_mean else None + self._agg_variance = [IterativeVariance(dim=self._dim) for k in range(self._size)] if self._enable_variance else None + + if self._enable_covariance: + n_tri = (self._dim * (self._dim + 1) // 2) + self._agg_covariance = [[IterativeCovariance(dim=1) for i in range(n_tri)] for k in range(self._size)] + else: + self._agg_covariance = None + + if values.getNumberOfComponents() != self._dim: + raise ValueError(f"Incorrect number of components {values.getNumberOfComponents()} expected {self._dim}") + if values.getNumberOfTuples() != self._size: + raise ValueError(f"Incorrect number of tuples {values.getNumberOfTuples()} expected {self._size}") + for k in range(self._size): + tk = values.getTuple(k) + if self._enable_mean: + self._agg_mean[k].increment(tk) + if self._enable_variance: + self._agg_variance[k].increment(tk) + if self._enable_covariance: + for i in range(self._dim): + for j in range(i + 1): + self._agg_covariance[k][i * (i + 1) // 2 + j].increment(tk[i], tk[j]) + + def mean(self): + """ + Mean. + + Returns + ------- + mean : numpy.array of shape (n, d) + Mean field + """ + if self._agg_mean is None: + raise ValueError(f"No data aggregated") + + mean = np.zeros((self._size, self._dim)) + for i in range(self._size): + if have_ot: + mean[i] = self._agg_mean[i].getMean() + else: + mean[i] = self._agg_mean[i].get_stats() + return mean + + def variance(self): + """ + Variance. + + Returns + ------- + variance : numpy.array + Variance per component + """ + if self._agg_variance is None: + raise ValueError(f"No data aggregated") + + variance = np.zeros((self._size, self._dim)) + for i in range(self._size): + if have_ot: + variance[i] = self._agg_variance[i].getVariance() + else: + variance[i] = self._agg_variance[i].get_stats() + return variance + + def stddev(self): + """ + Standard deviation. + + Returns + ------- + stddev : numpy.array of shape (n, d) + Standard deviation per component + """ + return np.sqrt(self.variance()) + + def covariance(self): + """ + Compute the variance-covariance matrix. + + Returns + ------- + cov : numpy.array of shape (n, d, d) + Variance-covariance matrix + """ + if self._agg_covariance is None: + raise ValueError(f"No data aggregated") + + covariance = np.zeros((self._size, self._dim, self._dim)) + for k in range(self._size): + for i in range(self._dim): + for j in range(i + 1): + covij = self._agg_covariance[k][i * (i + 1) // 2 + j].get_stats()[0] + covariance[k, i, j] = covij + if i != j: + covariance[k, j, i] = covij + return covariance + + +class IterativeFieldSobol: + """ + Iterative Sobol indices. + + Parameters + ---------- + nb_params : int + Number of parameters of the field + """ + def __init__(self, nb_parms: int): + if nb_parms < 2: + raise ValueError(f"Got {nb_parms} parameters, expected at least 2") + self._nb_parms = nb_parms + self._dim = None + self._size = None + self._agg_sobol = None + + def increment(self, fields): + """ + Update data. + + Parameters + ---------- + fields : List[medcoupling.MEDCouplingFieldDouble] + List of fields for each pick-freeze combination of the parameters + """ + + if len(fields) != self._nb_parms + 2: + raise ValueError(f"Got {len(fields)} fields, expected {self._nb_parms + 2}") + if self._dim == None: + self._dim = fields[0].getArray().getNumberOfComponents() + self._size = fields[0].getArray().getNumberOfTuples() + self._agg_sobol = [IterativeSensitivitySaltelli(self._nb_parms, dim=self._dim) for k in range(self._size)] + + for k in range(self._size): + tks = np.array([field.getArray().getTuple(k) for field in fields]) + self._agg_sobol[k].increment(tks.reshape((self._nb_parms + 2,))) + + def indices(self, k): + """ + Sobol indices accessor. + + Parameters + ---------- + k : int + Tuple index in the field + + Returns + ------- + first, total : np.array + First and total order Sobol' indices of shape (nb_parms, dim) + """ + if self._agg_sobol is None: + raise ValueError("No data aggregated") + result = self._agg_sobol[k].getFirstOrderIndices(), self._agg_sobol[k].getTotalOrderIndices() + return result diff --git a/src/Stat/Test/CMakeLists.txt b/src/Stat/Test/CMakeLists.txt new file mode 100644 index 000000000..2b41ac0c7 --- /dev/null +++ b/src/Stat/Test/CMakeLists.txt @@ -0,0 +1,7 @@ + +SALOME_ACCUMULATE_ENVIRONMENT(PYTHONPATH NOCHECK ${CMAKE_CURRENT_BINARY_DIR}/../../Stat) +SALOME_GENERATE_TESTS_ENVIRONMENT(tests_env) + +ADD_TEST(NAME MEDCouplingIterativeStatistics + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/TestMEDCouplingIterativeStatistics.py) +SET_TESTS_PROPERTIES(MEDCouplingIterativeStatistics PROPERTIES ENVIRONMENT "${tests_env}") diff --git a/src/Stat/Test/TestMEDCouplingIterativeStatistics.py b/src/Stat/Test/TestMEDCouplingIterativeStatistics.py new file mode 100644 index 000000000..3327bb9cc --- /dev/null +++ b/src/Stat/Test/TestMEDCouplingIterativeStatistics.py @@ -0,0 +1,191 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2021-2024 CEA, EDF +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com +# +# Author : Anthony GEAY (EDF R&D) + +import medcoupling as mc +from MEDCouplingIterativeStatistics import IterativeFieldMoments, IterativeFieldSobol +from iterative_stats.experimental_design.experiment import AbstractExperiment +from numpy.testing import assert_allclose +import numpy as np +import unittest + + +class MEDCouplingIterativeStatisticsTest(unittest.TestCase): + def test1(self): + #! [UG_MEDCouplingIterativeStatistics_1] + # check moments on simple linear 3-d field defined on an 1-d mesh + import medcoupling as mc + from MEDCouplingIterativeStatistics import IterativeFieldMoments + + # 1-d mesh + size = int(1e1) + coords = [i / size for i in range(size)] + arrX = mc.DataArrayDouble(coords, size, 1) + mesh = mc.MEDCouplingCMesh() + mesh.setCoords(arrX) + + # compute statistics + istats = IterativeFieldMoments() + + # aggregate 3-d fields + sampling_size = 10 + dim = 3 + for i in range(sampling_size): + coeff = 1.0 + i / sampling_size + field = mesh.fillFromAnalytic(mc.ON_NODES, dim, f"2*x*IVec + 3*x*{coeff}*JVec + 5*KVec") + istats.increment(field) + + # check moments on last node + mean = istats.mean() + variance = istats.variance() + stddev = istats.stddev() + covariance = istats.covariance() + #! [UG_MEDCouplingIterativeStatistics_1] + + print(f"mean={mean[-1]}") + assert mean.shape == (size, dim), f"wrong shape: {mean.shape}" + assert_allclose(mean[-1], [1.8, 3.915, 5.0]) + + print(f"variance={variance[-1]}") + assert variance.shape == (size, dim), f"wrong shape: {variance.shape}" + assert_allclose(variance[-1], [0., 0.66825, 0.], atol=1e-9) + + print(f"stddev={stddev[-1]}") + assert stddev.shape == (size, dim), f"wrong shape: {stddev.shape}" + assert_allclose(stddev[-1], [0., 0.8174656, 0.], atol=1e-9) + + print(f"covariance={covariance[-1]}") + assert covariance.shape == (size, dim, dim), f"wrong shape: {covariance.shape}" + assert_allclose( + covariance[-1].flatten(), [0.0, 0.0, 0.0, 0.0, 0.66825, 0.0, 0.0, 0.0, 0.0] + ) + + def test2(self): + # check ref values with OT-only + try: + import openturns as ot + except ImportError: + return + + size = int(1e1) + mesh = ot.RegularGrid(0.0, 1.0 / size, size) + + sampling_size = 10 + sample = ot.Sample(sampling_size, 3) + for i in range(sampling_size): + coeff = 1.0 + i / sampling_size + f = ot.SymbolicFunction(["x"], ["2*x", f"3*x*{coeff}", "5"]) + values = f(mesh.getVertices()) + sample[i] = values[-1] + + # check moments on last node + mean = sample.computeMean() + print(f"mean={mean}") + assert_allclose(np.asarray(mean), [1.8, 3.915, 5.0], atol=1e-9) + + variance = sample.computeVariance() + print(f"variance={variance}") + assert_allclose(np.asarray(variance), [0., 0.668250, 0.], atol=1e-9) + + covariance = sample.computeCovariance() + print(f"covariance={covariance}") + assert_allclose( + np.asarray(covariance).flatten(), [0.0, 0.0, 0.0, 0.0, 0.66825, 0.0, 0.0, 0.0, 0.0], + atol=1e-9 + ) + + def test3(self): + # test on bigger mesh + + # 1-d mesh + size = int(1e4) + coords = [i / size for i in range(size)] + arrX = mc.DataArrayDouble(coords, size, 1) + mesh = mc.MEDCouplingCMesh() + mesh.setCoords(arrX) + + # compute mean only + istats = IterativeFieldMoments(enable_variance=False, enable_covariance=False) + + # aggregate 2-d fields + sampling_size = 10 + dim = 2 + for i in range(sampling_size): + coeff = 1.0 + i / sampling_size + field = mesh.fillFromAnalytic(mc.ON_NODES, dim, f"2*x*IVec + 3*x*{coeff}*JVec") + istats.increment(field) + + mean = istats.mean() + print(f"mean={mean[-1]}") + assert mean.shape == (size, dim), f"wrong shape: {mean.shape}" + assert_allclose(mean[-1], [1.9998 , 4.349565]) + + def test4(self): + #! [UG_MEDCouplingIterativeStatistics_2] + + # Sobol' test over an 1-d mesh + import medcoupling as mc + from MEDCouplingIterativeStatistics import IterativeFieldSobol + from iterative_stats.experimental_design.experiment import AbstractExperiment + + # 1-d mesh [-1; -1] + size = int(1e2) + coords = [-1 + 2*i / size for i in range(size)] + arrX = mc.DataArrayDouble(coords, size, 1) + mesh = mc.MEDCouplingCMesh() + mesh.setCoords(arrX) + + # class to sample 2 parameters: in [0,1]^2 for nb_sim repetitions in a pick-freeze manner + nb_parms = 2 + nb_sim = 300 + class ABSampler(AbstractExperiment): + def __init__(self): + super(ABSampler, self).__init__(nb_parms, nb_sim) + np.random.seed(42) + def draw(self): + return np.random.rand(1, self.nb_parms) + + # aggregate the nb_sim simulations + isobol = IterativeFieldSobol(nb_parms) + for pf_sample in ABSampler().generator(): + # build the fields of the time-series f(a, b)_x=a+bx^2 for each (a,b) tuple in the pick-freeze sample + fields = [mesh.fillFromAnalytic(mc.ON_NODES, 1, f"({a}+{b}*x*x)*IVec") for a,b in pf_sample] + # note that for each simulation we need to instanciate nb_parms+2 fields + isobol.increment(fields) + + # retrieve the Sobol' indices of f(a, b)_x in the beginning (x=-1) and middle (x=0) tuples of the time grid + first_beg, total_beg = isobol.indices(0) + print(f"at 0 : first={first_beg} total={total_beg}") + + first_mid, total_mid = isobol.indices(size // 2) + print(f"at mid: first={first_mid} total={total_mid}") + + #! [UG_MEDCouplingIterativeStatistics_2] + + assert_allclose(first_beg[0], [0.59680837, 0.56303733]) + assert_allclose(total_beg[0], [0.52188301, 0.47895543]) + + assert_allclose(first_mid[0], [0.94759752, -0.00683533], rtol=1e-6) + assert_allclose(total_mid[0], [1.00739829e+00, -9.41396650e-07]) + + +if __name__ == "__main__": + unittest.main()