]> SALOME platform Git repositories - tools/medcoupling.git/commitdiff
Salome HOME
Add MEDCouplingIterativeStatistics
authorJulien Schueller <schueller@phimeca.com>
Fri, 12 Jul 2024 15:57:57 +0000 (17:57 +0200)
committerJulien Schueller <schueller@phimeca.com>
Mon, 5 Aug 2024 15:57:20 +0000 (17:57 +0200)
doc/user/input/data_analysis.rst
src/CMakeLists.txt
src/PyWrapping/medcoupling_pycode
src/Stat/CMakeLists.txt [new file with mode: 0644]
src/Stat/MEDCouplingIterativeStatistics.py [new file with mode: 0644]
src/Stat/Test/CMakeLists.txt [new file with mode: 0644]
src/Stat/Test/TestMEDCouplingIterativeStatistics.py [new file with mode: 0644]

index 1ac5b2b666c4483ff703e8eea74f3cac2b7de28e..a7c5f3bc1727a695e164c8b611ef91b986b8ffae 100644 (file)
@@ -571,3 +571,22 @@ To get value of a *field* at certain *points* call
    :start-after: UG_MEDCouplingFieldDouble_8
    :end-before:  UG_MEDCouplingFieldDouble_8
 
+Iterative statistics on fields
+------------------------------
+
+Statistical moments accross several *field* instances can be computed without having to store all fields in memory.
+The mean, standard-deviation, variance, and the variance-covariance matrix of field components
+on each field tuple can be computed by calling
+
+.. literalinclude:: ../../../src/Stat/Test/TestMEDCouplingIterativeStatistics.py
+   :start-after: UG_MEDCouplingIterativeStatistics_1
+   :end-before:  UG_MEDCouplingIterativeStatistics_1
+
+In a similar fashion Sobol indices from *field* instances can be computed without having to store all fields in memory.
+The parameters of the fields must be sampled in a *pick-freeze* manner and the fields must be generated
+on the fly according to these generated parameters combinations.
+
+.. literalinclude:: ../../../src/Stat/Test/TestMEDCouplingIterativeStatistics.py
+   :start-after: UG_MEDCouplingIterativeStatistics_2
+   :end-before:  UG_MEDCouplingIterativeStatistics_2
+   
index b2514fb0fb8b15e80573f64aebb39043ce48babc..2f70e4d86f31737f0257cfa47f91aceb0ad74f5a 100644 (file)
@@ -34,6 +34,9 @@ ADD_SUBDIRECTORY(ICoCo)
 IF(MEDCOUPLING_ENABLE_PYTHON)
   ADD_SUBDIRECTORY(MEDCoupling_Swig)
   ADD_SUBDIRECTORY(PyWrapping)
+
+  find_package(SalomeBasicIterativeStatistics)
+  add_subdirectory(Stat)
 ENDIF(MEDCOUPLING_ENABLE_PYTHON)
 
 IF(NOT MEDCOUPLING_MICROMED)
index 728e536d5740571deb9fd9d9624a14458583c081..be8527b8eda2004f0a3f9da30b0b782291f21301 100644 (file)
@@ -53,6 +53,14 @@ def AdvancedExtensionsStr(sz=60):
             SubExtension(MEDPartitioner.AllAlgorithms(),MEDPartitioner.AvailableAlgorithms(),pad,tab, sts)
             pass
         pass
+
+    try:
+        import iterative_stats
+        isOK = 1
+    except ImportError:
+        isOk = 0
+    sts.append(f"Iterative statistics {tab[isOK].rjust(3):.>{sz-21}}")
+
     return "\n".join(sts)
 
 def ShowAdvancedExtensions(sz=60):
diff --git a/src/Stat/CMakeLists.txt b/src/Stat/CMakeLists.txt
new file mode 100644 (file)
index 0000000..fe35a9b
--- /dev/null
@@ -0,0 +1,8 @@
+
+if (BASICITERATIVESTATISTICS_FOUND)
+  install(FILES MEDCouplingIterativeStatistics.py DESTINATION ${MEDCOUPLING_INSTALL_PYTHON})
+
+  if (MEDCOUPLING_BUILD_PY_TESTS)
+    add_subdirectory(Test)
+  endif ()
+endif ()
diff --git a/src/Stat/MEDCouplingIterativeStatistics.py b/src/Stat/MEDCouplingIterativeStatistics.py
new file mode 100644 (file)
index 0000000..59d3068
--- /dev/null
@@ -0,0 +1,231 @@
+#! /usr/bin/env python3
+#  -*- coding: utf-8 -*-
+# Copyright (C) 2021-2024  CEA, EDF
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+#
+# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+#
+# Author : Anthony GEAY (EDF R&D)
+
+from iterative_stats.iterative_mean import IterativeMean
+from iterative_stats.iterative_variance import IterativeVariance
+from iterative_stats.iterative_covariance import IterativeCovariance
+from iterative_stats.sensitivity.sensitivity_saltelli import IterativeSensitivitySaltelli
+
+import numpy as np
+try:
+    import openturns as ot
+    have_ot = True
+except ImportError:
+    have_ot = False
+
+
+class IterativeFieldMoments:
+    """
+    Compute statistics over fields.
+    
+    Parameters
+    ----------
+    enable_mean : bool, optional
+        Whether to aggregate mean
+        Default is True
+    enable_variance : bool, optional
+        Whether to aggregate variance
+        Default is True
+    enable_covariance : bool, optional
+        Whether to aggregate covariance
+        Default is True
+    """
+
+    def __init__(self, enable_mean=True, enable_variance=True, enable_covariance=True):
+        self._dim = None
+        self._size = None
+        self._enable_mean = enable_mean
+        self._enable_variance = enable_variance
+        self._enable_covariance = enable_covariance
+
+    def increment(self, field):
+        """
+        Increment state.
+
+        Parameters
+        ----------
+        field : medcoupling.MEDCouplingFieldDouble
+            Field to compute statistics from
+        """
+        values = field.getArray()
+
+        if self._dim is None:
+            self._dim = values.getNumberOfComponents()
+            self._size = values.getNumberOfTuples()
+            if have_ot:
+                self._agg_mean = [ot.IterativeMoments(1, self._dim) for k in range(self._size)] if self._enable_mean else None
+                self._agg_variance = [ot.IterativeMoments(2, self._dim) for k in range(self._size)] if self._enable_variance else None
+            else:
+                self._agg_mean = [IterativeMean(dim=self._dim) for k in range(self._size)] if self._enable_mean else None
+                self._agg_variance = [IterativeVariance(dim=self._dim) for k in range(self._size)] if self._enable_variance else None
+
+            if self._enable_covariance:
+                n_tri = (self._dim * (self._dim + 1) // 2)
+                self._agg_covariance = [[IterativeCovariance(dim=1) for i in range(n_tri)] for k in range(self._size)]
+            else:
+                self._agg_covariance = None
+
+        if values.getNumberOfComponents() != self._dim:
+            raise ValueError(f"Incorrect number of components {values.getNumberOfComponents()} expected {self._dim}")
+        if values.getNumberOfTuples() != self._size:
+            raise ValueError(f"Incorrect number of tuples {values.getNumberOfTuples()} expected {self._size}")
+        for k in range(self._size):
+            tk = values.getTuple(k)
+            if self._enable_mean:
+                self._agg_mean[k].increment(tk)
+            if self._enable_variance:
+                self._agg_variance[k].increment(tk)
+            if self._enable_covariance:
+                for i in range(self._dim):
+                    for j in range(i + 1):
+                        self._agg_covariance[k][i * (i + 1) // 2 + j].increment(tk[i], tk[j])
+
+    def mean(self):
+        """
+        Mean.
+
+        Returns
+        -------
+        mean : numpy.array of shape (n, d)
+            Mean field
+        """
+        if self._agg_mean is None:
+            raise ValueError(f"No data aggregated")
+
+        mean = np.zeros((self._size, self._dim))
+        for i in range(self._size):
+            if have_ot:
+                mean[i] = self._agg_mean[i].getMean()
+            else:
+                mean[i] = self._agg_mean[i].get_stats()
+        return mean
+
+    def variance(self):
+        """
+        Variance.
+
+        Returns
+        -------
+        variance : numpy.array
+            Variance per component
+        """
+        if self._agg_variance is None:
+            raise ValueError(f"No data aggregated")
+
+        variance = np.zeros((self._size, self._dim))
+        for i in range(self._size):
+            if have_ot:
+                variance[i] = self._agg_variance[i].getVariance()
+            else:
+                variance[i] = self._agg_variance[i].get_stats()
+        return variance
+
+    def stddev(self):
+        """
+        Standard deviation.
+
+        Returns
+        -------
+        stddev : numpy.array of shape (n, d)
+            Standard deviation per component
+        """
+        return np.sqrt(self.variance())
+
+    def covariance(self):
+        """
+        Compute the variance-covariance matrix.
+
+        Returns
+        -------
+        cov : numpy.array of shape (n, d, d)
+            Variance-covariance matrix
+        """
+        if self._agg_covariance is None:
+            raise ValueError(f"No data aggregated")
+
+        covariance = np.zeros((self._size, self._dim, self._dim))
+        for k in range(self._size):
+            for i in range(self._dim):
+                for j in range(i + 1):
+                    covij = self._agg_covariance[k][i * (i + 1) // 2 + j].get_stats()[0]
+                    covariance[k, i, j] = covij
+                    if i != j:
+                        covariance[k, j, i] = covij
+        return covariance
+
+
+class IterativeFieldSobol:
+    """
+    Iterative Sobol indices.
+    
+    Parameters
+    ----------
+    nb_params : int
+        Number of parameters of the field
+    """
+    def __init__(self, nb_parms: int):
+        if nb_parms < 2:
+            raise ValueError(f"Got {nb_parms} parameters, expected at least 2")
+        self._nb_parms = nb_parms
+        self._dim = None
+        self._size = None
+        self._agg_sobol = None
+
+    def increment(self, fields):
+        """
+        Update data.
+
+        Parameters
+        ----------
+        fields : List[medcoupling.MEDCouplingFieldDouble]
+            List of fields for each pick-freeze combination of the parameters
+        """
+
+        if len(fields) != self._nb_parms + 2:
+            raise ValueError(f"Got {len(fields)} fields, expected {self._nb_parms + 2}")
+        if self._dim == None:
+            self._dim = fields[0].getArray().getNumberOfComponents()
+            self._size = fields[0].getArray().getNumberOfTuples()
+            self._agg_sobol = [IterativeSensitivitySaltelli(self._nb_parms, dim=self._dim) for k in range(self._size)]
+
+        for k in range(self._size):
+            tks = np.array([field.getArray().getTuple(k) for field in fields])
+            self._agg_sobol[k].increment(tks.reshape((self._nb_parms + 2,)))
+
+    def indices(self, k):
+        """
+        Sobol indices accessor.
+
+        Parameters
+        ----------
+        k : int
+            Tuple index in the field
+
+        Returns
+        -------
+        first, total : np.array
+            First and total order Sobol' indices of shape (nb_parms, dim)
+        """
+        if self._agg_sobol is None:
+            raise ValueError("No data aggregated")
+        result = self._agg_sobol[k].getFirstOrderIndices(), self._agg_sobol[k].getTotalOrderIndices()
+        return result
diff --git a/src/Stat/Test/CMakeLists.txt b/src/Stat/Test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..2b41ac0
--- /dev/null
@@ -0,0 +1,7 @@
+
+SALOME_ACCUMULATE_ENVIRONMENT(PYTHONPATH NOCHECK ${CMAKE_CURRENT_BINARY_DIR}/../../Stat)
+SALOME_GENERATE_TESTS_ENVIRONMENT(tests_env)
+
+ADD_TEST(NAME MEDCouplingIterativeStatistics
+          COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/TestMEDCouplingIterativeStatistics.py)
+SET_TESTS_PROPERTIES(MEDCouplingIterativeStatistics PROPERTIES ENVIRONMENT "${tests_env}")
diff --git a/src/Stat/Test/TestMEDCouplingIterativeStatistics.py b/src/Stat/Test/TestMEDCouplingIterativeStatistics.py
new file mode 100644 (file)
index 0000000..3327bb9
--- /dev/null
@@ -0,0 +1,191 @@
+#! /usr/bin/env python3
+#  -*- coding: utf-8 -*-
+# Copyright (C) 2021-2024  CEA, EDF
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+#
+# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+#
+# Author : Anthony GEAY (EDF R&D)
+
+import medcoupling as mc
+from MEDCouplingIterativeStatistics import IterativeFieldMoments, IterativeFieldSobol
+from iterative_stats.experimental_design.experiment import AbstractExperiment
+from numpy.testing import assert_allclose
+import numpy as np
+import unittest
+
+
+class MEDCouplingIterativeStatisticsTest(unittest.TestCase):
+    def test1(self):
+        #! [UG_MEDCouplingIterativeStatistics_1]
+        # check moments on simple linear 3-d field defined on an 1-d mesh
+        import medcoupling as mc
+        from MEDCouplingIterativeStatistics import IterativeFieldMoments
+
+        # 1-d mesh
+        size = int(1e1)
+        coords = [i / size for i in range(size)]
+        arrX = mc.DataArrayDouble(coords, size, 1)
+        mesh = mc.MEDCouplingCMesh()
+        mesh.setCoords(arrX)
+
+        # compute statistics
+        istats = IterativeFieldMoments()
+
+        # aggregate 3-d fields
+        sampling_size = 10
+        dim = 3
+        for i in range(sampling_size):
+            coeff = 1.0 + i / sampling_size
+            field = mesh.fillFromAnalytic(mc.ON_NODES, dim, f"2*x*IVec + 3*x*{coeff}*JVec + 5*KVec")
+            istats.increment(field)
+
+        # check moments on last node
+        mean = istats.mean()
+        variance = istats.variance()
+        stddev = istats.stddev()
+        covariance = istats.covariance()
+        #! [UG_MEDCouplingIterativeStatistics_1]
+
+        print(f"mean={mean[-1]}")
+        assert mean.shape == (size, dim), f"wrong shape: {mean.shape}"
+        assert_allclose(mean[-1], [1.8, 3.915, 5.0])
+
+        print(f"variance={variance[-1]}")
+        assert variance.shape == (size, dim), f"wrong shape: {variance.shape}"
+        assert_allclose(variance[-1], [0., 0.66825, 0.], atol=1e-9)
+
+        print(f"stddev={stddev[-1]}")
+        assert stddev.shape == (size, dim), f"wrong shape: {stddev.shape}"
+        assert_allclose(stddev[-1], [0., 0.8174656, 0.], atol=1e-9)
+
+        print(f"covariance={covariance[-1]}")
+        assert covariance.shape == (size, dim, dim), f"wrong shape: {covariance.shape}"
+        assert_allclose(
+            covariance[-1].flatten(), [0.0, 0.0, 0.0, 0.0, 0.66825, 0.0, 0.0, 0.0, 0.0]
+        )
+
+    def test2(self):
+        # check ref values with OT-only
+        try:
+            import openturns as ot
+        except ImportError:
+            return
+
+        size = int(1e1)
+        mesh = ot.RegularGrid(0.0, 1.0 / size, size)
+        
+        sampling_size = 10
+        sample = ot.Sample(sampling_size, 3)
+        for i in range(sampling_size):
+            coeff = 1.0 + i / sampling_size
+            f = ot.SymbolicFunction(["x"], ["2*x", f"3*x*{coeff}", "5"])
+            values = f(mesh.getVertices())
+            sample[i] = values[-1]
+
+        # check moments on last node
+        mean = sample.computeMean()
+        print(f"mean={mean}")
+        assert_allclose(np.asarray(mean), [1.8, 3.915, 5.0], atol=1e-9)
+
+        variance = sample.computeVariance()
+        print(f"variance={variance}")
+        assert_allclose(np.asarray(variance), [0., 0.668250, 0.], atol=1e-9)
+
+        covariance = sample.computeCovariance()
+        print(f"covariance={covariance}")
+        assert_allclose(
+            np.asarray(covariance).flatten(), [0.0, 0.0, 0.0, 0.0, 0.66825, 0.0, 0.0, 0.0, 0.0],
+            atol=1e-9
+        )
+
+    def test3(self):
+        # test on bigger mesh
+
+        # 1-d mesh
+        size = int(1e4)
+        coords = [i / size for i in range(size)]
+        arrX = mc.DataArrayDouble(coords, size, 1)
+        mesh = mc.MEDCouplingCMesh()
+        mesh.setCoords(arrX)
+
+        # compute mean only
+        istats = IterativeFieldMoments(enable_variance=False, enable_covariance=False)
+
+        # aggregate 2-d fields
+        sampling_size = 10
+        dim = 2
+        for i in range(sampling_size):
+            coeff = 1.0 + i / sampling_size
+            field = mesh.fillFromAnalytic(mc.ON_NODES, dim, f"2*x*IVec + 3*x*{coeff}*JVec")
+            istats.increment(field)
+
+        mean = istats.mean()
+        print(f"mean={mean[-1]}")
+        assert mean.shape == (size, dim), f"wrong shape: {mean.shape}"
+        assert_allclose(mean[-1], [1.9998 , 4.349565])
+
+    def test4(self):
+        #! [UG_MEDCouplingIterativeStatistics_2]
+
+        # Sobol' test over an 1-d mesh
+        import medcoupling as mc
+        from MEDCouplingIterativeStatistics import IterativeFieldSobol
+        from iterative_stats.experimental_design.experiment import AbstractExperiment
+
+        # 1-d mesh [-1; -1]
+        size = int(1e2)
+        coords = [-1 + 2*i / size for i in range(size)]
+        arrX = mc.DataArrayDouble(coords, size, 1)
+        mesh = mc.MEDCouplingCMesh()
+        mesh.setCoords(arrX)
+
+        # class to sample 2 parameters: in [0,1]^2 for nb_sim repetitions in a pick-freeze manner
+        nb_parms = 2
+        nb_sim = 300
+        class ABSampler(AbstractExperiment):
+            def __init__(self):
+                super(ABSampler, self).__init__(nb_parms, nb_sim)
+                np.random.seed(42)
+            def draw(self):
+                return np.random.rand(1, self.nb_parms)
+
+        # aggregate the nb_sim simulations
+        isobol = IterativeFieldSobol(nb_parms)
+        for pf_sample in ABSampler().generator():
+            # build the fields of the time-series f(a, b)_x=a+bx^2 for each (a,b) tuple in the pick-freeze sample
+            fields = [mesh.fillFromAnalytic(mc.ON_NODES, 1, f"({a}+{b}*x*x)*IVec") for a,b in pf_sample]
+            # note that for each simulation we need to instanciate nb_parms+2 fields
+            isobol.increment(fields)
+
+        # retrieve the Sobol' indices of f(a, b)_x in the beginning (x=-1) and middle (x=0) tuples of the time grid
+        first_beg, total_beg = isobol.indices(0)
+        print(f"at 0  : first={first_beg} total={total_beg}")
+
+        first_mid, total_mid = isobol.indices(size // 2)
+        print(f"at mid: first={first_mid} total={total_mid}")
+
+        #! [UG_MEDCouplingIterativeStatistics_2]
+
+        assert_allclose(first_beg[0], [0.59680837, 0.56303733])
+        assert_allclose(total_beg[0], [0.52188301, 0.47895543])
+
+        assert_allclose(first_mid[0], [0.94759752, -0.00683533], rtol=1e-6)
+        assert_allclose(total_mid[0], [1.00739829e+00, -9.41396650e-07])
+
+
+if __name__ == "__main__":
+    unittest.main()