Adding QuantileRegression algorithm

author Jean-Philippe ARGAUD <jean-philippe.argaud@edf.fr>

Sat, 7 Apr 2012 21:13:32 +0000 (23:13 +0200)

committer Jean-Philippe ARGAUD <jean-philippe.argaud@edf.fr>

Sat, 7 Apr 2012 22:06:08 +0000 (00:06 +0200)
author Jean-Philippe ARGAUD <jean-philippe.argaud@edf.fr>
Sat, 7 Apr 2012 21:13:32 +0000 (23:13 +0200)
committer Jean-Philippe ARGAUD <jean-philippe.argaud@edf.fr>
Sat, 7 Apr 2012 22:06:08 +0000 (00:06 +0200)
diff --git a/doc/using.rst b/doc/using.rst

index de92759f9e2d8d3eda4bb4b84471a7609bf36535..1ad43e17cc2c36c3249cdd02d8d065f27885f84d 100644 (file)
--- a/doc/using.rst
+++ b/doc/using.rst
@@ -381,7 +381,7 @@ unused.
        This key indicates the maximum number of iterations allowed for iterative
        optimization. The default is 15000, which very similar to no limit on
        iterations. It is then recommended to adapt this parameter to the needs on
-      real problems. For some algorithms, the effective stopping step can be
+      real problems. For some minimizers, the effective stopping step can be
        slightly different due to algorihtm internal control requirements.
  
      :CalculateAPosterioriCovariance:
@@ -428,7 +428,7 @@ unused.
        This key indicates the maximum number of iterations allowed for iterative
        optimization. The default is 15000, which very similar to no limit on
        iterations. It is then recommended to adapt this parameter to the needs on
-      real problems. For some algorithms, the effective stopping step can be
+      real problems. For some minimizers, the effective stopping step can be
        slightly different due to algorihtm internal control requirements.
  
      :CostDecrementTolerance:
@@ -458,6 +458,29 @@ unused.
        1000. By default, the seed is left uninitialized, and so use the default
        initialization from the computer.
  
+:"QuantileRegression":
+
+    :Quantile:
+      This key allows to define the real value of the desired quantile, between
+      0 and 1. The default is 0.5, corresponding to the median.
+
+    :Minimizer:
+      This key allows to choose the optimization minimizer. The default choice
+      and only available choice is "MMQR" (Majorize-Minimize for Quantile
+      Regression).
+
+    :MaximumNumberOfSteps:
+      This key indicates the maximum number of iterations allowed for iterative
+      optimization. The default is 15000, which very similar to no limit on
+      iterations. It is then recommended to adapt this parameter to the needs on
+      real problems.
+
+    :CostDecrementTolerance:
+      This key indicates a limit value, leading to stop successfully the
+      iterative optimization process when the cost function or the surrogate
+      decreases less than this tolerance at the last step. The default is 10e-6,
+      and it is recommended to adapt it the needs on real problems.
+
  Examples of using these commands are available in the section
  :ref:`section_examples` and in example files installed with ADAO module.
  
diff --git a/src/daComposant/daAlgorithms/QuantileRegression.py b/src/daComposant/daAlgorithms/QuantileRegression.py

new file mode 100644 (file)

index 0000000..d72157c
--- /dev/null
+++ b/src/daComposant/daAlgorithms/QuantileRegression.py
@@ -0,0 +1,164 @@
+#-*-coding:iso-8859-1-*-
+#
+#  Copyright (C) 2008-2012 EDF R&D
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2.1 of the License.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+#
+#  See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+#
+
+import logging
+from daCore import BasicObjects, PlatformInfo
+m = PlatformInfo.SystemUsage()
+
+import numpy
+
+# ==============================================================================
+class ElementaryAlgorithm(BasicObjects.Algorithm):
+    def __init__(self):
+        BasicObjects.Algorithm.__init__(self)
+        self._name = "QUANTILEREGRESSION"
+        logging.debug("%s Initialisation"%self._name)
+
+    def run(self, Xb=None, Y=None, H=None, M=None, R=None, B=None, Q=None, Parameters=None):
+        """
+        Calcul des parametres definissant le quantile
+        """
+        logging.debug("%s Lancement"%self._name)
+        logging.debug("%s Taille mémoire utilisée de %.1f Mo"%(self._name, m.getUsedMemory("Mo")))
+        #
+        # Opérateur d'observation
+        # -----------------------
+        Hm = H["Direct"].appliedTo
+        Ht = H["Adjoint"].appliedInXTo
+        #
+        # Utilisation éventuelle d'un vecteur H(Xb) précalculé
+        # ----------------------------------------------------
+        if H["AppliedToX"] is not None and H["AppliedToX"].has_key("HXb"):
+            logging.debug("%s Utilisation de HXb"%self._name)
+            HXb = H["AppliedToX"]["HXb"]
+        else:
+            logging.debug("%s Calcul de Hm(Xb)"%self._name)
+            HXb = Hm( Xb )
+        HXb = numpy.asmatrix(HXb).flatten().T
+        #
+        # Calcul de l'innovation
+        # ----------------------
+        if Y.size != HXb.size:
+            raise ValueError("The size %i of observations Y and %i of observed calculation H(X) are different, they have to be identical."%(Y.size,HXb.size))
+        if max(Y.shape) != max(HXb.shape):
+            raise ValueError("The shapes %s of observations Y and %s of observed calculation H(X) are different, they have to be identical."%(Y.shape,HXb.shape))
+        d  = Y - HXb
+        logging.debug("%s Innovation d = %s"%(self._name, d))
+        #
+        # Définition de la fonction-coût
+        # ------------------------------
+        def CostFunction(x):
+            _X  = numpy.asmatrix(x).flatten().T
+            logging.debug("%s CostFunction X  = %s"%(self._name, numpy.asmatrix( _X ).flatten()))
+            _HX = Hm( _X )
+            _HX = numpy.asmatrix(_HX).flatten().T
+            Jb  = 0.
+            Jo  = 0.
+            J   = Jb + Jo
+            logging.debug("%s CostFunction Jb = %s"%(self._name, Jb))
+            logging.debug("%s CostFunction Jo = %s"%(self._name, Jo))
+            logging.debug("%s CostFunction J  = %s"%(self._name, J))
+            self.StoredVariables["CurrentState"].store( _X.A1 )
+            self.StoredVariables["CostFunctionJb"].store( Jb )
+            self.StoredVariables["CostFunctionJo"].store( Jo )
+            self.StoredVariables["CostFunctionJ" ].store( J )
+            return _HX
+        #
+        def GradientOfCostFunction(x):
+            _X      = numpy.asmatrix(x).flatten().T
+            logging.debug("%s GradientOfCostFunction X      = %s"%(self._name, numpy.asmatrix( _X ).flatten()))
+            Hg = H["Tangent"].asMatrix( _X )
+            return Hg
+        #
+        # Point de démarrage de l'optimisation : Xini = Xb
+        # ------------------------------------
+        if type(Xb) is type(numpy.matrix([])):
+            Xini = Xb.A1.tolist()
+        else:
+            Xini = list(Xb)
+        logging.debug("%s Point de démarrage Xini = %s"%(self._name, Xini))
+        #
+        # Paramètres de pilotage
+        # ----------------------
+        # Potentiels : "Quantile", "Minimizer", "MaximumNumberOfSteps", "CostDecrementTolerance"
+        if Parameters.has_key("Quantile") and (0. <= Parameters["Quantile"] <= 1.):
+            quantile = float(Parameters["Quantile"])
+        else:
+            quantile = 0.5
+        logging.debug("%s Quantile pour la regression de quantile = %s"%(self._name, str(quantile)))
+        MinimizerList = ["MMQR",]
+        if Parameters.has_key("Minimizer") and (Parameters["Minimizer"] in MinimizerList):
+            Minimizer = str( Parameters["Minimizer"] )
+        else:
+            Minimizer = "MMQR"
+            logging.warning("%s Unknown or undefined minimizer, replaced by the default one \"%s\""%(self._name,Minimizer))
+        logging.debug("%s Minimiseur utilisé = %s"%(self._name, Minimizer))
+        if Parameters.has_key("MaximumNumberOfSteps") and (Parameters["MaximumNumberOfSteps"] > -1):
+            maxiter = int( Parameters["MaximumNumberOfSteps"] )
+        else:
+            maxiter = 15000
+        logging.debug("%s Nombre maximal de pas d'optimisation = %s"%(self._name, str(maxiter)))
+        if Parameters.has_key("CostDecrementTolerance") and (Parameters["CostDecrementTolerance"] > 0):
+            ftol = float(Parameters["CostDecrementTolerance"])
+        else:
+            ftol = 1.e-06
+        logging.debug("%s Maximum de variation de la fonction d'estimation lors de l'arrêt = %s"%(self._name, str(ftol)))
+        #
+        # Minimisation de la fonctionnelle
+        # --------------------------------
+        if Minimizer == "MMQR":
+            import mmqr
+            Minimum, J_optimal, Informations = mmqr.mmqr(
+                func        = CostFunction,
+                x0          = Xini,
+                fprime      = GradientOfCostFunction,
+                quantile    = quantile,
+                maxfun      = maxiter,
+                toler       = ftol,
+                y           = Y,
+                )
+            nfeval = Informations[2]
+            rc     = Informations[4]
+        else:
+            raise ValueError("Error in Minimizer name: %s"%Minimizer)
+        #
+        logging.debug("%s %s Step of min cost  = %s"%(self._name, Minimizer, nfeval))
+        logging.debug("%s %s Minimum cost      = %s"%(self._name, Minimizer, J_optimal))
+        logging.debug("%s %s Minimum state     = %s"%(self._name, Minimizer, Minimum))
+        logging.debug("%s %s Nb of F           = %s"%(self._name, Minimizer, nfeval))
+        logging.debug("%s %s RetCode           = %s"%(self._name, Minimizer, rc))
+        #
+        # Obtention de l'analyse
+        # ----------------------
+        Xa = numpy.asmatrix(Minimum).T
+        logging.debug("%s Analyse Xa = %s"%(self._name, Xa))
+        #
+        self.StoredVariables["Analysis"].store( Xa.A1 )
+        self.StoredVariables["Innovation"].store( d.A1 )
+        #
+        logging.debug("%s Taille mémoire utilisée de %.1f Mo"%(self._name, m.getUsedMemory("MB")))
+        logging.debug("%s Terminé"%self._name)
+        #
+        return 0
+
+# ==============================================================================
+if __name__ == "__main__":
+    print '\n AUTODIAGNOSTIC \n'
diff --git a/src/daComposant/daCore/BasicObjects.py b/src/daComposant/daCore/BasicObjects.py

index cdfa091770c475626b6f809c4a7a01a11b344aa3..2533079f9122314c4bbb3336d047cf0cbbc4a24e 100644 (file)
--- a/src/daComposant/daCore/BasicObjects.py
+++ b/src/daComposant/daCore/BasicObjects.py
@@ -85,14 +85,16 @@ class Operator:
          else:
              return self.__Method( (xNominal, xValue) )
  
-    def asMatrix(self):
+    def asMatrix(self, ValueForMethodForm = None):
          """
          Permet de renvoyer l'opérateur sous la forme d'une matrice
          """
          if self.__Matrix is not None:
              return self.__Matrix
+        elif ValueForMethodForm is not None:
+            return self.__Method( ValueForMethodForm )
          else:
-            raise ValueError("Matrix form of the operator is not available but is required")
+            raise ValueError("Matrix form of the operator defined as a function/method requires to give an operating point.")
  
      def shape(self):
          """
diff --git a/src/daComposant/daNumerics/mmqr.py b/src/daComposant/daNumerics/mmqr.py

new file mode 100644 (file)

index 0000000..dea513b
--- /dev/null
+++ b/src/daComposant/daNumerics/mmqr.py
@@ -0,0 +1,96 @@
+#-*-coding:iso-8859-1-*-
+#
+#  Copyright (C) 2008-2012 EDF R&D
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2.1 of the License.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+#
+#  See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+#
+
+# L'algorithme est base sur la publication : David R. Hunter, Kenneth Lange,
+# "Quantile Regression via an MM Algorithm", Journal of Computational and
+# Graphical Statistics, 9, 1, pp.60-77, 2000
+
+import sys, math
+from numpy import sum, array, matrix, dot, linalg, asarray, asmatrix
+
+# ==============================================================================
+def mmqr(
+        func     = None,
+        x0       = None,
+        fprime   = None,
+        quantile = 0.5,
+        maxfun   = 15000,
+        toler    = 1.e-06,
+        y        = None,
+        ):
+    #
+    # Recuperation des donnees et informations initiales
+    # --------------------------------------------------
+    variables = asmatrix(x0).A1
+    mesures   = asmatrix(asmatrix(y).A1).T
+    increment = sys.float_info[0]
+    p         = len(variables.flat)
+    n         = len(mesures.flat)
+    #
+    # Calcul des parametres du MM
+    # ---------------------------
+    tn      = float(toler) / n
+    e0      = -tn / math.log(tn)
+    epsilon = (e0-tn)/(1+math.log(e0))
+    #
+    # Calculs d'initialisation
+    # ------------------------
+    residus  = asmatrix( mesures - func( variables ) ).A1
+    poids    = 1./(epsilon+abs(residus))
+    veps     = 1. - 2. * quantile - residus * poids
+    lastsurrogate = -sum(residus*veps) - (1.-2.*quantile)*sum(residus)
+    iteration = 0
+    #
+    # Recherche iterative
+    # -------------------
+    while (increment > toler) and (iteration < maxfun) :
+        iteration += 1
+        #
+        Derivees  = fprime(variables)
+        DeriveesT = matrix(Derivees).T
+        M         = - dot( DeriveesT , (array(matrix(p*[poids]).T)*array(Derivees)) )
+        SM        =   dot( DeriveesT , veps ).T
+        step      = linalg.lstsq( M, SM )[0].A1
+        #
+        variables = asarray(variables) + asarray(step)
+        residus   = ( mesures - func(variables) ).A1
+        surrogate = sum(residus**2 * poids) + (4.*quantile-2.) * sum(residus)
+        #
+        while ( (surrogate > lastsurrogate) and ( max(list(abs(step))) > 1.e-16 ) ) :
+            step      = step/2.
+            variables = variables - step
+            residus   = ( mesures-func(variables) ).A1
+            surrogate = sum(residus**2 * poids) + (4.*quantile-2.) * sum(residus)
+        #
+        increment     = lastsurrogate-surrogate
+        poids         = 1./(epsilon+abs(residus))
+        veps          = 1. - 2. * quantile - residus * poids
+        lastsurrogate = -sum(residus * veps) - (1.-2.*quantile)*sum(residus)
+    #
+    # Mesure d'écart : q*Sum(residus)-sum(residus negatifs)
+    # ----------------
+    Ecart = quantile * sum(residus) - sum( residus[residus<0] )
+    #
+    return variables, Ecart, [n,p,iteration,increment,0]
+
+# ==============================================================================
+if __name__ == "__main__":
+    print '\n AUTODIAGNOSTIC \n'
diff --git a/src/daSalome/daYacsSchemaCreator/infos_daComposant.py b/src/daSalome/daYacsSchemaCreator/infos_daComposant.py

index 16b8625e2d7724bc03f61a54c74c64d4067e7855..a80a4138ba55b14c25badafd2d4da181df2bd176 100644 (file)
--- a/src/daSalome/daYacsSchemaCreator/infos_daComposant.py
+++ b/src/daSalome/daYacsSchemaCreator/infos_daComposant.py
@@ -57,6 +57,7 @@ AssimAlgos = [
  #     "KalmanFilter", # Removed because EvolutionModel must be available in OptLoop
      "LinearLeastSquares",
      "NonLinearLeastSquares",
+    "QuantileRegression",
      ]
  
  AlgoDataRequirements = {}
@@ -86,9 +87,15 @@ AlgoDataRequirements["LinearLeastSquares"] = [
      "ObservationOperator",
      ]
  AlgoDataRequirements["NonLinearLeastSquares"] = [
+    "Background",
      "Observation", "ObservationError",
      "ObservationOperator",
      ]
+AlgoDataRequirements["QuantileRegression"] = [
+    "Background",
+    "Observation",
+    "ObservationOperator",
+    ]
  
  AlgoType = {}
  AlgoType["3DVAR"] = "Optim"
@@ -97,6 +104,7 @@ AlgoType["EnsembleBlue"] = "Optim"
  # AlgoType["KalmanFilter"] = "Optim"
  AlgoType["LinearLeastSquares"] = "Optim"
  AlgoType["NonLinearLeastSquares"] = "Optim"
+AlgoType["QuantileRegression"] = "Optim"
  
  # Variables qui sont partages avec le generateur de
  # catalogue Eficas
author	Jean-Philippe ARGAUD <jean-philippe.argaud@edf.fr>
	Sat, 7 Apr 2012 21:13:32 +0000 (23:13 +0200)
committer	Jean-Philippe ARGAUD <jean-philippe.argaud@edf.fr>
	Sat, 7 Apr 2012 22:06:08 +0000 (00:06 +0200)
doc/using.rst		patch \| blob \| history
src/daComposant/daAlgorithms/QuantileRegression.py	[new file with mode: 0644]	patch \| blob
src/daComposant/daCore/BasicObjects.py		patch \| blob \| history
src/daComposant/daNumerics/mmqr.py	[new file with mode: 0644]	patch \| blob
src/daSalome/daYacsSchemaCreator/infos_daComposant.py		patch \| blob \| history