From: Jean-Philippe ARGAUD
Date: Sat, 7 Apr 2012 21:13:32 +0000 (+0200)
Subject: Adding QuantileRegression algorithm
X-Git-Tag: V6_5_0~6
X-Git-Url: http://git.salome-platform.org/gitweb/?a=commitdiff_plain;h=20eb95491587638a9732f7bfcddf527769469615;p=modules%2Fadao.git

Adding QuantileRegression algorithm
---

diff --git a/doc/using.rst b/doc/using.rst
index de92759..1ad43e1 100644
--- a/doc/using.rst
+++ b/doc/using.rst
@@ -381,7 +381,7 @@ unused.
     This key indicates the maximum number of iterations allowed for iterative
     optimization. The default is 15000, which is very similar to no limit on
     iterations. It is then recommended to adapt this parameter to the needs of
-    real problems. For some algorithms, the effective stopping step can be
+    real problems. For some minimizers, the effective stopping step can be
     slightly different due to algorithm internal control requirements.
 
   :CalculateAPosterioriCovariance:
@@ -428,7 +428,7 @@ unused.
     This key indicates the maximum number of iterations allowed for iterative
     optimization. The default is 15000, which is very similar to no limit on
     iterations. It is then recommended to adapt this parameter to the needs of
-    real problems. For some algorithms, the effective stopping step can be
+    real problems. For some minimizers, the effective stopping step can be
     slightly different due to algorithm internal control requirements.
 
   :CostDecrementTolerance:
@@ -458,6 +458,29 @@ unused.
     1000. By default, the seed is left uninitialized, and so uses the default
     initialization from the computer.
 
+:"QuantileRegression":
+
+  :Quantile:
+    This key allows defining the real value of the desired quantile, between
+    0 and 1. The default is 0.5, corresponding to the median.
+
+  :Minimizer:
+    This key allows choosing the optimization minimizer. The default, and
+    only available, choice is "MMQR" (Majorize-Minimize for Quantile
+    Regression).
+
+  :MaximumNumberOfSteps:
+    This key indicates the maximum number of iterations allowed for iterative
+    optimization. The default is 15000, which is very similar to no limit on
+    iterations. It is then recommended to adapt this parameter to the needs of
+    real problems.
+
+  :CostDecrementTolerance:
+    This key indicates a limit value, leading to stop successfully the
+    iterative optimization process when the cost function or the surrogate
+    function decreases less than this tolerance at the last step. The default
+    is 1.e-6, and it is recommended to adapt it to the needs of real problems.
+
 Examples of using these commands are available in the section
 :ref:`section_examples` and in example files installed with ADAO module.
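
As a hedged illustration of how the keywords documented above fit together, the
parameters could be gathered in a plain Python dictionary as sketched below.
The variable name "AlgorithmParameters" and the way such a dictionary is
attached to an ADAO case are assumptions for the sake of the example, not part
of this commit; only the keys and default values come from the documentation
above.

    # Hypothetical parameter dictionary for the "QuantileRegression" algorithm,
    # using only the keys documented in doc/using.rst above.
    AlgorithmParameters = {
        "Quantile"               : 0.75,    # estimate the third quartile instead of the median
        "Minimizer"              : "MMQR",  # currently the only available choice
        "MaximumNumberOfSteps"   : 100,     # adapt to the real problem rather than the 15000 default
        "CostDecrementTolerance" : 1.e-6,   # stop when the decrease falls below this value
        }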

diff --git a/src/daComposant/daAlgorithms/QuantileRegression.py b/src/daComposant/daAlgorithms/QuantileRegression.py
new file mode 100644
index 0000000..d72157c
--- /dev/null
+++ b/src/daComposant/daAlgorithms/QuantileRegression.py
@@ -0,0 +1,164 @@
+#-*-coding:iso-8859-1-*-
+#
+# Copyright (C) 2008-2012 EDF R&D
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+#
+
+import logging
+from daCore import BasicObjects, PlatformInfo
+m = PlatformInfo.SystemUsage()
+
+import numpy
+
+# ==============================================================================
+class ElementaryAlgorithm(BasicObjects.Algorithm):
+    def __init__(self):
+        BasicObjects.Algorithm.__init__(self)
+        self._name = "QUANTILEREGRESSION"
+        logging.debug("%s Initialisation"%self._name)
+
+    def run(self, Xb=None, Y=None, H=None, M=None, R=None, B=None, Q=None, Parameters=None):
+        """
+        Computation of the parameters defining the quantile
+        """
+        logging.debug("%s Lancement"%self._name)
+        logging.debug("%s Taille mémoire utilisée de %.1f Mo"%(self._name, m.getUsedMemory("Mo")))
+        #
+        # Observation operator
+        # --------------------
+        Hm = H["Direct"].appliedTo
+        Ht = H["Adjoint"].appliedInXTo
+        #
+        # Possible use of a precomputed H(Xb) vector
+        # ------------------------------------------
+        if H["AppliedToX"] is not None and H["AppliedToX"].has_key("HXb"):
+            logging.debug("%s Utilisation de HXb"%self._name)
+            HXb = H["AppliedToX"]["HXb"]
+        else:
+            logging.debug("%s Calcul de Hm(Xb)"%self._name)
+            HXb = Hm( Xb )
+        HXb = numpy.asmatrix(HXb).flatten().T
+        #
+        # Computation of the innovation
+        # -----------------------------
+        if Y.size != HXb.size:
+            raise ValueError("The size %i of observations Y and %i of observed calculation H(X) are different, they have to be identical."%(Y.size,HXb.size))
+        if max(Y.shape) != max(HXb.shape):
+            raise ValueError("The shapes %s of observations Y and %s of observed calculation H(X) are different, they have to be identical."%(Y.shape,HXb.shape))
+        d = Y - HXb
+        logging.debug("%s Innovation d = %s"%(self._name, d))
+        #
+        # Definition of the cost function
+        # -------------------------------
+        def CostFunction(x):
+            _X  = numpy.asmatrix(x).flatten().T
+            logging.debug("%s CostFunction X  = %s"%(self._name, numpy.asmatrix( _X ).flatten()))
+            _HX = Hm( _X )
+            _HX = numpy.asmatrix(_HX).flatten().T
+            Jb  = 0.
+            Jo  = 0.
+            J   = Jb + Jo
+            logging.debug("%s CostFunction Jb = %s"%(self._name, Jb))
+            logging.debug("%s CostFunction Jo = %s"%(self._name, Jo))
+            logging.debug("%s CostFunction J  = %s"%(self._name, J))
+            self.StoredVariables["CurrentState"].store( _X.A1 )
+            self.StoredVariables["CostFunctionJb"].store( Jb )
+            self.StoredVariables["CostFunctionJo"].store( Jo )
+            self.StoredVariables["CostFunctionJ" ].store( J )
+            return _HX
+        #
+        def GradientOfCostFunction(x):
+            _X = numpy.asmatrix(x).flatten().T
+            logging.debug("%s GradientOfCostFunction X = %s"%(self._name, numpy.asmatrix( _X ).flatten()))
+            Hg = H["Tangent"].asMatrix( _X )
+            return Hg
+        #
+        # Starting point of the optimization: Xini = Xb
+        # ---------------------------------------------
+        if type(Xb) is type(numpy.matrix([])):
+            Xini = Xb.A1.tolist()
+        else:
+            Xini = list(Xb)
+        logging.debug("%s Point de démarrage Xini = %s"%(self._name, Xini))
+        #
+        # Steering parameters
+        # -------------------
+        # Possible ones: "Quantile", "Minimizer", "MaximumNumberOfSteps", "CostDecrementTolerance"
+        if Parameters.has_key("Quantile") and (0. <= Parameters["Quantile"] <= 1.):
+            quantile = float(Parameters["Quantile"])
+        else:
+            quantile = 0.5
+        logging.debug("%s Quantile pour la regression de quantile = %s"%(self._name, str(quantile)))
+        MinimizerList = ["MMQR",]
+        if Parameters.has_key("Minimizer") and (Parameters["Minimizer"] in MinimizerList):
+            Minimizer = str( Parameters["Minimizer"] )
+        else:
+            Minimizer = "MMQR"
+            logging.warning("%s Unknown or undefined minimizer, replaced by the default one \"%s\""%(self._name,Minimizer))
+        logging.debug("%s Minimiseur utilisé = %s"%(self._name, Minimizer))
+        if Parameters.has_key("MaximumNumberOfSteps") and (Parameters["MaximumNumberOfSteps"] > -1):
+            maxiter = int( Parameters["MaximumNumberOfSteps"] )
+        else:
+            maxiter = 15000
+        logging.debug("%s Nombre maximal de pas d'optimisation = %s"%(self._name, str(maxiter)))
+        if Parameters.has_key("CostDecrementTolerance") and (Parameters["CostDecrementTolerance"] > 0):
+            ftol = float(Parameters["CostDecrementTolerance"])
+        else:
+            ftol = 1.e-06
+        logging.debug("%s Maximum de variation de la fonction d'estimation lors de l'arrêt = %s"%(self._name, str(ftol)))
+        #
+        # Minimization of the functional
+        # ------------------------------
+        if Minimizer == "MMQR":
+            import mmqr
+            Minimum, J_optimal, Informations = mmqr.mmqr(
+                func        = CostFunction,
+                x0          = Xini,
+                fprime      = GradientOfCostFunction,
+                quantile    = quantile,
+                maxfun      = maxiter,
+                toler       = ftol,
+                y           = Y,
+                )
+            nfeval = Informations[2]
+            rc     = Informations[4]
+        else:
+            raise ValueError("Error in Minimizer name: %s"%Minimizer)
+        #
+        logging.debug("%s %s Step of min cost = %s"%(self._name, Minimizer, nfeval))
+        logging.debug("%s %s Minimum cost     = %s"%(self._name, Minimizer, J_optimal))
+        logging.debug("%s %s Minimum state    = %s"%(self._name, Minimizer, Minimum))
+        logging.debug("%s %s Nb of F          = %s"%(self._name, Minimizer, nfeval))
+        logging.debug("%s %s RetCode          = %s"%(self._name, Minimizer, rc))
+        #
+        # Obtaining the analysis
+        # ----------------------
+        Xa = numpy.asmatrix(Minimum).T
+        logging.debug("%s Analyse Xa = %s"%(self._name, Xa))
+        #
+        self.StoredVariables["Analysis"].store( Xa.A1 )
+        self.StoredVariables["Innovation"].store( d.A1 )
+        #
+        logging.debug("%s Taille mémoire utilisée de %.1f Mo"%(self._name, m.getUsedMemory("MB")))
+        logging.debug("%s Terminé"%self._name)
+        #
+        return 0
+
+# ==============================================================================
+if __name__ == "__main__":
+    print '\n AUTODIAGNOSTIC \n'
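
In the algorithm above, Jb and Jo are stored as zero because the actual
criterion is handled inside the MMQR minimizer: quantile regression targets the
sum of the asymmetric "check" (pinball) losses of the residuals, which the MM
surrogate majorizes. The following standalone sketch of that loss is an
illustration only and is not part of the committed code; the function name
check_loss is an assumption.

    import numpy

    def check_loss(residuals, quantile=0.5):
        # Asymmetric "check" (pinball) loss of quantile regression:
        # positive residuals are weighted by `quantile`, negative ones by
        # `quantile - 1`, so quantile=0.5 reduces to half the L1 norm.
        r = numpy.asarray(residuals, dtype=float)
        return numpy.sum(r * (quantile - (r < 0)))

    # Example: for quantile=0.5 the loss of [1., -2., 3.] is 0.5*(1+2+3) = 3.0
    print check_loss([1., -2., 3.], quantile=0.5)

For any quantile q, this sum equals q*Sum(residuals) - Sum(negative residuals),
which is exactly the "Ecart" value returned by mmqr.py further below.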
diff --git a/src/daComposant/daCore/BasicObjects.py b/src/daComposant/daCore/BasicObjects.py
index cdfa091..2533079 100644
--- a/src/daComposant/daCore/BasicObjects.py
+++ b/src/daComposant/daCore/BasicObjects.py
@@ -85,14 +85,16 @@ class Operator:
         else:
             return self.__Method( (xNominal, xValue) )
 
-    def asMatrix(self):
+    def asMatrix(self, ValueForMethodForm = None):
         """
         Permet de renvoyer l'opérateur sous la forme d'une matrice
         """
         if self.__Matrix is not None:
             return self.__Matrix
+        elif ValueForMethodForm is not None:
+            return self.__Method( ValueForMethodForm )
         else:
-            raise ValueError("Matrix form of the operator is not available but is required")
+            raise ValueError("Matrix form of the operator defined as a function/method requires an operating point to be given.")
 
     def shape(self):
         """
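
The change above lets an operator defined as a function return its matrix form
evaluated at a supplied operating point, which is how the new algorithm calls
H["Tangent"].asMatrix( _X ). The mock class below only imitates that dispatch
to illustrate the new calling convention; it is not the real
BasicObjects.Operator and its constructor arguments are assumptions.

    import numpy

    class _OperatorSketch:
        # Minimal sketch of the new asMatrix dispatch: a stored matrix is
        # returned directly, otherwise the wrapped method is evaluated at the
        # operating point that must now be supplied.
        def __init__(self, Matrix=None, Method=None):
            self.__Matrix = Matrix
            self.__Method = Method
        def asMatrix(self, ValueForMethodForm=None):
            if self.__Matrix is not None:
                return self.__Matrix
            elif ValueForMethodForm is not None:
                return self.__Method(ValueForMethodForm)
            else:
                raise ValueError("An operating point is required for the method form.")

    # The operator given as a method now needs a linearization point:
    H_tangent = _OperatorSketch(Method=lambda X: numpy.matrix([[2., 0.], [0., 3.]]))
    print H_tangent.asMatrix(numpy.matrix([[1.], [1.]]))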
diff --git a/src/daComposant/daNumerics/mmqr.py b/src/daComposant/daNumerics/mmqr.py
new file mode 100644
index 0000000..dea513b
--- /dev/null
+++ b/src/daComposant/daNumerics/mmqr.py
@@ -0,0 +1,96 @@
+#-*-coding:iso-8859-1-*-
+#
+# Copyright (C) 2008-2012 EDF R&D
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
+#
+
+# The algorithm is based on the publication: David R. Hunter, Kenneth Lange,
+# "Quantile Regression via an MM Algorithm", Journal of Computational and
+# Graphical Statistics, 9, 1, pp.60-77, 2000
+
+import sys, math
+from numpy import sum, array, matrix, dot, linalg, asarray, asmatrix
+
+# ==============================================================================
+def mmqr(
+        func     = None,
+        x0       = None,
+        fprime   = None,
+        quantile = 0.5,
+        maxfun   = 15000,
+        toler    = 1.e-06,
+        y        = None,
+        ):
+    #
+    # Recovery of the initial data and information
+    # --------------------------------------------
+    variables = asmatrix(x0).A1
+    mesures   = asmatrix(asmatrix(y).A1).T
+    increment = sys.float_info[0]
+    p         = len(variables.flat)
+    n         = len(mesures.flat)
+    #
+    # Computation of the MM parameters
+    # --------------------------------
+    tn      = float(toler) / n
+    e0      = -tn / math.log(tn)
+    epsilon = (e0-tn)/(1+math.log(e0))
+    #
+    # Initialization computations
+    # ---------------------------
+    residus = asmatrix( mesures - func( variables ) ).A1
+    poids   = 1./(epsilon+abs(residus))
+    veps    = 1. - 2. * quantile - residus * poids
+    lastsurrogate = -sum(residus*veps) - (1.-2.*quantile)*sum(residus)
+    iteration = 0
+    #
+    # Iterative search
+    # ----------------
+    while (increment > toler) and (iteration < maxfun) :
+        iteration += 1
+        #
+        Derivees  = fprime(variables)
+        DeriveesT = matrix(Derivees).T
+        M         = - dot( DeriveesT , (array(matrix(p*[poids]).T)*array(Derivees)) )
+        SM        = dot( DeriveesT , veps ).T
+        step      = linalg.lstsq( M, SM )[0].A1
+        #
+        variables = asarray(variables) + asarray(step)
+        residus   = ( mesures - func(variables) ).A1
+        surrogate = sum(residus**2 * poids) + (4.*quantile-2.) * sum(residus)
+        #
+        while ( (surrogate > lastsurrogate) and ( max(list(abs(step))) > 1.e-16 ) ) :
+            step      = step/2.
+            variables = variables - step
+            residus   = ( mesures-func(variables) ).A1
+            surrogate = sum(residus**2 * poids) + (4.*quantile-2.) * sum(residus)
+        #
+        increment     = lastsurrogate-surrogate
+        poids         = 1./(epsilon+abs(residus))
+        veps          = 1. - 2. * quantile - residus * poids
+        lastsurrogate = -sum(residus * veps) - (1.-2.*quantile)*sum(residus)
+    #
+    # Discrepancy measure: q*Sum(residus) - sum(negative residus)
+    # -----------------------------------------------------------
+    Ecart = quantile * sum(residus) - sum( residus[residus<0] )
+    #
+    return variables, Ecart, [n,p,iteration,increment,0]
+
+# ==============================================================================
+if __name__ == "__main__":
+    print '\n AUTODIAGNOSTIC \n'
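
As a hedged, standalone check of mmqr (not part of the commit), fitting a
constant model H(x) = A*x with A a column of ones amounts to estimating the
requested quantile of the observations. The sketch assumes mmqr.py is
importable (e.g. from its daNumerics directory); names such as H, dH and xa are
local to the example.

    from numpy import matrix, ones
    import mmqr

    y = [1., 2., 3., 4., 100.]             # observations with one outlier
    A = matrix(ones((len(y), 1)))          # operator of the constant model

    H  = lambda x: A * matrix(x).T         # direct operator, returns a column matrix
    dH = lambda x: A                       # its (constant) tangent matrix

    xa, ecart, infos = mmqr.mmqr(func=H, x0=[0.], fprime=dH, quantile=0.5, y=y)
    print xa, ecart, infos                 # xa should be close to the median of y

Because the loss is the check function rather than a quadratic one, the outlier
barely moves the estimate, which is the practical interest of the algorithm.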
diff --git a/src/daSalome/daYacsSchemaCreator/infos_daComposant.py b/src/daSalome/daYacsSchemaCreator/infos_daComposant.py
index 16b8625..a80a413 100644
--- a/src/daSalome/daYacsSchemaCreator/infos_daComposant.py
+++ b/src/daSalome/daYacsSchemaCreator/infos_daComposant.py
@@ -57,6 +57,7 @@ AssimAlgos = [
 #   "KalmanFilter", # Removed because EvolutionModel must be available in OptLoop
     "LinearLeastSquares",
     "NonLinearLeastSquares",
+    "QuantileRegression",
     ]
 
 AlgoDataRequirements = {}
@@ -86,9 +87,15 @@ AlgoDataRequirements["LinearLeastSquares"] = [
     "ObservationOperator",
     ]
 AlgoDataRequirements["NonLinearLeastSquares"] = [
+    "Background",
     "Observation",
     "ObservationError",
     "ObservationOperator",
     ]
+AlgoDataRequirements["QuantileRegression"] = [
+    "Background",
+    "Observation",
+    "ObservationOperator",
+    ]
 AlgoType = {}
 AlgoType["3DVAR"] = "Optim"
@@ -97,6 +104,7 @@ AlgoType["EnsembleBlue"] = "Optim"
 # AlgoType["KalmanFilter"] = "Optim"
 AlgoType["LinearLeastSquares"] = "Optim"
 AlgoType["NonLinearLeastSquares"] = "Optim"
+AlgoType["QuantileRegression"] = "Optim"
 
 # Variables qui sont partages avec le generateur de
 # catalogue Eficas
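
To make the effect of this registration concrete, the short sketch below (not
part of the commit) reads the three tables touched by the hunks above; the
import path is an assumption and depends on how daYacsSchemaCreator is exposed
on the Python path.

    # Hypothetical check of the new registration entries added above.
    from daYacsSchemaCreator import infos_daComposant as infos

    assert "QuantileRegression" in infos.AssimAlgos
    print infos.AlgoType["QuantileRegression"]              # -> "Optim"
    print infos.AlgoDataRequirements["QuantileRegression"]  # -> ["Background", "Observation", "ObservationOperator"]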