src/daComposant/daAlgorithms/Atoms/lbfgsb114hlt.py

   1 # Modification de la version 1.14.0
   2 # flake8: noqa
   3 """
   4 Functions
   5 ---------
   6 .. autosummary::
   7    :toctree: generated/
   8
   9     fmin_l_bfgs_b
  10
  11 """
  12
  13 ## License for the Python wrapper
  14 ## ==============================
  15
  16 ## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
  17
  18 ## Permission is hereby granted, free of charge, to any person obtaining a
  19 ## copy of this software and associated documentation files (the "Software"),
  20 ## to deal in the Software without restriction, including without limitation
  21 ## the rights to use, copy, modify, merge, publish, distribute, sublicense,
  22 ## and/or sell copies of the Software, and to permit persons to whom the
  23 ## Software is furnished to do so, subject to the following conditions:
  24
  25 ## The above copyright notice and this permission notice shall be included in
  26 ## all copies or substantial portions of the Software.
  27
  28 ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  29 ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  30 ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  31 ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  32 ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  33 ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  34 ## DEALINGS IN THE SOFTWARE.
  35
  36 ## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
  37
  38 import numpy as np
  39 from numpy import array, asarray, float64, zeros
  40 from scipy.optimize import _lbfgsb
  41 from scipy.optimize._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
  42                         _wrap_callback, _check_unknown_options,
  43                         _prepare_scalar_function)
  44 from scipy.optimize._constraints import old_bound_to_new
  45
  46 from scipy.sparse.linalg import LinearOperator
  47
  48 __all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
  49
  50
  51 def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
  52                   approx_grad=0,
  53                   bounds=None, m=10, factr=1e7, pgtol=1e-5,
  54                   epsilon=1e-8,
  55                   iprint=-1, maxfun=15000, maxiter=15000, disp=None,
  56                   callback=None, maxls=20):
  57     """
  58     Minimize a function func using the L-BFGS-B algorithm.
  59
  60     Parameters
  61     ----------
  62     func : callable f(x,*args)
  63         Function to minimize.
  64     x0 : ndarray
  65         Initial guess.
  66     fprime : callable fprime(x,*args), optional
  67         The gradient of `func`. If None, then `func` returns the function
  68         value and the gradient (``f, g = func(x, *args)``), unless
  69         `approx_grad` is True in which case `func` returns only ``f``.
  70     args : sequence, optional
  71         Arguments to pass to `func` and `fprime`.
  72     approx_grad : bool, optional
  73         Whether to approximate the gradient numerically (in which case
  74         `func` returns only the function value).
  75     bounds : list, optional
  76         ``(min, max)`` pairs for each element in ``x``, defining
  77         the bounds on that parameter. Use None or +-inf for one of ``min`` or
  78         ``max`` when there is no bound in that direction.
  79     m : int, optional
  80         The maximum number of variable metric corrections
  81         used to define the limited memory matrix. (The limited memory BFGS
  82         method does not store the full hessian but uses this many terms in an
  83         approximation to it.)
  84     factr : float, optional
  85         The iteration stops when
  86         ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
  87         where ``eps`` is the machine precision, which is automatically
  88         generated by the code. Typical values for `factr` are: 1e12 for
  89         low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
  90         high accuracy. See Notes for relationship to `ftol`, which is exposed
  91         (instead of `factr`) by the `scipy.optimize.minimize` interface to
  92         L-BFGS-B.
  93     pgtol : float, optional
  94         The iteration will stop when
  95         ``max{|proj g_i | i = 1, ..., n} <= pgtol``
  96         where ``proj g_i`` is the i-th component of the projected gradient.
  97     epsilon : float, optional
  98         Step size used when `approx_grad` is True, for numerically
  99         calculating the gradient
 100     iprint : int, optional
 101         Controls the frequency of output. ``iprint < 0`` means no output;
 102         ``iprint = 0``    print only one line at the last iteration;
 103         ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
 104         ``iprint = 99``   print details of every iteration except n-vectors;
 105         ``iprint = 100``  print also the changes of active set and final x;
 106         ``iprint > 100``  print details of every iteration including x and g.
 107     disp : int, optional
 108         If zero, then no output. If a positive number, then this over-rides
 109         `iprint` (i.e., `iprint` gets the value of `disp`).
 110     maxfun : int, optional
 111         Maximum number of function evaluations. Note that this function
 112         may violate the limit because of evaluating gradients by numerical
 113         differentiation.
 114     maxiter : int, optional
 115         Maximum number of iterations.
 116     callback : callable, optional
 117         Called after each iteration, as ``callback(xk)``, where ``xk`` is the
 118         current parameter vector.
 119     maxls : int, optional
 120         Maximum number of line search steps (per iteration). Default is 20.
 121
 122     Returns
 123     -------
 124     x : array_like
 125         Estimated position of the minimum.
 126     f : float
 127         Value of `func` at the minimum.
 128     d : dict
 129         Information dictionary.
 130
 131         * d['warnflag'] is
 132
 133           - 0 if converged,
 134           - 1 if too many function evaluations or too many iterations,
 135           - 2 if stopped for another reason, given in d['task']
 136
 137         * d['grad'] is the gradient at the minimum (should be 0 ish)
 138         * d['funcalls'] is the number of function calls made.
 139         * d['nit'] is the number of iterations.
 140
 141     See also
 142     --------
 143     minimize: Interface to minimization algorithms for multivariate
 144         functions. See the 'L-BFGS-B' `method` in particular. Note that the
 145         `ftol` option is made available via that interface, while `factr` is
 146         provided via this interface, where `factr` is the factor multiplying
 147         the default machine floating-point precision to arrive at `ftol`:
 148         ``ftol = factr * numpy.finfo(float).eps``.
 149
 150     Notes
 151     -----
 152     License of L-BFGS-B (FORTRAN code):
 153
 154     The version included here (in fortran code) is 3.0
 155     (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
 156     and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
 157     condition for use:
 158
 159     This software is freely available, but we expect that all publications
 160     describing work using this software, or all commercial products using it,
 161     quote at least one of the references given below. This software is released
 162     under the BSD License.
 163
 164     References
 165     ----------
 166     * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
 167       Constrained Optimization, (1995), SIAM Journal on Scientific and
 168       Statistical Computing, 16, 5, pp. 1190-1208.
 169     * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
 170       FORTRAN routines for large scale bound constrained optimization (1997),
 171       ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
 172     * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
 173       FORTRAN routines for large scale bound constrained optimization (2011),
 174       ACM Transactions on Mathematical Software, 38, 1.
 175
 176     Examples
 177     --------
 178     Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we define
 179     an objective function ``f(m, b) = (y - y_model)**2``, where `y` describes the
 180     observations and `y_model` the prediction of the linear model as
 181     ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``, are arbitrarily
 182     chosen as ``(0,5)`` and ``(5,10)`` for this example.
 183
 184     >>> import numpy as np
 185     >>> from scipy.optimize import fmin_l_bfgs_b
 186     >>> X = np.arange(0, 10, 1)
 187     >>> M = 2
 188     >>> B = 3
 189     >>> Y = M * X + B
 190     >>> def func(parameters, *args):
 191     ...     x = args[0]
 192     ...     y = args[1]
 193     ...     m, b = parameters
 194     ...     y_model = m*x + b
 195     ...     error = sum(np.power((y - y_model), 2))
 196     ...     return error
 197
 198     >>> initial_values = np.array([0.0, 1.0])
 199
 200     >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
 201     ...                                    approx_grad=True)
 202     >>> x_opt, f_opt
 203     array([1.99999999, 3.00000006]), 1.7746231151323805e-14  # may vary
 204
 205     The optimized parameters in ``x_opt`` agree with the ground truth parameters
 206     ``m`` and ``b``. Next, let us perform a bound contrained optimization using the `bounds`
 207     parameter.
 208
 209     >>> bounds = [(0, 5), (5, 10)]
 210     >>> x_opt, f_op, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
 211     ...                                   approx_grad=True, bounds=bounds)
 212     >>> x_opt, f_opt
 213     array([1.65990508, 5.31649385]), 15.721334516453945  # may vary
 214     """
 215     # handle fprime/approx_grad
 216     if approx_grad:
 217         fun = func
 218         jac = None
 219     elif fprime is None:
 220         fun = MemoizeJac(func)
 221         jac = fun.derivative
 222     else:
 223         fun = func
 224         jac = fprime
 225
 226     # build options
 227     if disp is None:
 228         disp = iprint
 229     callback = _wrap_callback(callback)
 230     opts = {'disp': disp,
 231             'iprint': iprint,
 232             'maxcor': m,
 233             'ftol': factr * np.finfo(float).eps,
 234             'gtol': pgtol,
 235             'eps': epsilon,
 236             'maxfun': maxfun,
 237             'maxiter': maxiter,
 238             'callback': callback,
 239             'maxls': maxls}
 240
 241     res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
 242                            **opts)
 243     d = {'grad': res['jac'],
 244          'task': res['message'],
 245          'funcalls': res['nfev'],
 246          'nit': res['nit'],
 247          'warnflag': res['status']}
 248     f = res['fun']
 249     x = res['x']
 250
 251     return x, f, d
 252
 253
 254 def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
 255                      disp=None, maxcor=10, ftol=2.2204460492503131e-09,
 256                      gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
 257                      iprint=-1, callback=None, maxls=20,
 258                      finite_diff_rel_step=None, **unknown_options):
 259     """
 260     Minimize a scalar function of one or more variables using the L-BFGS-B
 261     algorithm.
 262
 263     Options
 264     -------
 265     disp : None or int
 266         If `disp is None` (the default), then the supplied version of `iprint`
 267         is used. If `disp is not None`, then it overrides the supplied version
 268         of `iprint` with the behaviour you outlined.
 269     maxcor : int
 270         The maximum number of variable metric corrections used to
 271         define the limited memory matrix. (The limited memory BFGS
 272         method does not store the full hessian but uses this many terms
 273         in an approximation to it.)
 274     ftol : float
 275         The iteration stops when ``(f^k -
 276         f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
 277     gtol : float
 278         The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
 279         <= gtol`` where ``proj g_i`` is the i-th component of the
 280         projected gradient.
 281     eps : float or ndarray
 282         If `jac is None` the absolute step size used for numerical
 283         approximation of the jacobian via forward differences.
 284     maxfun : int
 285         Maximum number of function evaluations. Note that this function
 286         may violate the limit because of evaluating gradients by numerical
 287         differentiation.
 288     maxiter : int
 289         Maximum number of iterations.
 290     iprint : int, optional
 291         Controls the frequency of output. ``iprint < 0`` means no output;
 292         ``iprint = 0``    print only one line at the last iteration;
 293         ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
 294         ``iprint = 99``   print details of every iteration except n-vectors;
 295         ``iprint = 100``  print also the changes of active set and final x;
 296         ``iprint > 100``  print details of every iteration including x and g.
 297     callback : callable, optional
 298         Called after each iteration, as ``callback(xk)``, where ``xk`` is the
 299         current parameter vector.
 300     maxls : int, optional
 301         Maximum number of line search steps (per iteration). Default is 20.
 302     finite_diff_rel_step : None or array_like, optional
 303         If `jac in ['2-point', '3-point', 'cs']` the relative step size to
 304         use for numerical approximation of the jacobian. The absolute step
 305         size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
 306         possibly adjusted to fit into the bounds. For ``method='3-point'``
 307         the sign of `h` is ignored. If None (default) then step is selected
 308         automatically.
 309
 310     Notes
 311     -----
 312     The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
 313     but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
 314     relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
 315     I.e., `factr` multiplies the default machine floating-point precision to
 316     arrive at `ftol`.
 317
 318     """
 319     _check_unknown_options(unknown_options)
 320     m = maxcor
 321     pgtol = gtol
 322     factr = ftol / np.finfo(float).eps
 323
 324     x0 = asarray(x0).ravel()
 325     n, = x0.shape
 326
 327     # historically old-style bounds were/are expected by lbfgsb.
 328     # That's still the case but we'll deal with new-style from here on,
 329     # it's easier
 330     if bounds is None:
 331         pass
 332     elif len(bounds) != n:
 333         raise ValueError('length of x0 != length of bounds')
 334     else:
 335         bounds = np.array(old_bound_to_new(bounds))
 336
 337         # check bounds
 338         if (bounds[0] > bounds[1]).any():
 339             raise ValueError(
 340                 "LBFGSB - one of the lower bounds is greater than an upper bound."
 341             )
 342
 343         # initial vector must lie within the bounds. Otherwise ScalarFunction and
 344         # approx_derivative will cause problems
 345         x0 = np.clip(x0, bounds[0], bounds[1])
 346
 347     if disp is not None:
 348         if disp == 0:
 349             iprint = -1
 350         else:
 351             iprint = disp
 352
 353     # _prepare_scalar_function can use bounds=None to represent no bounds
 354     sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
 355                                   bounds=bounds,
 356                                   finite_diff_rel_step=finite_diff_rel_step)
 357
 358     func_and_grad = sf.fun_and_grad
 359
 360     fortran_int = _lbfgsb.types.intvar.dtype
 361
 362     nbd = zeros(n, fortran_int)
 363     low_bnd = zeros(n, float64)
 364     upper_bnd = zeros(n, float64)
 365     bounds_map = {(-np.inf, np.inf): 0,
 366                   (1, np.inf): 1,
 367                   (1, 1): 2,
 368                   (-np.inf, 1): 3}
 369
 370     if bounds is not None:
 371         for i in range(0, n):
 372             l, u = bounds[0, i], bounds[1, i]
 373             if not np.isinf(l):
 374                 low_bnd[i] = l
 375                 l = 1
 376             if not np.isinf(u):
 377                 upper_bnd[i] = u
 378                 u = 1
 379             nbd[i] = bounds_map[l, u]
 380
 381     if not maxls > 0:
 382         raise ValueError('maxls must be positive.')
 383
 384     x = array(x0, float64)
 385     f = array(0.0, float64)
 386     g = zeros((n,), float64)
 387     wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
 388     iwa = zeros(3*n, fortran_int)
 389     task = zeros(1, 'S60')
 390     csave = zeros(1, 'S60')
 391     lsave = zeros(4, fortran_int)
 392     isave = zeros(44, fortran_int)
 393     dsave = zeros(29, float64)
 394
 395     task[:] = 'START'
 396
 397     n_iterations = 0
 398
 399     while 1:
 400         # g may become float32 if a user provides a function that calculates
 401         # the Jacobian in float32 (see gh-18730). The underlying Fortran code
 402         # expects float64, so upcast it
 403         g = g.astype(np.float64)
 404         # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
 405         _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
 406                        pgtol, wa, iwa, task, iprint, csave, lsave,
 407                        isave, dsave, maxls)
 408         task_str = task.tobytes()
 409         if task_str.startswith(b'FG'):
 410             # The minimization routine wants f and g at the current x.
 411             # Note that interruptions due to maxfun are postponed
 412             # until the completion of the current minimization iteration.
 413             # Overwrite f and g:
 414             f, g = func_and_grad(x)
 415             if sf.nfev > maxfun:
 416                 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
 417                            'EXCEEDS LIMIT')
 418         elif task_str.startswith(b'NEW_X'):
 419             # new iteration
 420             n_iterations += 1
 421
 422             intermediate_result = OptimizeResult(x=x, fun=f)
 423             if _call_callback_maybe_halt(callback, intermediate_result):
 424                 task[:] = 'STOP: CALLBACK REQUESTED HALT'
 425             if n_iterations >= maxiter:
 426                 task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
 427             elif sf.nfev > maxfun:
 428                 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
 429                            'EXCEEDS LIMIT')
 430         else:
 431             break
 432
 433     task_str = task.tobytes().strip(b'\x00').strip()
 434     if task_str.startswith(b'CONV'):
 435         warnflag = 0
 436     elif sf.nfev > maxfun or n_iterations >= maxiter:
 437         warnflag = 1
 438     else:
 439         warnflag = 2
 440
 441     # These two portions of the workspace are described in the mainlb
 442     # subroutine in lbfgsb.f. See line 363.
 443     s = wa[0: m*n].reshape(m, n)
 444     y = wa[m*n: 2*m*n].reshape(m, n)
 445
 446     # See lbfgsb.f line 160 for this portion of the workspace.
 447     # isave(31) = the total number of BFGS updates prior the current iteration;
 448     n_bfgs_updates = isave[30]
 449
 450     n_corrs = min(n_bfgs_updates, maxcor)
 451     hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
 452
 453     task_str = task_str.decode()
 454     return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
 455                           njev=sf.ngev,
 456                           nit=n_iterations, status=warnflag, message=task_str,
 457                           x=x, success=(warnflag == 0), hess_inv=hess_inv)
 458
 459
 460 class LbfgsInvHessProduct(LinearOperator):
 461     """Linear operator for the L-BFGS approximate inverse Hessian.
 462
 463     This operator computes the product of a vector with the approximate inverse
 464     of the Hessian of the objective function, using the L-BFGS limited
 465     memory approximation to the inverse Hessian, accumulated during the
 466     optimization.
 467
 468     Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
 469     interface.
 470
 471     Parameters
 472     ----------
 473     sk : array_like, shape=(n_corr, n)
 474         Array of `n_corr` most recent updates to the solution vector.
 475         (See [1]).
 476     yk : array_like, shape=(n_corr, n)
 477         Array of `n_corr` most recent updates to the gradient. (See [1]).
 478
 479     References
 480     ----------
 481     .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
 482        storage." Mathematics of computation 35.151 (1980): 773-782.
 483
 484     """
 485
 486     def __init__(self, sk, yk):
 487         """Construct the operator."""
 488         if sk.shape != yk.shape or sk.ndim != 2:
 489             raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
 490         n_corrs, n = sk.shape
 491
 492         super().__init__(dtype=np.float64, shape=(n, n))
 493
 494         self.sk = sk
 495         self.yk = yk
 496         self.n_corrs = n_corrs
 497         self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
 498
 499     def _matvec(self, x):
 500         """Efficient matrix-vector multiply with the BFGS matrices.
 501
 502         This calculation is described in Section (4) of [1].
 503
 504         Parameters
 505         ----------
 506         x : ndarray
 507             An array with shape (n,) or (n,1).
 508
 509         Returns
 510         -------
 511         y : ndarray
 512             The matrix-vector product
 513
 514         """
 515         s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
 516         q = np.array(x, dtype=self.dtype, copy=True)
 517         if q.ndim == 2 and q.shape[1] == 1:
 518             q = q.reshape(-1)
 519
 520         alpha = np.empty(n_corrs)
 521
 522         for i in range(n_corrs-1, -1, -1):
 523             alpha[i] = rho[i] * np.dot(s[i], q)
 524             q = q - alpha[i]*y[i]
 525
 526         r = q
 527         for i in range(n_corrs):
 528             beta = rho[i] * np.dot(y[i], r)
 529             r = r + s[i] * (alpha[i] - beta)
 530
 531         return r
 532
 533     def todense(self):
 534         """Return a dense array representation of this operator.
 535
 536         Returns
 537         -------
 538         arr : ndarray, shape=(n, n)
 539             An array with the same shape and containing
 540             the same data represented by this `LinearOperator`.
 541
 542         """
 543         s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
 544         I = np.eye(*self.shape, dtype=self.dtype)
 545         Hk = I
 546
 547         for i in range(n_corrs):
 548             A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
 549             A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]
 550
 551             Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
 552                                                         s[i][np.newaxis, :])
 553         return Hk