1 # Modification de la version 1.14.0
13 ## License for the Python wrapper
14 ## ==============================
16 ## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
18 ## Permission is hereby granted, free of charge, to any person obtaining a
19 ## copy of this software and associated documentation files (the "Software"),
20 ## to deal in the Software without restriction, including without limitation
21 ## the rights to use, copy, modify, merge, publish, distribute, sublicense,
22 ## and/or sell copies of the Software, and to permit persons to whom the
23 ## Software is furnished to do so, subject to the following conditions:
25 ## The above copyright notice and this permission notice shall be included in
26 ## all copies or substantial portions of the Software.
28 ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
34 ## DEALINGS IN THE SOFTWARE.
36 ## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
39 from numpy import array, asarray, float64, zeros
40 from scipy.optimize import _lbfgsb
41 from scipy.optimize._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
42 _wrap_callback, _check_unknown_options,
43 _prepare_scalar_function)
44 from scipy.optimize._constraints import old_bound_to_new
46 from scipy.sparse.linalg import LinearOperator
48 __all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
51 def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
53 bounds=None, m=10, factr=1e7, pgtol=1e-5,
55 iprint=-1, maxfun=15000, maxiter=15000, disp=None,
56 callback=None, maxls=20):
58 Minimize a function func using the L-BFGS-B algorithm.
62 func : callable f(x,*args)
66 fprime : callable fprime(x,*args), optional
67 The gradient of `func`. If None, then `func` returns the function
68 value and the gradient (``f, g = func(x, *args)``), unless
69 `approx_grad` is True in which case `func` returns only ``f``.
70 args : sequence, optional
71 Arguments to pass to `func` and `fprime`.
72 approx_grad : bool, optional
73 Whether to approximate the gradient numerically (in which case
74 `func` returns only the function value).
75 bounds : list, optional
76 ``(min, max)`` pairs for each element in ``x``, defining
77 the bounds on that parameter. Use None or +-inf for one of ``min`` or
78 ``max`` when there is no bound in that direction.
80 The maximum number of variable metric corrections
81 used to define the limited memory matrix. (The limited memory BFGS
82 method does not store the full hessian but uses this many terms in an
84 factr : float, optional
85 The iteration stops when
86 ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
87 where ``eps`` is the machine precision, which is automatically
88 generated by the code. Typical values for `factr` are: 1e12 for
89 low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
90 high accuracy. See Notes for relationship to `ftol`, which is exposed
91 (instead of `factr`) by the `scipy.optimize.minimize` interface to
93 pgtol : float, optional
94 The iteration will stop when
95 ``max{|proj g_i | i = 1, ..., n} <= pgtol``
96 where ``proj g_i`` is the i-th component of the projected gradient.
97 epsilon : float, optional
98 Step size used when `approx_grad` is True, for numerically
99 calculating the gradient
100 iprint : int, optional
101 Controls the frequency of output. ``iprint < 0`` means no output;
102 ``iprint = 0`` print only one line at the last iteration;
103 ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
104 ``iprint = 99`` print details of every iteration except n-vectors;
105 ``iprint = 100`` print also the changes of active set and final x;
106 ``iprint > 100`` print details of every iteration including x and g.
108 If zero, then no output. If a positive number, then this over-rides
109 `iprint` (i.e., `iprint` gets the value of `disp`).
110 maxfun : int, optional
111 Maximum number of function evaluations. Note that this function
112 may violate the limit because of evaluating gradients by numerical
114 maxiter : int, optional
115 Maximum number of iterations.
116 callback : callable, optional
117 Called after each iteration, as ``callback(xk)``, where ``xk`` is the
118 current parameter vector.
119 maxls : int, optional
120 Maximum number of line search steps (per iteration). Default is 20.
125 Estimated position of the minimum.
127 Value of `func` at the minimum.
129 Information dictionary.
134 - 1 if too many function evaluations or too many iterations,
135 - 2 if stopped for another reason, given in d['task']
137 * d['grad'] is the gradient at the minimum (should be 0 ish)
138 * d['funcalls'] is the number of function calls made.
139 * d['nit'] is the number of iterations.
143 minimize: Interface to minimization algorithms for multivariate
144 functions. See the 'L-BFGS-B' `method` in particular. Note that the
145 `ftol` option is made available via that interface, while `factr` is
146 provided via this interface, where `factr` is the factor multiplying
147 the default machine floating-point precision to arrive at `ftol`:
148 ``ftol = factr * numpy.finfo(float).eps``.
152 License of L-BFGS-B (FORTRAN code):
154 The version included here (in fortran code) is 3.0
155 (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
156 and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
159 This software is freely available, but we expect that all publications
160 describing work using this software, or all commercial products using it,
161 quote at least one of the references given below. This software is released
162 under the BSD License.
166 * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
167 Constrained Optimization, (1995), SIAM Journal on Scientific and
168 Statistical Computing, 16, 5, pp. 1190-1208.
169 * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
170 FORTRAN routines for large scale bound constrained optimization (1997),
171 ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
172 * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
173 FORTRAN routines for large scale bound constrained optimization (2011),
174 ACM Transactions on Mathematical Software, 38, 1.
178 Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we define
179 an objective function ``f(m, b) = (y - y_model)**2``, where `y` describes the
180 observations and `y_model` the prediction of the linear model as
181 ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``, are arbitrarily
182 chosen as ``(0,5)`` and ``(5,10)`` for this example.
184 >>> import numpy as np
185 >>> from scipy.optimize import fmin_l_bfgs_b
186 >>> X = np.arange(0, 10, 1)
190 >>> def func(parameters, *args):
193 ... m, b = parameters
194 ... y_model = m*x + b
195 ... error = sum(np.power((y - y_model), 2))
198 >>> initial_values = np.array([0.0, 1.0])
200 >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
201 ... approx_grad=True)
203 array([1.99999999, 3.00000006]), 1.7746231151323805e-14 # may vary
205 The optimized parameters in ``x_opt`` agree with the ground truth parameters
206 ``m`` and ``b``. Next, let us perform a bound contrained optimization using the `bounds`
209 >>> bounds = [(0, 5), (5, 10)]
210 >>> x_opt, f_op, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
211 ... approx_grad=True, bounds=bounds)
213 array([1.65990508, 5.31649385]), 15.721334516453945 # may vary
215 # handle fprime/approx_grad
220 fun = MemoizeJac(func)
229 callback = _wrap_callback(callback)
230 opts = {'disp': disp,
233 'ftol': factr * np.finfo(float).eps,
238 'callback': callback,
241 res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
243 d = {'grad': res['jac'],
244 'task': res['message'],
245 'funcalls': res['nfev'],
247 'warnflag': res['status']}
254 def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
255 disp=None, maxcor=10, ftol=2.2204460492503131e-09,
256 gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
257 iprint=-1, callback=None, maxls=20,
258 finite_diff_rel_step=None, **unknown_options):
260 Minimize a scalar function of one or more variables using the L-BFGS-B
266 If `disp is None` (the default), then the supplied version of `iprint`
267 is used. If `disp is not None`, then it overrides the supplied version
268 of `iprint` with the behaviour you outlined.
270 The maximum number of variable metric corrections used to
271 define the limited memory matrix. (The limited memory BFGS
272 method does not store the full hessian but uses this many terms
273 in an approximation to it.)
275 The iteration stops when ``(f^k -
276 f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
278 The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
279 <= gtol`` where ``proj g_i`` is the i-th component of the
281 eps : float or ndarray
282 If `jac is None` the absolute step size used for numerical
283 approximation of the jacobian via forward differences.
285 Maximum number of function evaluations. Note that this function
286 may violate the limit because of evaluating gradients by numerical
289 Maximum number of iterations.
290 iprint : int, optional
291 Controls the frequency of output. ``iprint < 0`` means no output;
292 ``iprint = 0`` print only one line at the last iteration;
293 ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
294 ``iprint = 99`` print details of every iteration except n-vectors;
295 ``iprint = 100`` print also the changes of active set and final x;
296 ``iprint > 100`` print details of every iteration including x and g.
297 callback : callable, optional
298 Called after each iteration, as ``callback(xk)``, where ``xk`` is the
299 current parameter vector.
300 maxls : int, optional
301 Maximum number of line search steps (per iteration). Default is 20.
302 finite_diff_rel_step : None or array_like, optional
303 If `jac in ['2-point', '3-point', 'cs']` the relative step size to
304 use for numerical approximation of the jacobian. The absolute step
305 size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
306 possibly adjusted to fit into the bounds. For ``method='3-point'``
307 the sign of `h` is ignored. If None (default) then step is selected
312 The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
313 but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
314 relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
315 I.e., `factr` multiplies the default machine floating-point precision to
319 _check_unknown_options(unknown_options)
322 factr = ftol / np.finfo(float).eps
324 x0 = asarray(x0).ravel()
327 # historically old-style bounds were/are expected by lbfgsb.
328 # That's still the case but we'll deal with new-style from here on,
332 elif len(bounds) != n:
333 raise ValueError('length of x0 != length of bounds')
335 bounds = np.array(old_bound_to_new(bounds))
338 if (bounds[0] > bounds[1]).any():
340 "LBFGSB - one of the lower bounds is greater than an upper bound."
343 # initial vector must lie within the bounds. Otherwise ScalarFunction and
344 # approx_derivative will cause problems
345 x0 = np.clip(x0, bounds[0], bounds[1])
353 # _prepare_scalar_function can use bounds=None to represent no bounds
354 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
356 finite_diff_rel_step=finite_diff_rel_step)
358 func_and_grad = sf.fun_and_grad
360 fortran_int = _lbfgsb.types.intvar.dtype
362 nbd = zeros(n, fortran_int)
363 low_bnd = zeros(n, float64)
364 upper_bnd = zeros(n, float64)
365 bounds_map = {(-np.inf, np.inf): 0,
370 if bounds is not None:
371 for i in range(0, n):
372 l, u = bounds[0, i], bounds[1, i]
379 nbd[i] = bounds_map[l, u]
382 raise ValueError('maxls must be positive.')
384 x = array(x0, float64)
385 f = array(0.0, float64)
386 g = zeros((n,), float64)
387 wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
388 iwa = zeros(3*n, fortran_int)
389 task = zeros(1, 'S60')
390 csave = zeros(1, 'S60')
391 lsave = zeros(4, fortran_int)
392 isave = zeros(44, fortran_int)
393 dsave = zeros(29, float64)
400 # g may become float32 if a user provides a function that calculates
401 # the Jacobian in float32 (see gh-18730). The underlying Fortran code
402 # expects float64, so upcast it
403 g = g.astype(np.float64)
404 # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
405 _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
406 pgtol, wa, iwa, task, iprint, csave, lsave,
408 task_str = task.tobytes()
409 if task_str.startswith(b'FG'):
410 # The minimization routine wants f and g at the current x.
411 # Note that interruptions due to maxfun are postponed
412 # until the completion of the current minimization iteration.
414 f, g = func_and_grad(x)
416 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
418 elif task_str.startswith(b'NEW_X'):
422 intermediate_result = OptimizeResult(x=x, fun=f)
423 if _call_callback_maybe_halt(callback, intermediate_result):
424 task[:] = 'STOP: CALLBACK REQUESTED HALT'
425 if n_iterations >= maxiter:
426 task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
427 elif sf.nfev > maxfun:
428 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
433 task_str = task.tobytes().strip(b'\x00').strip()
434 if task_str.startswith(b'CONV'):
436 elif sf.nfev > maxfun or n_iterations >= maxiter:
441 # These two portions of the workspace are described in the mainlb
442 # subroutine in lbfgsb.f. See line 363.
443 s = wa[0: m*n].reshape(m, n)
444 y = wa[m*n: 2*m*n].reshape(m, n)
446 # See lbfgsb.f line 160 for this portion of the workspace.
447 # isave(31) = the total number of BFGS updates prior the current iteration;
448 n_bfgs_updates = isave[30]
450 n_corrs = min(n_bfgs_updates, maxcor)
451 hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
453 task_str = task_str.decode()
454 return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
456 nit=n_iterations, status=warnflag, message=task_str,
457 x=x, success=(warnflag == 0), hess_inv=hess_inv)
460 class LbfgsInvHessProduct(LinearOperator):
461 """Linear operator for the L-BFGS approximate inverse Hessian.
463 This operator computes the product of a vector with the approximate inverse
464 of the Hessian of the objective function, using the L-BFGS limited
465 memory approximation to the inverse Hessian, accumulated during the
468 Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
473 sk : array_like, shape=(n_corr, n)
474 Array of `n_corr` most recent updates to the solution vector.
476 yk : array_like, shape=(n_corr, n)
477 Array of `n_corr` most recent updates to the gradient. (See [1]).
481 .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
482 storage." Mathematics of computation 35.151 (1980): 773-782.
486 def __init__(self, sk, yk):
487 """Construct the operator."""
488 if sk.shape != yk.shape or sk.ndim != 2:
489 raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
490 n_corrs, n = sk.shape
492 super().__init__(dtype=np.float64, shape=(n, n))
496 self.n_corrs = n_corrs
497 self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
499 def _matvec(self, x):
500 """Efficient matrix-vector multiply with the BFGS matrices.
502 This calculation is described in Section (4) of [1].
507 An array with shape (n,) or (n,1).
512 The matrix-vector product
515 s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
516 q = np.array(x, dtype=self.dtype, copy=True)
517 if q.ndim == 2 and q.shape[1] == 1:
520 alpha = np.empty(n_corrs)
522 for i in range(n_corrs-1, -1, -1):
523 alpha[i] = rho[i] * np.dot(s[i], q)
524 q = q - alpha[i]*y[i]
527 for i in range(n_corrs):
528 beta = rho[i] * np.dot(y[i], r)
529 r = r + s[i] * (alpha[i] - beta)
534 """Return a dense array representation of this operator.
538 arr : ndarray, shape=(n, n)
539 An array with the same shape and containing
540 the same data represented by this `LinearOperator`.
543 s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
544 I = np.eye(*self.shape, dtype=self.dtype)
547 for i in range(n_corrs):
548 A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
549 A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]
551 Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *