1 # Modification de la version 1.1.0
12 ## License for the Python wrapper
13 ## ==============================
15 ## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
17 ## Permission is hereby granted, free of charge, to any person obtaining a
18 ## copy of this software and associated documentation files (the "Software"),
19 ## to deal in the Software without restriction, including without limitation
20 ## the rights to use, copy, modify, merge, publish, distribute, sublicense,
21 ## and/or sell copies of the Software, and to permit persons to whom the
22 ## Software is furnished to do so, subject to the following conditions:
24 ## The above copyright notice and this permission notice shall be included in
25 ## all copies or substantial portions of the Software.
27 ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30 ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31 ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
32 ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
33 ## DEALINGS IN THE SOFTWARE.
35 ## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
37 from __future__ import division, print_function, absolute_import
40 from numpy import array, asarray, float64, int32, zeros
41 from scipy.optimize import _lbfgsb
42 from scipy.optimize.optimize import (approx_fprime, MemoizeJac, OptimizeResult,
43 _check_unknown_options, wrap_function,
44 _approx_fprime_helper)
45 from scipy.sparse.linalg import LinearOperator
47 __all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
50 def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
52 bounds=None, m=10, factr=1e7, pgtol=1e-5,
54 iprint=-1, maxfun=15000, maxiter=15000, disp=None,
55 callback=None, maxls=20):
57 Minimize a function func using the L-BFGS-B algorithm.
61 func : callable f(x,*args)
65 fprime : callable fprime(x,*args), optional
66 The gradient of `func`. If None, then `func` returns the function
67 value and the gradient (``f, g = func(x, *args)``), unless
68 `approx_grad` is True in which case `func` returns only ``f``.
69 args : sequence, optional
70 Arguments to pass to `func` and `fprime`.
71 approx_grad : bool, optional
72 Whether to approximate the gradient numerically (in which case
73 `func` returns only the function value).
74 bounds : list, optional
75 ``(min, max)`` pairs for each element in ``x``, defining
76 the bounds on that parameter. Use None or +-inf for one of ``min`` or
77 ``max`` when there is no bound in that direction.
79 The maximum number of variable metric corrections
80 used to define the limited memory matrix. (The limited memory BFGS
81 method does not store the full hessian but uses this many terms in an
83 factr : float, optional
84 The iteration stops when
85 ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
86 where ``eps`` is the machine precision, which is automatically
87 generated by the code. Typical values for `factr` are: 1e12 for
88 low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
89 high accuracy. See Notes for relationship to `ftol`, which is exposed
90 (instead of `factr`) by the `scipy.optimize.minimize` interface to
92 pgtol : float, optional
93 The iteration will stop when
94 ``max{|proj g_i | i = 1, ..., n} <= pgtol``
95 where ``pg_i`` is the i-th component of the projected gradient.
96 epsilon : float, optional
97 Step size used when `approx_grad` is True, for numerically
98 calculating the gradient
99 iprint : int, optional
100 Controls the frequency of output. ``iprint < 0`` means no output;
101 ``iprint = 0`` print only one line at the last iteration;
102 ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
103 ``iprint = 99`` print details of every iteration except n-vectors;
104 ``iprint = 100`` print also the changes of active set and final x;
105 ``iprint > 100`` print details of every iteration including x and g.
107 If zero, then no output. If a positive number, then this over-rides
108 `iprint` (i.e., `iprint` gets the value of `disp`).
109 maxfun : int, optional
110 Maximum number of function evaluations.
111 maxiter : int, optional
112 Maximum number of iterations.
113 callback : callable, optional
114 Called after each iteration, as ``callback(xk)``, where ``xk`` is the
115 current parameter vector.
116 maxls : int, optional
117 Maximum number of line search steps (per iteration). Default is 20.
122 Estimated position of the minimum.
124 Value of `func` at the minimum.
126 Information dictionary.
131 - 1 if too many function evaluations or too many iterations,
132 - 2 if stopped for another reason, given in d['task']
134 * d['grad'] is the gradient at the minimum (should be 0 ish)
135 * d['funcalls'] is the number of function calls made.
136 * d['nit'] is the number of iterations.
140 minimize: Interface to minimization algorithms for multivariate
141 functions. See the 'L-BFGS-B' `method` in particular. Note that the
142 `ftol` option is made available via that interface, while `factr` is
143 provided via this interface, where `factr` is the factor multiplying
144 the default machine floating-point precision to arrive at `ftol`:
145 ``ftol = factr * numpy.finfo(float).eps``.
149 License of L-BFGS-B (FORTRAN code):
151 The version included here (in fortran code) is 3.0
152 (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
153 and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
156 This software is freely available, but we expect that all publications
157 describing work using this software, or all commercial products using it,
158 quote at least one of the references given below. This software is released
159 under the BSD License.
163 * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
164 Constrained Optimization, (1995), SIAM Journal on Scientific and
165 Statistical Computing, 16, 5, pp. 1190-1208.
166 * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
167 FORTRAN routines for large scale bound constrained optimization (1997),
168 ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
169 * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
170 FORTRAN routines for large scale bound constrained optimization (2011),
171 ACM Transactions on Mathematical Software, 38, 1.
174 # handle fprime/approx_grad
179 fun = MemoizeJac(func)
188 opts = {'disp': disp,
191 'ftol': factr * np.finfo(float).eps,
196 'callback': callback,
199 res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
201 d = {'grad': res['jac'],
202 'task': res['message'],
203 'funcalls': res['nfev'],
205 'warnflag': res['status']}
212 def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
213 disp=None, maxcor=10, ftol=2.2204460492503131e-09,
214 gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
215 iprint=-1, callback=None, maxls=20, **unknown_options):
217 Minimize a scalar function of one or more variables using the L-BFGS-B
223 Set to True to print convergence messages.
225 The maximum number of variable metric corrections used to
226 define the limited memory matrix. (The limited memory BFGS
227 method does not store the full hessian but uses this many terms
228 in an approximation to it.)
230 The iteration stops when ``(f^k -
231 f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
233 The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
234 <= gtol`` where ``pg_i`` is the i-th component of the
237 Step size used for numerical approximation of the jacobian.
239 Set to True to print convergence messages.
241 Maximum number of function evaluations.
243 Maximum number of iterations.
244 maxls : int, optional
245 Maximum number of line search steps (per iteration). Default is 20.
249 The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
250 but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
251 relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
252 I.e., `factr` multiplies the default machine floating-point precision to
256 _check_unknown_options(unknown_options)
260 factr = ftol / np.finfo(float).eps
262 x0 = asarray(x0).ravel()
266 bounds = [(None, None)] * n
268 raise ValueError('length of x0 != length of bounds')
269 # unbounded variables must use None, not +-inf, for optimizer to work properly
270 bounds = [(None if l == -np.inf else l, None if u == np.inf else u) for l, u in bounds]
278 n_function_evals, fun = wrap_function(fun, ())
280 def func_and_grad(x):
282 g = _approx_fprime_helper(x, fun, epsilon, args=args, f0=f)
285 def func_and_grad(x):
290 nbd = zeros(n, int32)
291 low_bnd = zeros(n, float64)
292 upper_bnd = zeros(n, float64)
293 bounds_map = {(None, None): 0,
297 for i in range(0, n):
305 nbd[i] = bounds_map[l, u]
308 raise ValueError('maxls must be positive.')
310 x = array(x0, float64)
311 f = array(0.0, float64)
312 g = zeros((n,), float64)
313 wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
314 iwa = zeros(3*n, int32)
315 task = zeros(1, 'S60')
316 csave = zeros(1, 'S60')
317 lsave = zeros(4, int32)
318 isave = zeros(44, int32)
319 dsave = zeros(29, float64)
326 # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
327 _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
328 pgtol, wa, iwa, task, iprint, csave, lsave,
330 task_str = task.tostring()
331 if task_str.startswith(b'FG'):
332 # The minimization routine wants f and g at the current x.
333 # Note that interruptions due to maxfun are postponed
334 # until the completion of the current minimization iteration.
336 f, g = func_and_grad(x)
337 if n_function_evals[0] > maxfun:
338 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
340 elif task_str.startswith(b'NEW_X'):
343 if callback is not None:
346 if n_iterations >= maxiter:
347 task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
348 elif n_function_evals[0] > maxfun:
349 task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
354 task_str = task.tostring().strip(b'\x00').strip()
355 if task_str.startswith(b'CONV'):
357 elif n_function_evals[0] > maxfun or n_iterations >= maxiter:
362 # These two portions of the workspace are described in the mainlb
363 # subroutine in lbfgsb.f. See line 363.
364 s = wa[0: m*n].reshape(m, n)
365 y = wa[m*n: 2*m*n].reshape(m, n)
367 # See lbfgsb.f line 160 for this portion of the workspace.
368 # isave(31) = the total number of BFGS updates prior the current iteration;
369 n_bfgs_updates = isave[30]
371 n_corrs = min(n_bfgs_updates, maxcor)
372 hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
374 return OptimizeResult(fun=f, jac=g, nfev=n_function_evals[0],
375 nit=n_iterations, status=warnflag, message=task_str,
376 x=x, success=(warnflag == 0), hess_inv=hess_inv)
379 class LbfgsInvHessProduct(LinearOperator):
380 """Linear operator for the L-BFGS approximate inverse Hessian.
382 This operator computes the product of a vector with the approximate inverse
383 of the Hessian of the objective function, using the L-BFGS limited
384 memory approximation to the inverse Hessian, accumulated during the
387 Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
392 sk : array_like, shape=(n_corr, n)
393 Array of `n_corr` most recent updates to the solution vector.
395 yk : array_like, shape=(n_corr, n)
396 Array of `n_corr` most recent updates to the gradient. (See [1]).
400 .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
401 storage." Mathematics of computation 35.151 (1980): 773-782.
404 def __init__(self, sk, yk):
405 """Construct the operator."""
406 if sk.shape != yk.shape or sk.ndim != 2:
407 raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
408 n_corrs, n = sk.shape
410 super(LbfgsInvHessProduct, self).__init__(
411 dtype=np.float64, shape=(n, n))
415 self.n_corrs = n_corrs
416 self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
418 def _matvec(self, x):
419 """Efficient matrix-vector multiply with the BFGS matrices.
421 This calculation is described in Section (4) of [1].
426 An array with shape (n,) or (n,1).
431 The matrix-vector product
434 s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
435 q = np.array(x, dtype=self.dtype, copy=True)
436 if q.ndim == 2 and q.shape[1] == 1:
439 alpha = np.zeros(n_corrs)
441 for i in range(n_corrs-1, -1, -1):
442 alpha[i] = rho[i] * np.dot(s[i], q)
443 q = q - alpha[i]*y[i]
446 for i in range(n_corrs):
447 beta = rho[i] * np.dot(y[i], r)
448 r = r + s[i] * (alpha[i] - beta)
453 """Return a dense array representation of this operator.
457 arr : ndarray, shape=(n, n)
458 An array with the same shape and containing
459 the same data represented by this `LinearOperator`.
462 s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
463 I = np.eye(*self.shape, dtype=self.dtype)
466 for i in range(n_corrs):
467 A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
468 A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]
470 Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *