'''General implementation of the gradient descent algorithm.

More of a learning exercise for myself.
'''
import warnings
import numpy as np
from tqdm import tqdm
from scipy.optimize import line_search
try:
    # Newer SciPy releases keep LineSearchWarning in a private module
    from scipy.optimize._linesearch import LineSearchWarning
except ImportError:  # older SciPy
    from scipy.optimize.linesearch import LineSearchWarning


def gd(f, grad, x0, alpha=None, maxiter=1e6, tol=1e-8):
    '''Gradient descent algorithm.

    Parameters
    ==========
    f : callable
        Function to be optimized.
    grad : callable
        Function that computes the gradient of f; it is called as
        ``grad(f, x)``.
    x0 : array_like
        Initial point from which to start the descent.
    alpha : callable or float, optional
        Either a fixed step size or a function that returns a step size.
    maxiter : int, optional
        Do not exceed this number of iterations.
    tol : float, optional
        Run until the norm of the gradient falls below this number.

    Returns
    =======
    cur_x : array_like
        Estimate of the optimal choice of x.
    int
        Number of iterations.
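
    Examples
    ========
    A minimal usage sketch (illustrative only, not from the original
    docs); note that ``grad`` receives the objective as its first
    argument:

    >>> import numpy as np
    >>> def fun(x): return np.sum(x**2)
    >>> def dfun(f, x): return 2*x  # analytic gradient; ignores f
    >>> x_hat, n_iter = gd(fun, dfun, np.array([3.0, -2.0]))
    >>> bool(np.allclose(x_hat, 0, atol=1e-6))
    True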
    '''
    if not isinstance(x0, np.ndarray):
        x0 = np.atleast_1d(x0).astype(float)

    # Use scipy.optimize.line_search by default
    if alpha is None:
        alpha = line_search
    elif not callable(alpha):
        # If the step size is constant, package it in a constant function
        # that mimics line_search's return signature
        alpha0 = alpha

        def alpha(*args, **kwargs):  # pylint: disable=E0102,W0613
            return(alpha0, 0, 0, None, None, None)

    # Set up everything we need for the loop
    cur_x = x0.copy()
    previous_step_size = np.inf
    alpha0_default = 0.5
    alpha0_backup = alpha0_default
    f_vals = [None, None]  # old_fval, old_old_fval handed to line_search

    # Do the thing!
    pbar = tqdm(total=100, desc='GD %s' % f.__name__, leave=False)
    for ii in range(int(maxiter)):
        prev_x = cur_x.copy()

        # Compute the (steepest descent) search direction
        g0 = grad(f, prev_x)
        s0 = -g0

        # Get step size.
        # Sometimes line_search doesn't converge -- silently ignore this
        # and fall back to a (shrinking) fixed step size.
        with warnings.catch_warnings():
            warnings.filterwarnings('error', category=LineSearchWarning)
            try:
                alpha0, _fc, _gc, f_vals[0], f_vals[1], _derphi_star = alpha(
                    f, lambda x: grad(f, x), prev_x, s0, g0,
                    f_vals[0], f_vals[1])
                alpha0_backup = alpha0_default
            except LineSearchWarning:
                tqdm.write(
                    'line_search did not converge; '
                    'using fixed step size %g' % alpha0_backup)
                alpha0 = alpha0_backup
                alpha0_backup /= 2

                # Take care of objective value cycling
                f_vals[1] = f_vals[0]
                f_vals[0] = None

        # Take the step
        cur_x += alpha0*s0

        # Figure out if we can end: check the norm of the gradient at the
        # new point against tol
        # previous_step_size = np.abs(cur_x - prev_x)
        previous_step_size = np.linalg.norm(grad(f, cur_x))
        pbar.n = 0
        val = np.clip(np.round(
            100*tol/np.max(previous_step_size + np.finfo(float).eps)), 0, 100)
        pbar.update(val)
        if np.all(previous_step_size < tol):
            break
    pbar.close()

    if np.any(previous_step_size > tol):
        warnings.warn(
            'GD hit maxiter! Norm of gradient is not < %g' % tol)

    # return the solution
    return(cur_x, ii+1)
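

def fd_grad(f, x, eps=1e-6):
    '''Central-difference gradient with the ``grad(f, x)`` signature.

    This is an illustrative sketch only (not part of the original
    module): ``gd`` calls its ``grad`` argument as ``grad(f, x)``, so a
    numerical gradient like this one can be passed directly when an
    analytic gradient is not available.
    '''
    x = np.atleast_1d(x).astype(float)
    g = np.zeros_like(x)
    for idx in range(x.size):
        step = np.zeros_like(x)
        step[idx] = eps
        # Approximate df/dx_idx with a symmetric difference
        g[idx] = (f(x + step) - f(x - step))/(2*eps)
    return g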


if __name__ == '__main__':
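    # A minimal smoke test (not part of the original module): minimize a
    # simple 2D quadratic with an analytic gradient.  Remember gd() calls
    # grad as grad(f, x); the fd_grad sketch above could be swapped in too.
    def quadratic(x):
        return np.sum(x**2)

    def quadratic_grad(_f, x):
        # Analytic gradient of the quadratic; the objective is unused
        return 2*x

    x_hat, n_iter = gd(quadratic, quadratic_grad, np.array([3.0, -2.0]))
    print('x_hat = %s after %d iteration(s)' % (x_hat, n_iter))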