'''Linesearch functions.

Once we have a direction to step, for example, the negative gradient
direction in a gradient descent algorithm, we need to know how big of a
step to take. If we take too large or too small a step, we may not find
the minimum of the objective function along the line we are stepping. A
linesearch attempts to find the optimal step size in a given direction
with as few gradient and objective evaluations as possible.
'''
import numpy as np


def linesearch_quad(f, x, a, s):
    '''Simple quadratic linesearch.

    Parameters
    ==========
    f : callable
        Objective function.
    x : array_like
        Current location.
    a : float
        Guess for stepsize.
    s : array_like
        Search direction.

    Returns
    =======
    a : float
        Stepsize for the current step.
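
    Examples
    ========
    A quick sanity check on a convex quadratic (the objective and points
    below are illustrative, not part of the module); the exact minimizing
    stepsize from ``x = [2, 0]`` along ``s = [-1, 0]`` is 2:

    >>> fun = lambda x: x @ x
    >>> x = np.array([2.0, 0.0])
    >>> s = np.array([-1.0, 0.0])
    >>> float(linesearch_quad(fun, x, 1.0, s))
    2.0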
    '''
    # Sample the objective at stepsizes a, 2a, and 4a along s, then jump to
    # the minimizer of the quadratic interpolating those three samples:
    f1 = f(x + a*s)
    f2 = f(x + 2*a*s)
    f3 = f(x + 4*a*s)
    a *= (-f1*12 + f2*15 - f3*3)/(2*(-f1*2 + f2*3 - f3))
    return a


def linesearch(obj, x0, a0, s):
    '''More sophisticated linesearch.

    Brackets the minimum by repeatedly increasing the stepsize until the
    objective stops decreasing, then fits a quadratic through three of the
    sampled points and returns its minimizer.

    Parameters
    ==========
    obj : callable
        Objective function.
    x0 : array_like
        Current location.
    a0 : float
        Current guess at stepsize.
    s : array_like
        Search direction.

    Returns
    =======
    a : float
        Stepsize for the current step.
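
    Examples
    ========
    A quick sanity check on a convex quadratic (the objective and points
    below are illustrative, not part of the module); the exact minimizing
    stepsize from ``x0 = [2, 0]`` along ``s = [-1, 0]`` is 2:

    >>> fun = lambda x: x @ x
    >>> x0 = np.array([2.0, 0.0])
    >>> s = np.array([-1.0, 0.0])
    >>> round(float(linesearch(fun, x0, 0.5, s)), 6)
    2.0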
    '''
    f = [obj(x0), obj(x0 + a0*s)]  # objective at the start and first guess
    a = [0, a0]
    t = 1.3  # factor by which the stepsize grows each iteration
    n = len(x0)
    # Keep stepping farther along s while the objective is still decreasing
    # (or while we have seen fewer than n distinct objective values)
    while f[-1] < f[-2] or len(np.unique(f)) < n:
        a.append(a[-1]*t)
        f.append(obj(x0 + a[-1]*s))
    # The minimum is now bracketed; go back and fill in the midpoint of the
    # last interval, keeping three points: the start, the midpoint, and the
    # last (uphill) stepsize
    a_mid = (a[-1] + a[-2])/2
    f = [f[0], obj(x0 + a_mid*s), f[-1]]
    a = np.array([a[0], a_mid, a[-1]])
    # Get rid of duplicate values if we have any; np.unique also sorts f,
    # and idx reorders a to match
    f, idx = np.unique(f, return_index=True)
    a = a[idx]
    # I see people switching between quadratic and cubic fits, but we'll
    # just assume a quadratic for ease. The minimizer of the parabola
    # through three points is symmetric in those points, so the sorting
    # done by np.unique above is harmless.
    num = (f[0]*(a[1]**2 - a[2]**2) + f[1]*(a[2]**2 - a[0]**2)
           + f[2]*(a[0]**2 - a[1]**2))
    den = 2*(f[0]*(a[1] - a[2]) + f[1]*(a[2] - a[0]) + f[2]*(a[0] - a[1]))
    return num/den
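

# A minimal sketch of the gradient-descent loop the module docstring alludes
# to, wired up with linesearch; the objective, gradient, and starting point
# below are illustrative assumptions, not part of the module.
if __name__ == '__main__':
    def fun(x):
        return (x[0] - 1)**2 + 2*(x[1] + 3)**2

    def gradient(x):
        return np.array([2*(x[0] - 1), 4*(x[1] + 3)])

    x = np.array([4.0, 2.0])
    for _ in range(20):
        s = -gradient(x)                # steepest-descent direction
        if np.linalg.norm(s) < 1e-8:    # gradient has vanished; done
            break
        a = linesearch(fun, x, 0.1, s)  # stepsize along s
        x = x + a*s
    print(x)  # should approach the minimizer, [1, -3]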