Module ktrain.lroptimize.lrfinder

Expand source code
from .. import utils as U
from ..imports import *


class LRFinder:
    """
    ```
    Tracks (and plots) the change in loss of a Keras model as learning rate is gradually increased.
    Used to visually identify a good learning rate, given model and data.
    Reference:
        Original Paper: https://arxiv.org/abs/1506.01186
    ```
    """

    def __init__(self, model, stop_factor=4):
        self.model = model
        self.losses = []
        self.lrs = []
        self.best_loss = 1e9
        self._weightfile = None
        self.stop_factor = stop_factor

        self.avg_loss = 0
        self.batch_num = 0
        self.beta = 0.98

        # stats computed by _compute_stats
        self.mg = None  # index of minimum numerical gradient
        self.ml = None  # index of minimum loss

    def on_batch_end(self, batch, logs):
        # Log the learning rate
        lr = K.get_value(self.model.optimizer.lr)
        self.lrs.append(lr)

        # Log the loss
        loss = logs["loss"]
        self.batch_num += 1
        self.avg_loss = self.beta * self.avg_loss + (1 - self.beta) * loss
        smoothed_loss = self.avg_loss / (1 - self.beta**self.batch_num)
        self.losses.append(smoothed_loss)

        # Check whether the loss got too large or NaN
        # print("\n%s:%s\n" % (smoothed_loss, self.stop_factor * self.best_loss))
        if self.batch_num > 1 and (
            np.isnan(smoothed_loss)
            or smoothed_loss > self.stop_factor * self.best_loss
        ):
            self.model.stop_training = True
            return

        # record best loss
        if smoothed_loss < self.best_loss or self.batch_num == 1:
            self.best_loss = smoothed_loss

        # Increase the learning rate for the next batch
        lr *= self.lr_mult
        K.set_value(self.model.optimizer.lr, lr)

        # stop if LR grows too large
        if lr > 10.0:
            self.model.stop_training = True
            return

    def find(
        self,
        train_data,
        steps_per_epoch,
        use_gen=False,
        class_weight=None,
        start_lr=1e-7,
        lr_mult=1.01,
        max_epochs=None,
        batch_size=U.DEFAULT_BS,
        workers=1,
        use_multiprocessing=False,
        verbose=1,
    ):
        """
        ```
        Track loss as learning rate is increased.
        NOTE: batch_size is ignored when train_data is an instance of Iterator.
        ```
        """

        # check arguments and initialize
        if train_data is None:
            raise ValueError("train_data is required")
        # U.data_arg_check(train_data=train_data, train_required=True)
        self.lrs = []
        self.losses = []

        # compute steps_per_epoch
        # num_samples = U.nsamples_from_data(train_data)
        # if U.is_iter(train_data):
        # use_gen = True
        # steps_per_epoch = num_samples // train_data.batch_size
        # else:
        # use_gen = False
        # steps_per_epoch = np.ceil(num_samples/batch_size)

        # if both max_epochs and lr_mult are None, set max_epochs
        # so that the sweep covers roughly 1500 batches
        if max_epochs is None and lr_mult is None:
            max_epochs = int(np.ceil(1500.0 / steps_per_epoch))

        if max_epochs:
            epochs = max_epochs
            num_batches = epochs * steps_per_epoch
            end_lr = 10 if start_lr < 10 else start_lr * 10
            self.lr_mult = (end_lr / start_lr) ** (1 / num_batches)
        else:
            epochs = 1024
            self.lr_mult = lr_mult

        # Save weights into a file
        new_file, self._weightfile = tempfile.mkstemp()
        self.model.save_weights(self._weightfile)

        # Remember the original learning rate
        original_lr = K.get_value(self.model.optimizer.lr)

        # Set the initial learning rate
        K.set_value(self.model.optimizer.lr, start_lr)

        callback = keras.callbacks.LambdaCallback(
            on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs)
        )

        if use_gen:
            # *_generator methods are deprecated from TF 2.1.0
            fit_fn = self.model.fit
            fit_fn(
                train_data,
                steps_per_epoch=steps_per_epoch,
                epochs=epochs,
                class_weight=class_weight,
                workers=workers,
                use_multiprocessing=use_multiprocessing,
                verbose=verbose,
                callbacks=[callback],
            )
        else:
            self.model.fit(
                train_data[0],
                train_data[1],
                batch_size=batch_size,
                epochs=epochs,
                class_weight=class_weight,
                verbose=verbose,
                callbacks=[callback],
            )

        # Restore the weights to the state before model fitting
        self.model.load_weights(self._weightfile)
        self._weightfile = None

        # Restore the original learning rate
        K.set_value(self.model.optimizer.lr, original_lr)

        # compute stats for numerical estimates of lr
        self._compute_stats()

        return

    def plot_loss(
        self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False
    ):
        """
        ```
        Plots the loss.
        Args:
            n_skip_beginning(int): number of batches to skip on the left.
            n_skip_end(int):  number of batches to skip on the right.
            suggest(bool): if True, highlights numerical estimates
                           of the best lr (methods adapted from fastai)
            return_fig(bool):  If True, return matplotlib.figure.Figure
        Returns:
          matplotlib.figure.Figure if return_fig else None
        ```
        """
        if not self.find_called():
            raise ValueError("Please call find first.")

        fig, ax = plt.subplots()
        plt.ylabel("loss")
        plt.xlabel("learning rate (log scale)")
        ax.plot(
            self.lrs[n_skip_beginning:-n_skip_end],
            self.losses[n_skip_beginning:-n_skip_end],
        )
        plt.xscale("log")

        fig = None
        if suggest:
            # this code was adapted from fastai: https://github.com/fastai/fastai
            if self.mg is None:
                print(
                    "Failed to compute the gradients, there might not be enough points.\n"
                    + "Plot displayed without suggestion."
                )
            else:
                valley = self.valley(self.lrs, self.losses)
                mg = self.mg
                ml = self.ml
                print("Three possible suggestions for LR from plot:")
                print(f"\tLongest valley (red): {self.lrs[valley]:.2E}")
                print(f"\tMin numerical gradient (purple): {self.lrs[mg]:.2E}")
                print(
                    f"\tMin loss divided by 10 (omitted from plot): {self.lrs[ml]/10:.2E}"
                )
                ax.plot(
                    self.lrs[valley],
                    self.losses[valley],
                    markersize=10,
                    marker="o",
                    color="red",
                )
                ax.plot(
                    self.lrs[mg],
                    self.losses[mg],
                    markersize=10,
                    marker="o",
                    color="purple",
                )
        fig = plt.gcf()
        plt.show()
        if return_fig:
            return fig
        return

    def valley(self, lrs, losses):
        """
        valley method for LR suggestions:
        https://github.com/fastai/fastai/pull/3377
        """
        n = len(losses)
        max_start, max_end = 0, 0

        # find the longest valley
        lds = [1] * n
        for i in range(1, n):
            for j in range(0, i):
                if (losses[i] < losses[j]) and (lds[i] < lds[j] + 1):
                    lds[i] = lds[j] + 1
                if lds[max_end] < lds[i]:
                    max_end = i
                    max_start = max_end - lds[max_end]

        sections = (max_end - max_start) / 3
        idx = max_start + int(sections) + int(sections / 2)

        # return lrs[idx], (lrs[idx], losses[idx])
        return idx

    def _compute_stats(self):
        """
        ```
        Generates the index associated with the minimum numerical gradient and the
        index associated with the minimum loss.
        Stored as mg and ml, respectively.
        ```
        """
        # this code was adapted from fastai: https://github.com/fastai/fastai
        self.ml = np.argmin(self.losses)
        try:
            self.mg = (np.gradient(np.array(self.losses[32 : self.ml]))).argmin()
        except Exception as e:
            self.mg = None
            warnings.warn(str(e))
        return

    def estimate_lr(self):
        """
        ```
        Generates three numerical estimates of lr:
          1. lr associated with minimum numerical gradient (None if gradient computation fails)
          2. lr associated with minimum loss divided by 10
          3. lr associated with longest valley
        Returns:
          tuple: (float, float, float)

          If gradient computation fails, first element of tuple will be None.
        ```
        """
        if not self.find_called():
            raise ValueError("Please call find first.")
        lr1 = None
        lr2 = None
        if self.mg is not None:
            lr1 = self.lrs[self.mg]
        lr2 = self.lrs[self.ml] / 10
        lr3 = self.lrs[self.valley(self.lrs, self.losses)]

        return (lr1, lr2, lr3)

    def find_called(self):
        return self.ml is not None

    def plot_loss_change(
        self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01)
    ):
        """
        ```
        Plots rate of change of the loss function.
        Parameters:
            sma - number of batches for simple moving average to smooth out the curve.
            n_skip_beginning - number of batches to skip on the left.
            n_skip_end - number of batches to skip on the right.
            y_lim - limits for the y axis.
        ```
        """
        assert sma >= 1
        derivatives = [0] * sma
        for i in range(sma, len(self.lrs)):
            derivative = (self.losses[i] - self.losses[i - sma]) / sma
            derivatives.append(derivative)

        plt.ylabel("rate of loss change")
        plt.xlabel("learning rate (log scale)")
        plt.plot(
            self.lrs[n_skip_beginning:-n_skip_end],
            derivatives[n_skip_beginning:-n_skip_end],
        )
        plt.xscale("log")
        plt.ylim(y_lim)

Classes

class LRFinder (model, stop_factor=4)
Tracks (and plots) the change in loss of a Keras model as learning rate is gradually increased.
Used to visually identify a good learning rate, given model and data.
Reference:
    Original Paper: https://arxiv.org/abs/1506.01186
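
A minimal usage sketch, assuming TensorFlow/Keras; the toy model, random data, and hyperparameter values below are illustrative assumptions, not part of this module:

```
import numpy as np
from tensorflow import keras
from ktrain.lroptimize.lrfinder import LRFinder

# toy data and model (illustrative only)
x_train = np.random.rand(1024, 20).astype("float32")
y_train = np.random.randint(0, 2, size=(1024,)).astype("float32")
model = keras.Sequential(
    [
        keras.layers.Dense(16, activation="relu", input_shape=(20,)),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
model.compile(optimizer="adam", loss="binary_crossentropy")

batch_size = 32
steps_per_epoch = int(np.ceil(len(x_train) / batch_size))

finder = LRFinder(model)
finder.find((x_train, y_train), steps_per_epoch, batch_size=batch_size)
finder.plot_loss(suggest=True)   # loss vs. learning rate on a log scale
print(finder.estimate_lr())      # (min-gradient lr, min-loss/10 lr, valley lr)
```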
Expand source code
class LRFinder:
    """
    ```
    Tracks (and plots) the change in loss of a Keras model as learning rate is gradually increased.
    Used to visually identify a good learning rate, given model and data.
    Reference:
        Original Paper: https://arxiv.org/abs/1506.01186
    ```
    """

    def __init__(self, model, stop_factor=4):
        self.model = model
        self.losses = []
        self.lrs = []
        self.best_loss = 1e9
        self._weightfile = None
        self.stop_factor = stop_factor

        self.avg_loss = 0
        self.batch_num = 0
        self.beta = 0.98

        # stats computed by _compute_stats
        self.mg = None  # index of minimum numerical gradient
        self.ml = None  # index of minimum loss

    def on_batch_end(self, batch, logs):
        # Log the learning rate
        lr = K.get_value(self.model.optimizer.lr)
        self.lrs.append(lr)

        # Log the loss
        loss = logs["loss"]
        self.batch_num += 1
        self.avg_loss = self.beta * self.avg_loss + (1 - self.beta) * loss
        smoothed_loss = self.avg_loss / (1 - self.beta**self.batch_num)
        self.losses.append(smoothed_loss)

        # Check whether the loss got too large or NaN
        # print("\n%s:%s\n" % (smoothed_loss, self.stop_factor * self.best_loss))
        if self.batch_num > 1 and (
            np.isnan(smoothed_loss)
            or smoothed_loss > self.stop_factor * self.best_loss
        ):
            self.model.stop_training = True
            return

        # record best loss
        if smoothed_loss < self.best_loss or self.batch_num == 1:
            self.best_loss = smoothed_loss

        # Increase the learning rate for the next batch
        lr *= self.lr_mult
        K.set_value(self.model.optimizer.lr, lr)

        # stop if LR grows too large
        if lr > 10.0:
            self.model.stop_training = True
            return

    def find(
        self,
        train_data,
        steps_per_epoch,
        use_gen=False,
        class_weight=None,
        start_lr=1e-7,
        lr_mult=1.01,
        max_epochs=None,
        batch_size=U.DEFAULT_BS,
        workers=1,
        use_multiprocessing=False,
        verbose=1,
    ):
        """
        ```
        Track loss as learning rate is increased.
        NOTE: batch_size is ignored when train_data is an instance of Iterator.
        ```
        """

        # check arguments and initialize
        if train_data is None:
            raise ValueError("train_data is required")
        # U.data_arg_check(train_data=train_data, train_required=True)
        self.lrs = []
        self.losses = []

        # compute steps_per_epoch
        # num_samples = U.nsamples_from_data(train_data)
        # if U.is_iter(train_data):
        # use_gen = True
        # steps_per_epoch = num_samples // train_data.batch_size
        # else:
        # use_gen = False
        # steps_per_epoch = np.ceil(num_samples/batch_size)

        # if both max_epochs and lr_mult are None, set max_epochs
        # so that the sweep covers roughly 1500 batches
        if max_epochs is None and lr_mult is None:
            max_epochs = int(np.ceil(1500.0 / steps_per_epoch))

        if max_epochs:
            epochs = max_epochs
            num_batches = epochs * steps_per_epoch
            end_lr = 10 if start_lr < 10 else start_lr * 10
            self.lr_mult = (end_lr / start_lr) ** (1 / num_batches)
        else:
            epochs = 1024
            self.lr_mult = lr_mult

        # Save weights into a file
        new_file, self._weightfile = tempfile.mkstemp()
        self.model.save_weights(self._weightfile)

        # Remember the original learning rate
        original_lr = K.get_value(self.model.optimizer.lr)

        # Set the initial learning rate
        K.set_value(self.model.optimizer.lr, start_lr)

        callback = keras.callbacks.LambdaCallback(
            on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs)
        )

        if use_gen:
            # *_generator methods are deprecated from TF 2.1.0
            fit_fn = self.model.fit
            fit_fn(
                train_data,
                steps_per_epoch=steps_per_epoch,
                epochs=epochs,
                class_weight=class_weight,
                workers=workers,
                use_multiprocessing=use_multiprocessing,
                verbose=verbose,
                callbacks=[callback],
            )
        else:
            self.model.fit(
                train_data[0],
                train_data[1],
                batch_size=batch_size,
                epochs=epochs,
                class_weight=class_weight,
                verbose=verbose,
                callbacks=[callback],
            )

        # Restore the weights to the state before model fitting
        self.model.load_weights(self._weightfile)
        self._weightfile = None

        # Restore the original learning rate
        K.set_value(self.model.optimizer.lr, original_lr)

        # compute stats for numerical estimates of lr
        self._compute_stats()

        return

    def plot_loss(
        self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False
    ):
        """
        ```
        Plots the loss.
        Args:
            n_skip_beginning(int): number of batches to skip on the left.
            n_skip_end(int):  number of batches to skip on the right.
            suggest(bool): if True, highlights numerical estimates
                           of the best lr (methods adapted from fastai)
            return_fig(bool):  If True, return matplotlib.figure.Figure
        Returns:
          matplotlib.figure.Figure if return_fig else None
        ```
        """
        if not self.find_called():
            raise ValueError("Please call find first.")

        fig, ax = plt.subplots()
        plt.ylabel("loss")
        plt.xlabel("learning rate (log scale)")
        ax.plot(
            self.lrs[n_skip_beginning:-n_skip_end],
            self.losses[n_skip_beginning:-n_skip_end],
        )
        plt.xscale("log")

        fig = None
        if suggest:
            # this code was adapted from fastai: https://github.com/fastai/fastai
            if self.mg is None:
                print(
                    "Failed to compute the gradients, there might not be enough points.\n"
                    + "Plot displayed without suggestion."
                )
            else:
                valley = self.valley(self.lrs, self.losses)
                mg = self.mg
                ml = self.ml
                print("Three possible suggestions for LR from plot:")
                print(f"\tLongest valley (red): {self.lrs[valley]:.2E}")
                print(f"\tMin numerical gradient (purple): {self.lrs[mg]:.2E}")
                print(
                    f"\tMin loss divided by 10 (omitted from plot): {self.lrs[ml]/10:.2E}"
                )
                ax.plot(
                    self.lrs[valley],
                    self.losses[valley],
                    markersize=10,
                    marker="o",
                    color="red",
                )
                ax.plot(
                    self.lrs[mg],
                    self.losses[mg],
                    markersize=10,
                    marker="o",
                    color="purple",
                )
        fig = plt.gcf()
        plt.show()
        if return_fig:
            return fig
        return

    def valley(self, lrs, losses):
        """
        valley method for LR suggestions:
        https://github.com/fastai/fastai/pull/3377
        """
        n = len(losses)
        max_start, max_end = 0, 0

        # find the longest valley
        lds = [1] * n
        for i in range(1, n):
            for j in range(0, i):
                if (losses[i] < losses[j]) and (lds[i] < lds[j] + 1):
                    lds[i] = lds[j] + 1
                if lds[max_end] < lds[i]:
                    max_end = i
                    max_start = max_end - lds[max_end]

        sections = (max_end - max_start) / 3
        idx = max_start + int(sections) + int(sections / 2)

        # return lrs[idx], (lrs[idx], losses[idx])
        return idx

    def _compute_stats(self):
        """
        ```
        Generates the index associated with the minimum numerical gradient and the
        index associated with the minimum loss.
        Stored as mg and ml, respectively.
        ```
        """
        # this code was adapted from fastai: https://github.com/fastai/fastai
        self.ml = np.argmin(self.losses)
        try:
            self.mg = (np.gradient(np.array(self.losses[32 : self.ml]))).argmin()
        except Exception as e:
            self.mg = None
            warnings.warn(str(e))
        return

    def estimate_lr(self):
        """
        ```
        Generates three numerical estimates of lr:
          1. lr associated with minimum numerical gradient (None if gradient computation fails)
          2. lr associated with minimum loss divided by 10
          3. lr associated with longest valley
        Returns:
          tuple: (float, float, float)

          If gradient computation fails, first element of tuple will be None.
        ```
        """
        if not self.find_called():
            raise ValueError("Please call find first.")
        lr1 = None
        lr2 = None
        if self.mg is not None:
            lr1 = self.lrs[self.mg]
        lr2 = self.lrs[self.ml] / 10
        lr3 = self.lrs[self.valley(self.lrs, self.losses)]

        return (lr1, lr2, lr3)

    def find_called(self):
        return self.ml is not None

    def plot_loss_change(
        self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01)
    ):
        """
        ```
        Plots rate of change of the loss function.
        Parameters:
            sma - number of batches for simple moving average to smooth out the curve.
            n_skip_beginning - number of batches to skip on the left.
            n_skip_end - number of batches to skip on the right.
            y_lim - limits for the y axis.
        ```
        """
        assert sma >= 1
        derivatives = [0] * sma
        for i in range(sma, len(self.lrs)):
            derivative = (self.losses[i] - self.losses[i - sma]) / sma
            derivatives.append(derivative)

        plt.ylabel("rate of loss change")
        plt.xlabel("learning rate (log scale)")
        plt.plot(
            self.lrs[n_skip_beginning:-n_skip_end],
            derivatives[n_skip_beginning:-n_skip_end],
        )
        plt.xscale("log")
        plt.ylim(y_lim)

Methods

def estimate_lr(self)
Generates three numerical estimates of lr:
  1. lr associated with minimum numerical gradient (None if gradient computation fails)
  2. lr associated with minimum loss divided by 10
  3. lr associated with longest valley
Returns:
  tuple: (float, float, float)

  If gradient computation fails, first element of tuple will be None.
Expand source code
def estimate_lr(self):
    """
    ```
    Generates three numerical estimates of lr:
      1. lr associated with minimum numerical gradient (None if gradient computation fails)
      2. lr associated with minimum loss divided by 10
      3. lr associated with longest valley
    Returns:
      tuple: (float, float, float)

      If gradient computation fails, first element of tuple will be None.
    ```
    """
    if not self.find_called():
        raise ValueError("Please call find first.")
    lr1 = None
    lr2 = None
    if self.mg is not None:
        lr1 = self.lrs[self.mg]
    lr2 = self.lrs[self.ml] / 10
    lr3 = self.lrs[self.valley(self.lrs, self.losses)]

    return (lr1, lr2, lr3)
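
Although three values are returned, the gradient-based estimate can be None when there are too few points; a hedged unpacking sketch (reusing the `finder` object from the class-level example above):

```
lr_min_grad, lr_min_loss_div10, lr_valley = finder.estimate_lr()
if lr_min_grad is None:   # gradient estimate failed; fall back to the valley suggestion
    lr_min_grad = lr_valley
```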
def find(self, train_data, steps_per_epoch, use_gen=False, class_weight=None, start_lr=1e-07, lr_mult=1.01, max_epochs=None, batch_size=32, workers=1, use_multiprocessing=False, verbose=1)
Track loss as learning rate is increased.
NOTE: batch_size is ignored when train_data is an instance of Iterator.
Expand source code
def find(
    self,
    train_data,
    steps_per_epoch,
    use_gen=False,
    class_weight=None,
    start_lr=1e-7,
    lr_mult=1.01,
    max_epochs=None,
    batch_size=U.DEFAULT_BS,
    workers=1,
    use_multiprocessing=False,
    verbose=1,
):
    """
    ```
    Track loss as learning rate is increased.
    NOTE: batch_size is ignored when train_data is an instance of Iterator.
    ```
    """

    # check arguments and initialize
    if train_data is None:
        raise ValueError("train_data is required")
    # U.data_arg_check(train_data=train_data, train_required=True)
    self.lrs = []
    self.losses = []

    # compute steps_per_epoch
    # num_samples = U.nsamples_from_data(train_data)
    # if U.is_iter(train_data):
    # use_gen = True
    # steps_per_epoch = num_samples // train_data.batch_size
    # else:
    # use_gen = False
    # steps_per_epoch = np.ceil(num_samples/batch_size)

    # if both max_epochs and lr_mult are None, set max_epochs
    # so that the sweep covers roughly 1500 batches
    if max_epochs is None and lr_mult is None:
        max_epochs = int(np.ceil(1500.0 / steps_per_epoch))

    if max_epochs:
        epochs = max_epochs
        num_batches = epochs * steps_per_epoch
        end_lr = 10 if start_lr < 10 else start_lr * 10
        self.lr_mult = (end_lr / start_lr) ** (1 / num_batches)
    else:
        epochs = 1024
        self.lr_mult = lr_mult

    # Save weights into a file
    new_file, self._weightfile = tempfile.mkstemp()
    self.model.save_weights(self._weightfile)

    # Remember the original learning rate
    original_lr = K.get_value(self.model.optimizer.lr)

    # Set the initial learning rate
    K.set_value(self.model.optimizer.lr, start_lr)

    callback = keras.callbacks.LambdaCallback(
        on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs)
    )

    if use_gen:
        # *_generator methods are deprecated from TF 2.1.0
        fit_fn = self.model.fit
        fit_fn(
            train_data,
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            class_weight=class_weight,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            verbose=verbose,
            callbacks=[callback],
        )
    else:
        self.model.fit(
            train_data[0],
            train_data[1],
            batch_size=batch_size,
            epochs=epochs,
            class_weight=class_weight,
            verbose=verbose,
            callbacks=[callback],
        )

    # Restore the weights to the state before model fitting
    self.model.load_weights(self._weightfile)
    self._weightfile = None

    # Restore the original learning rate
    K.set_value(self.model.optimizer.lr, original_lr)

    # compute stats for numerical estimates of lr
    self._compute_stats()

    return
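
When max_epochs is given (or derived), find() sweeps the learning rate geometrically from start_lr to end_lr over epochs * steps_per_epoch batches by multiplying the LR by a fixed factor after each batch. A small arithmetic sketch of that schedule, with illustrative values:

```
start_lr, end_lr = 1e-7, 10.0          # end_lr = 10 whenever start_lr < 10, as in find()
steps_per_epoch, max_epochs = 500, 3   # illustrative values
num_batches = max_epochs * steps_per_epoch

lr_mult = (end_lr / start_lr) ** (1.0 / num_batches)
print(lr_mult)                              # ~1.0124 per-batch multiplier
print(start_lr * lr_mult ** num_batches)    # ~10.0, i.e. the sweep ends at end_lr
```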
def find_called(self)
Expand source code
def find_called(self):
    return self.ml is not None
def on_batch_end(self, batch, logs)
Expand source code
def on_batch_end(self, batch, logs):
    # Log the learning rate
    lr = K.get_value(self.model.optimizer.lr)
    self.lrs.append(lr)

    # Log the loss
    loss = logs["loss"]
    self.batch_num += 1
    self.avg_loss = self.beta * self.avg_loss + (1 - self.beta) * loss
    smoothed_loss = self.avg_loss / (1 - self.beta**self.batch_num)
    self.losses.append(smoothed_loss)

    # Check whether the loss got too large or NaN
    # print("\n%s:%s\n" % (smoothed_loss, self.stop_factor * self.best_loss))
    if self.batch_num > 1 and (
        np.isnan(smoothed_loss)
        or smoothed_loss > self.stop_factor * self.best_loss
    ):
        self.model.stop_training = True
        return

    # record best loss
    if smoothed_loss < self.best_loss or self.batch_num == 1:
        self.best_loss = smoothed_loss

    # Increase the learning rate for the next batch
    lr *= self.lr_mult
    K.set_value(self.model.optimizer.lr, lr)

    # stop if LR grows too large
    if lr > 10.0:
        self.model.stop_training = True
        return
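
on_batch_end records a bias-corrected exponentially weighted moving average of the loss (beta=0.98) rather than the raw batch loss, so the curve is smooth but the first few batches are not dragged toward zero. A standalone sketch of that smoothing on toy loss values:

```
raw_losses = [2.0, 1.8, 1.9, 1.5, 1.4]   # toy per-batch losses (illustrative)
beta = 0.98

avg_loss, smoothed = 0.0, []
for batch_num, loss in enumerate(raw_losses, start=1):
    avg_loss = beta * avg_loss + (1 - beta) * loss      # running EWMA
    smoothed.append(avg_loss / (1 - beta**batch_num))   # bias correction
print(smoothed)   # stays close to the raw losses even at batch 1
```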
def plot_loss(self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False)
Plots the loss.
Args:
    n_skip_beginning(int): number of batches to skip on the left.
    n_skip_end(int):  number of batches to skip on the right.
    suggest(bool): if True, highlights numerical estimates
                   of the best lr (methods adapted from fastai)
    return_fig(bool):  If True, return matplotlib.figure.Figure
Returns:
  matplotlib.figure.Figure if return_fig else None
Expand source code
def plot_loss(
    self, n_skip_beginning=10, n_skip_end=1, suggest=False, return_fig=False
):
    """
    ```
    Plots the loss.
    Args:
        n_skip_beginning(int): number of batches to skip on the left.
        n_skip_end(int):  number of batches to skip on the right.
        suggest(bool): if True, highlights numerical estimates
                       of the best lr (methods adapted from fastai)
        return_fig(bool):  If True, return matplotlib.figure.Figure
    Returns:
      matplotlib.figure.Figure if return_fig else None
    ```
    """
    if not self.find_called():
        raise ValueError("Please call find first.")

    fig, ax = plt.subplots()
    plt.ylabel("loss")
    plt.xlabel("learning rate (log scale)")
    ax.plot(
        self.lrs[n_skip_beginning:-n_skip_end],
        self.losses[n_skip_beginning:-n_skip_end],
    )
    plt.xscale("log")

    fig = None
    if suggest:
        # this code was adapted from fastai: https://github.com/fastai/fastai
        if self.mg is None:
            print(
                "Failed to compute the gradients, there might not be enough points.\n"
                + "Plot displayed without suggestion."
            )
        else:
            valley = self.valley(self.lrs, self.losses)
            mg = self.mg
            ml = self.ml
            print("Three possible suggestions for LR from plot:")
            print(f"\tLongest valley (red): {self.lrs[valley]:.2E}")
            print(f"\tMin numerical gradient (purple): {self.lrs[mg]:.2E}")
            print(
                f"\tMin loss divided by 10 (omitted from plot): {self.lrs[ml]/10:.2E}"
            )
            ax.plot(
                self.lrs[valley],
                self.losses[valley],
                markersize=10,
                marker="o",
                color="red",
            )
            ax.plot(
                self.lrs[mg],
                self.losses[mg],
                markersize=10,
                marker="o",
                color="purple",
            )
    fig = plt.gcf()
    plt.show()
    if return_fig:
        return fig
    return
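
Because return_fig hands back the current matplotlib figure, the plot can be saved rather than only displayed; a brief hedged example (again assuming a `finder` on which find() has already been called):

```
fig = finder.plot_loss(n_skip_beginning=10, n_skip_end=5, suggest=True, return_fig=True)
fig.savefig("lr_finder.png", dpi=150)   # filename is illustrative
```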
def plot_loss_change(self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01))
Plots rate of change of the loss function.
Parameters:
    sma - number of batches for simple moving average to smooth out the curve.
    n_skip_beginning - number of batches to skip on the left.
    n_skip_end - number of batches to skip on the right.
    y_lim - limits for the y axis.
Expand source code
def plot_loss_change(
    self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01)
):
    """
    ```
    Plots rate of change of the loss function.
    Parameters:
        sma - number of batches for simple moving average to smooth out the curve.
        n_skip_beginning - number of batches to skip on the left.
        n_skip_end - number of batches to skip on the right.
        y_lim - limits for the y axis.
    ```
    """
    assert sma >= 1
    derivatives = [0] * sma
    for i in range(sma, len(self.lrs)):
        derivative = (self.losses[i] - self.losses[i - sma]) / sma
        derivatives.append(derivative)

    plt.ylabel("rate of loss change")
    plt.xlabel("learning rate (log scale)")
    plt.plot(
        self.lrs[n_skip_beginning:-n_skip_end],
        derivatives[n_skip_beginning:-n_skip_end],
    )
    plt.xscale("log")
    plt.ylim(y_lim)
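
A short usage note: larger sma values smooth the finite-difference curve at the cost of some lag; an illustrative call (parameter values are assumptions, not defaults):

```
finder.plot_loss_change(sma=20, n_skip_beginning=20, n_skip_end=5, y_lim=(-0.02, 0.01))
```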
def valley(self, lrs, losses)

valley method for LR suggestions: https://github.com/fastai/fastai/pull/3377

Expand source code
def valley(self, lrs, losses):
    """
    valley method for LR suggestions:
    https://github.com/fastai/fastai/pull/3377
    """
    n = len(losses)
    max_start, max_end = 0, 0

    # find the longest valley
    lds = [1] * n
    for i in range(1, n):
        for j in range(0, i):
            if (losses[i] < losses[j]) and (lds[i] < lds[j] + 1):
                lds[i] = lds[j] + 1
            if lds[max_end] < lds[i]:
                max_end = i
                max_start = max_end - lds[max_end]

    sections = (max_end - max_start) / 3
    idx = max_start + int(sections) + int(sections / 2)

    # return lrs[idx], (lrs[idx], losses[idx])
    return idx
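
The valley heuristic is essentially a longest strictly-decreasing-chain search over the loss curve; the suggestion is taken roughly halfway into the final third of that chain, which keeps it away from both the flat start and the divergent tail. A self-contained sketch on a synthetic curve (the shape and values are illustrative):

```
import numpy as np
from ktrain.lroptimize.lrfinder import LRFinder

# synthetic LR-range-test curve: flat, then improving, then diverging
lrs = list(np.logspace(-7, 1, 100))
losses = list(np.concatenate([np.full(20, 2.0),
                              np.linspace(2.0, 0.5, 60),
                              np.linspace(0.5, 4.0, 20)]))

finder = LRFinder(model=None)   # valley() never touches the model
idx = finder.valley(lrs, losses)
print(idx, f"{lrs[idx]:.2E}")   # index falls inside the descending section
```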