Module ktrain.lroptimize.triangular

Source code
from .. import utils as U
from ..imports import *


class CyclicLR(keras.callbacks.Callback):
    """
    This callback implements a cyclical learning rate policy (CLR).
    The method cycles the learning rate between two boundaries with
    some constant frequency, as detailed in this paper (https://arxiv.org/abs/1506.01186).
    The amplitude of the cycle can be scaled on a per-iteration or
    per-cycle basis.
    This class has three built-in policies, as put forth in the paper.
    "triangular":
        A basic triangular cycle w/ no amplitude scaling.
    "triangular2":
        A basic triangular cycle that scales initial amplitude by half each cycle.
    "exp_range":
        A cycle that scales initial amplitude by gamma**(cycle iterations) at each
        cycle iteration.
    For more detail, please see paper.

    # Example
        ```python
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., mode='triangular')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```

    Class also supports custom scaling functions:
        ```python
            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., scale_fn=clr_fn,
                                scale_mode='cycle')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```
    # Arguments
        base_lr: initial learning rate which is the
            lower boundary in the cycle.
        max_lr: upper boundary in the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude; therefore
            max_lr may not actually be reached depending on
            scaling function.
        step_size: number of training iterations per
            half cycle. Authors suggest setting step_size to
            2-8 times the number of training iterations per epoch.
        mode: one of {triangular, triangular2, exp_range}.
            Default 'triangular'.
            Values correspond to policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in 'exp_range' scaling function:
            gamma**(cycle iterations)
        scale_fn: Custom scaling policy defined by a single
            argument lambda function, where
            0 <= scale_fn(x) <= 1 for all x >= 0.
            The mode parameter is ignored when scale_fn is given.
        scale_mode: {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on
            cycle number or cycle iterations (training
            iterations since start of cycle). Default is 'cycle'.
        reduce_on_plateau (int): LR will be reduced after this many
                                 epochs with no improvement in the monitored value.
                                 If zero or None, no reduction will take place.
        reduce_factor (int):     LR is divided by this factor when reduced
                                 (e.g., 2 means the LR is halved).
        monitor (str):           value to monitor when reducing LR
        max_momentum (float):    maximum momentum when momentum is cycled.
                                 If both max_momentum and min_momentum are None,
                                 the default momentum for Adam is used
                                 (only used if optimizer is Adam).
        min_momentum (float):    minimum momentum when momentum is cycled.
                                 If both max_momentum and min_momentum are None,
                                 the default momentum for Adam is used
                                 (only used if optimizer is Adam).
        verbose (bool):          If True, prints information on LR reduction.
    References:
        Original Paper: https://arxiv.org/abs/1803.09820
        Blog Post: https://sgugger.github.io/the-1cycle-policy.html
        Code Reference: https://github.com/bckenstler/CLR
    """

    def __init__(
        self,
        base_lr=0.001,
        max_lr=0.006,
        step_size=2000.0,
        mode="triangular",
        gamma=1.0,
        scale_fn=None,
        scale_mode="cycle",
        reduce_on_plateau=0,
        monitor="val_loss",
        reduce_factor=2,
        max_momentum=0.95,
        min_momentum=0.85,
        verbose=1,
    ):
        super().__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == "triangular":
                self.scale_fn = lambda x: 1.0
                self.scale_mode = "cycle"
            elif self.mode == "triangular2":
                self.scale_fn = lambda x: 1 / (2.0 ** (x - 1))
                self.scale_mode = "cycle"
            elif self.mode == "exp_range":
                self.scale_fn = lambda x: gamma ** (x)
                self.scale_mode = "iterations"
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.0
        self.trn_iterations = 0.0
        self.history = {}

        # restoring weights due to CRF bug
        self.best_weights = None

        # LR reduction
        self.verbose = verbose
        self.patience = reduce_on_plateau
        self.wait = 0  # epochs since the monitored value last improved
        self.factor = 1.0 / reduce_factor
        self.monitor = monitor
        if "acc" not in self.monitor:
            self.monitor_op = lambda a, b: np.less(a, b)
            self.best = np.Inf
        else:
            self.monitor_op = lambda a, b: np.greater(a, b)
            self.best = -np.Inf

        # annihilating LR
        self.overhump = False

        # cyclical momentum
        self.max_momentum = max_momentum
        self.min_momentum = min_momentum
        if self.min_momentum is None and self.max_momentum:
            self.min_momentum = self.max_momentum
        elif self.min_momentum and self.max_momentum is None:
            self.max_momentum = self.min_momentum
        self.cycle_momentum = self.max_momentum is not None

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None, new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.0

    def clr(self):
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        if self.scale_mode == "cycle":
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(
                0, (1 - x)
            ) * self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(
                0, (1 - x)
            ) * self.scale_fn(self.clr_iterations)

    def on_train_begin(self, logs={}):
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

        self.orig_base_lr = self.base_lr

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        self.history.setdefault("lr", []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault("iterations", []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        K.set_value(self.model.optimizer.lr, self.clr())

        # annihilate learning rate
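        # In the descending half of each cycle ("overhump"), base_lr is temporarily
        # dropped to max_lr / 1000 so the LR anneals toward near zero, then restored
        # to its original value when a new cycle begins.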
        prev_overhump = self.overhump
        iterations = (self.clr_iterations + 1) % (self.step_size * 2)
        if iterations / self.step_size > 1:
            self.overhump = True
        else:
            self.overhump = False
        if not prev_overhump and self.overhump:
            self.base_lr = self.max_lr / 1000
        elif prev_overhump and not self.overhump:
            self.base_lr = self.orig_base_lr

        # set momentum
        if self.cycle_momentum:
            if self.overhump:
                current_percentage = 1.0 - (
                    (iterations - self.step_size) / float(self.step_size)
                )
                new_momentum = self.max_momentum - current_percentage * (
                    self.max_momentum - self.min_momentum
                )
            else:
                current_percentage = iterations / float(self.step_size)
                new_momentum = self.max_momentum - current_percentage * (
                    self.max_momentum - self.min_momentum
                )
            K.set_value(self.model.optimizer.beta_1, new_momentum)
            self.history.setdefault("momentum", []).append(
                K.get_value(self.model.optimizer.beta_1)
            )

    def on_epoch_end(self, epoch, logs=None):
        # print(K.eval(self.model.optimizer.lr))

        # Stop training if training loss becomes zero or negative
        # to address bug in keras_contrib code for CRF.
        # We restore the weights from previous best epoch
        # rather than this epoch.
        crf = U.is_crf(self.model)
        if crf:
            current_loss = logs.get("loss")
            current_val_loss = logs.get("val_loss", None)
            if (current_loss is not None and current_loss <= 0.0) or (
                current_val_loss is not None and current_val_loss <= 0.0
            ):
                self.model.stop_training = True
                if crf and self.best_weights is not None:
                    if self.verbose > 0:
                        print(
                            "Restoring model weights from the end of " "the best epoch"
                        )
                    self.model.set_weights(self.best_weights)
                return

        if self.patience:
            current = logs.get(self.monitor)
            if current is None:
                raise Exception("cannot monitor %s" % (self.monitor))
            if self.monitor_op(current, self.best):
                self.best = current
                self.wait = 0
                if crf:
                    self.best_weights = self.model.get_weights()
            else:
                self.wait += 1
                if self.wait >= self.patience:
                    min_lr = 1e-7
                    current_lr = float(K.get_value(self.model.optimizer.lr))
                    if self.max_lr > min_lr:
                        self.base_lr = self.base_lr * self.factor
                        self.max_lr = self.max_lr * self.factor
                        new_lr = current_lr * self.factor
                        new_lr = max(new_lr, min_lr)
                        K.set_value(self.model.optimizer.lr, new_lr)
                        if self.verbose:
                            print(
                                "\nEpoch %05d: Reducing Max LR on Plateau: "
                                "new max lr will be %s (if not early_stopping)."
                                % (epoch + 1, self.max_lr)
                            )
                        self.wait = 0

Classes

class CyclicLR (base_lr=0.001, max_lr=0.006, step_size=2000.0, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', reduce_on_plateau=0, monitor='val_loss', reduce_factor=2, max_momentum=0.95, min_momentum=0.85, verbose=1)

This callback implements a cyclical learning rate policy (CLR). The method cycles the learning rate between two boundaries with some constant frequency, as detailed in this paper (https://arxiv.org/abs/1506.01186). The amplitude of the cycle can be scaled on a per-iteration or per-cycle basis. This class has three built-in policies, as put forth in the paper:

"triangular": a basic triangular cycle with no amplitude scaling.

"triangular2": a basic triangular cycle that scales the initial amplitude by half each cycle.

"exp_range": a cycle that scales the initial amplitude by gamma**(cycle iterations) at each cycle iteration.

For more detail, please see the paper.
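
For reference, a minimal sketch of the amplitude-scaling functions behind the three built-in modes, mirroring the logic in __init__ (x is the cycle number for 'triangular'/'triangular2' and the iteration count for 'exp_range'; the gamma value shown is illustrative):

```python
gamma = 0.99994  # illustrative value, used only by 'exp_range'

builtin_scale_fns = {
    "triangular":  lambda x: 1.0,                     # constant amplitude
    "triangular2": lambda x: 1.0 / (2.0 ** (x - 1)),  # halve amplitude each cycle
    "exp_range":   lambda x: gamma ** x,              # decay amplitude each iteration
}
```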

Example

```python
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                        step_size=2000., mode='triangular')
    model.fit(X_train, Y_train, callbacks=[clr])
```

Class also supports custom scaling functions:

```python
    clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                        step_size=2000., scale_fn=clr_fn,
                        scale_mode='cycle')
    model.fit(X_train, Y_train, callbacks=[clr])
```

Arguments

base_lr: initial learning rate which is the
    lower boundary in the cycle.
max_lr: upper boundary in the cycle. Functionally,
    it defines the cycle amplitude (max_lr - base_lr).
    The lr at any cycle is the sum of base_lr
    and some scaling of the amplitude; therefore
    max_lr may not actually be reached depending on
    scaling function.
step_size: number of training iterations per
    half cycle. Authors suggest setting step_size to
    2-8 times the number of training iterations per epoch
    (see the sketch after this argument list).
mode: one of {triangular, triangular2, exp_range}.
    Default 'triangular'.
    Values correspond to policies detailed above.
    If scale_fn is not None, this argument is ignored.
gamma: constant in 'exp_range' scaling function:
    gamma**(cycle iterations)
scale_fn: Custom scaling policy defined by a single
    argument lambda function, where
    0 <= scale_fn(x) <= 1 for all x >= 0.
    The mode parameter is ignored when scale_fn is given.
scale_mode: {'cycle', 'iterations'}.
    Defines whether scale_fn is evaluated on
    cycle number or cycle iterations (training
    iterations since start of cycle). Default is 'cycle'.
reduce_on_plateau (int): LR will be reduced after this many
                         epochs with no improvement in the monitored value.
                         If zero or None, no reduction will take place.
reduce_factor (int):     LR is divided by this factor when reduced
                         (e.g., 2 means the LR is halved).
monitor (str):           value to monitor when reducing LR
max_momentum (float):    maximum momentum when momentum is cycled.
                         If both max_momentum and min_momentum are None,
                         the default momentum for Adam is used
                         (only used if optimizer is Adam).
min_momentum (float):    minimum momentum when momentum is cycled.
                         If both max_momentum and min_momentum are None,
                         the default momentum for Adam is used
                         (only used if optimizer is Adam).
verbose (bool):          If True, prints information on LR reduction.
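
A rough, hedged sketch of the step_size guideline above (the dataset size and batch size here are assumptions, not part of this API):

```python
from ktrain.lroptimize.triangular import CyclicLR

# Pick step_size as roughly 2-8 times the number of iterations in one epoch.
num_train_samples = 50000                # hypothetical dataset size
batch_size = 32                          # hypothetical batch size
iterations_per_epoch = num_train_samples // batch_size
step_size = 4 * iterations_per_epoch     # anywhere in the 2-8x range is reasonable

clr = CyclicLR(base_lr=1e-4, max_lr=1e-3, step_size=step_size, mode='triangular')
```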

References

Original Paper: https://arxiv.org/abs/1803.09820
Blog Post: https://sgugger.github.io/the-1cycle-policy.html
Code Reference: https://github.com/bckenstler/CLR

Ancestors

  • keras.callbacks.Callback

Methods

def clr(self)
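
Computes the learning rate for the current iteration from the cyclical schedule. A minimal standalone sketch of the basic 'triangular' case (the function and variable names here are illustrative, not part of the API):

```python
import numpy as np

def triangular_lr(iteration, base_lr=0.001, max_lr=0.006, step_size=2000.0):
    """Learning rate at a given training iteration under the basic 'triangular' policy."""
    cycle = np.floor(1 + iteration / (2 * step_size))
    x = np.abs(iteration / step_size - 2 * cycle + 1)
    return base_lr + (max_lr - base_lr) * np.maximum(0, 1 - x)

# The LR starts at base_lr and peaks at max_lr halfway through each cycle.
assert np.isclose(triangular_lr(0), 0.001)
assert np.isclose(triangular_lr(2000), 0.006)
```
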
def on_batch_end(self, batch, logs=None)

A backwards compatibility alias for on_train_batch_end.

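In this subclass, the hook advances the iteration counters, applies the new cyclical learning rate, and records the per-iteration learning rate (and momentum, when cycled) in the callback's history dict. A hedged sketch of inspecting that history after training, assuming matplotlib is available and clr is the CyclicLR instance that was passed to fit:

```python
import matplotlib.pyplot as plt

# clr is the CyclicLR instance previously passed to model.fit(..., callbacks=[clr]);
# its history dict is populated by on_batch_end during training.
plt.plot(clr.history['iterations'], clr.history['lr'])
plt.xlabel('training iteration')
plt.ylabel('learning rate')
plt.show()
```
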
def on_epoch_end(self, epoch, logs=None)

Called at the end of an epoch.

Subclasses should override for any actions to run. This function should only be called during TRAIN mode.

Args

epoch: Integer, index of epoch.
logs: Dict, metric results for this training epoch, and for the validation epoch if validation is performed. Validation result keys are prefixed with val_. For training epoch, the values of the Model's metrics are returned. Example: {'loss': 0.2, 'accuracy': 0.7}.
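
In this subclass, the hook checks for a CRF-related degenerate loss and implements the optional reduce-on-plateau behavior described in the Arguments section. A hedged sketch of enabling the plateau-based reduction (the model and data names are assumptions, not part of this API):

```python
from ktrain.lroptimize.triangular import CyclicLR

clr = CyclicLR(base_lr=1e-4, max_lr=1e-3, step_size=2000.,
               reduce_on_plateau=2,   # reduce after 2 epochs without improvement
               reduce_factor=2,       # base_lr and max_lr are halved on each reduction
               monitor='val_loss',
               verbose=1)

# model, x_train, y_train, x_val, y_val are assumed to be defined elsewhere
model.fit(x_train, y_train,
          validation_data=(x_val, y_val),
          epochs=10, callbacks=[clr])
```
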
def on_train_begin(self, logs={})

Called at the beginning of training.

Subclasses should override for any actions to run.

Args

logs: Dict. Currently no data is passed to this argument for this method but that may change in the future.