Module ktrain.core
Expand source code
from . import utils as U
from .graph.predictor import LinkPredictor, NodePredictor
from .graph.preprocessor import LinkPreprocessor, NodePreprocessor
from .imports import *
from .lroptimize.lrfinder import *
from .tabular.predictor import TabularPredictor
from .tabular.preprocessor import TabularPreprocessor
from .text.ner.predictor import NERPredictor
from .text.ner.preprocessor import NERPreprocessor
from .text.predictor import TextPredictor
from .text.preprocessor import (
from .vision.predictor import ImagePredictor
from .vision.preprocessor import ImagePreprocessor
class Learner(ABC):
Abstract class used to tune and train Keras models. The fit method is
an abstract method and must be implemented by subclasses.
def __init__(self, model, workers=1, use_multiprocessing=False):
if not isinstance(model, keras.Model):
raise ValueError("model must be of instance keras.Model")
self.model = model
self.lr_finder = LRFinder(self.model)
self.workers = workers
self.use_multiprocessing = use_multiprocessing
self.history = None
# save original weights of model
new_file, weightfile = tempfile.mkstemp()
self._original_weights = weightfile
except Exception as e:
warnings.warn("Could not save original model weights: %s" % (e))
self._original_weights = None
def _monitor_metrics(self):
monitor metrics
metrics = ["loss"]
m = U.metrics_from_model(self.model)
if isinstance(m, list):
if self.val_data is not None:
for m in metrics[:]:
metrics.append("val_%s" % (m))
return metrics
def get_weight_decay(self):
    """
    Get current weight decay rate.

    Returns:
      The ``weight_decay_rate`` attribute of the model's optimizer when the
      optimizer's class is named "AdamWeightDecay"; otherwise None.
    """
    optimizer = self.model.optimizer
    # The optimizer is recognized by its class name, not by isinstance.
    if type(optimizer).__name__ == "AdamWeightDecay":
        return optimizer.weight_decay_rate
    return None
def set_weight_decay(self, wd=U.DEFAULT_WD):
Sets global weight decay via AdamWeightDecay optimizer
wd(float): weight decay
def evaluate(
alias for self.validate().
Returns confusion matrix and optionally prints
a classification report.
This is currently only supported for binary and multiclass
classification, not multilabel classification.
By default, this uses val_data, as supplied to ktrain.get_learner().
Other validation or test data can optionally be supplied via the <test_data> argument.
Supply class_names to include labels instead of integer class values in classification report.
test_data(Dataset|np.ndarray): test or validation data. If None, self.val_data is used.
print_report(bool): If True, classification report will be printed. If False, report will be saved to CSV
at save_path. Not applicable to regression models.
Not applicable to regression models.
save_path(str): Classification report will be saved to this file path/name if print_report=False
Not applicable to regression models.
class_names(list): list of class names to be used in classification report instead of
class integer IDs.
return self.validate(
def validate(
Returns confusion matrix and optionally prints
a classification report.
For multilabel classification problems,confusion matrices are not supported,
but classification reports are.
By default, this uses val_data, as supplied to ktrain.get_learner().
Other validation or test data can optionally be supplied as argument.
Supply class_names to include labels instead of integer class values in classification report.
val_data(Dataset|np.ndarray): validation data. If None, self.val_data is used.
print_report(bool): If True, classification report will be printed. If False, report will be saved to CSV
at save path. Not applicable to regression models.
save_path(str): Classification report will be saved to this file path/name if print_report=False
class_names(list): list of class names to be used in classification report instead of
class integer IDs.
if val_data is not None:
val = val_data
val = self.val_data
classification, multilabel = U.is_classifier(self.model)
if not classification:
# warnings.warn('learner.validate is only for classification problems. '
#'For regression, etc., use learner.predict and learner.ground_truth '
#'to manually validate.')
# return
is_multilabel = U.is_multilabel(val) or multilabel
y_pred = self.predict(val_data=val)
y_true = self.ground_truth(val_data=val)
y_pred = np.squeeze(y_pred)
y_true = np.squeeze(y_true)
# regression evaluation
if not classification:
from sklearn.metrics import mean_absolute_error, mean_squared_error
regout = []
metrics = U.metrics_from_model(self.model)
for m in metrics:
if m in ["mae", "mean_absolute_error"]:
regout.append((m, mean_absolute_error(y_true, y_pred)))
elif m in ["mse", "mean_squared_error"]:
regout.append((m, mean_squared_error(y_true, y_pred)))
if not regout:
"%s is not supported by validate/evaluate - falling back to MAE"
regout.append(("mae", mean_absolute_error(y_true, y_pred)))
return regout
if len(y_pred.shape) == 1:
y_pred = np.where(y_pred > 0.5, 1, 0)
y_true = np.where(y_true > 0.5, 1, 0)
elif is_multilabel:
from sklearn.preprocessing import binarize
y_pred = binarize(y_pred, threshold=0.5)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_true, axis=1)
if print_report or save_path is not None:
if class_names:
class_names = [str(s) for s in class_names]
report = classification_report(
output_dict=not print_report,
report = classification_report(
output_dict=not print_report,
if print_report:
df = pd.DataFrame(report).transpose()
print("classification report saved to: %s" % (save_path))
cm_func = confusion_matrix
if is_multilabel:
"Confusion matrices do not currently support multilabel classification, so returning None"
cm = confusion_matrix(y_true, y_pred)
return cm
def _check_val(self, val_data):
if val_data is not None:
val = val_data
val = self.val_data
if val is None:
raise Exception(
"val_data must be supplied to get_learner or view_top_losses"
return val
def top_losses(self, n=4, val_data=None, preproc=None):
Computes losses on validation set sorted by examples with top losses
n(int or tuple): a range to select in form of int or tuple
e.g., n=8 is treated as n=(0,8)
val_data: optional val_data to use instead of self.val_data
preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
For some data like text data, a preprocessor
is required to undo the pre-processing
to correctly view raw data.
list of n tuples where first element is either
filepath or id of validation example and second element
is loss.
# check validation data and arguments
if val_data is not None:
val = val_data
val = self.val_data
if val is None:
raise Exception("val_data must be supplied to get_learner or top_losses")
if type(n) == type(42):
n = (0, n)
# multilabel = True if U.is_multilabel(val) else False
classification, multilabel = U.is_classifier(self.model)
# get predictions and ground truth
y_pred = self.predict(val_data=val)
y_true = self.ground_truth(val_data=val)
y_true = y_true.astype("float32")
# adjust y_true for regression problems
if (
not classification
and len(y_true.shape) == 1
and (len(y_pred.shape) == 2 and y_pred.shape[1] == 1)
y_true = np.expand_dims(y_true, -1)
# compute loss
# this doesn't work in tf.keras 1.14
# losses = self.model.loss_functions[0](tf.convert_to_tensor(y_true), tf.convert_to_tensor(y_pred))
# if U.is_tf_keras():
# L = self.model.loss_functions[0].fn
# else:
# L = self.model.loss_functions[0]
L = U.loss_fn_from_model(self.model)
losses = L(tf.convert_to_tensor(y_true), tf.convert_to_tensor(y_pred))
losses = tf.Session().run(losses)
losses = losses.numpy()
class_names = [] if preproc is None else preproc.get_classes()
if preproc is None:
class_fcn = lambda x: "%s" % (x)
class_fcn = lambda x: class_names[x]
# regression output modifications
if not classification:
if len(y_pred.shape) == 2 and y_pred.shape[1] == 1:
y_pred = np.squeeze(y_pred)
y_pred = np.around(y_pred, 2)
if len(y_true.shape) == 2 and y_true.shape[1] == 1:
y_true = np.squeeze(y_true)
y_true = np.around(y_true, 2)
# sort by loss and prune correct classifications, if necessary
if classification and not multilabel:
y_pred = np.squeeze(y_pred)
y_true = np.squeeze(y_true)
if len(y_pred.shape) == 1:
y_p = np.where(y_pred > 0.5, 1, 0)
y_t = np.where(y_true > 0.5, 1, 0)
y_p = np.argmax(y_pred, axis=1)
y_t = np.argmax(y_true, axis=1)
tups = [
(i, x, class_fcn(y_t[i]), class_fcn(y_p[i]))
for i, x in enumerate(losses)
if y_p[i] != y_t[i]
tups = [
(i, x, y_true[i], np.around(y_pred[i], 2)) for i, x in enumerate(losses)
tups.sort(key=operator.itemgetter(1), reverse=True)
# prune by given range
tups = tups[n[0] : n[1]] if n is not None else tups
return tups
def view_top_losses(self, n=4, preproc=None, val_data=None):
    """
    View observations with top losses in validation set.
    Must be overridden by Learner subclasses.

    Raises:
      NotImplementedError: always, since this base implementation is a stub.
    """
    raise NotImplementedError(
        "view_top_losses must be overriden by Learner subclass"
    )
def _make_model_folder(self, fpath):
if os.path.isfile(fpath):
raise ValueError(
f"There is an existing file named {fpath}. "
+ "Please use dfferent value for fpath."
elif os.path.exists(fpath):
# warnings.warn('model is being saved to folder that already exists: %s' % (fpath))
elif not os.path.exists(fpath):
def save_model(self, fpath):
a wrapper to
fpath(str): path to folder in which to save model
self._make_model_folder(fpath), U.MODEL_NAME), save_format="h5")
def load_model(self, fpath, custom_objects=None, **kwargs):
    """
    Loads model from folder and stores it in self.model.
    Note: **kwargs included for backwards compatibility only, as
    TransformerTextClassLearner.load_model was removed in v0.18.0.

    Args:
      fpath(str): path to folder containing model
      custom_objects(dict): custom objects required to load model.
                            For models included with ktrain, this is populated automatically
                            and can be disregarded.
    """
    # **kwargs is accepted but intentionally not forwarded.
    self.model = _load_model(
        fpath, train_data=self.train_data, custom_objects=custom_objects
    )
def _is_adamlike(self):
checks whether optimizer attached to model is an
"Adam-like" optimizer with beta_1 parameter.
return self.model is not None and hasattr(self.model.optimizer, "beta_1")
def _recompile(self, wd=None):
metrics = U.metrics_from_model(self.model)
if (
wd is not None
and wd > 0
and type(self.model.optimizer).__name__ != "AdamWeightDecay"
"recompiling model to use AdamWeightDecay as opimizer with weight decay of %s"
% (wd)
optimizer = U.get_default_optimizer(wd=wd)
elif wd is not None and wd > 0:
optimizer = U.get_default_optimizer(wd=wd)
elif wd is not None and wd == 0:
optimizer = U.DEFAULT_OPT
else: # wd is None -> don't modify optimizer
optimizer = self.model.optimizer
self.model.compile(optimizer=optimizer, loss=self.model.loss, metrics=metrics)
def set_model(self, model):
    """
    Replace model in this Learner instance.

    Args:
      model: the replacement model; must be an instance of keras.Model.
    Raises:
      ValueError: if model is not an instance of keras.Model.
    """
    if not isinstance(model, keras.Model):
        raise ValueError("model must be of instance keras.Model")
    self.model = model
    # Any recorded training history belongs to the previous model.
    self.history = None
def freeze(self, freeze_range=None):
If freeze_range is None, makes all layers trainable=False except last Dense layer.
If freeze_range is given, freezes the first <freeze_range> layers and
unfreezes all remaining layers.
NOTE: Freeze method does not currently work with
multi-GPU models. If you are using the load_imagemodel method,
please use the freeze_layers argument of load_imagemodel
to freeze layers.
freeze_range(int): number of layers to freeze
if freeze_range is None:
# freeze everything except last Dense layer
# first find last dense layer
dense_id = None
for i, layer in reversed(list(enumerate(self.model.layers))):
if isinstance(layer, keras.layers.Dense):
dense_id = i
if dense_id is None:
raise Exception("cannot find Dense layer in this model")
for i, layer in enumerate(self.model.layers):
if i < dense_id:
layer.trainable = False
layer.trainable = True
# freeze all layers up to and including layer_id
if type(freeze_range) != type(1) or freeze_range < 1:
raise ValueError("freeze_range must be integer > 0")
for i, layer in enumerate(self.model.layers):
if i < freeze_range:
layer.trainable = False
layer.trainable = True
def unfreeze(self, exclude_range=None):
Make every layer trainable except those in exclude_range.
unfreeze is simply a proxy method to freeze.
NOTE: Unfreeze method does not currently work with
multi-GPU models. If you are using the load_imagemodel method,
please use the freeze_layers argument of load_imagemodel
to freeze layers.
# make all layers trainable
for i, layer in enumerate(self.model.layers):
layer.trainable = True
if exclude_range:
for i, layer in enumerate(self.model.layers[:exclude_range]):
layer.trainable = False
def reset_weights(self, verbose=1):
Re-initializes network with original weights
if os.path.isfile(self._original_weights):
self.history = None
U.vprint("Model weights have been reset.", verbose=verbose)
"Weights have not been reset because the original weights file "
+ "(%s) no longer exists." % (self._original_weights)
def lr_find(
Plots loss as learning rate is increased. Highest learning rate
corresponding to a still falling loss should be chosen.
If you find the LR finder is running for more epochs than you'd prefer,
you can set max_epochs (e.g., max_epochs=5) to estimate LR with a
smaller sample size.
If lr_mult is supplied and max_epochs is None, LR will increase until loss diverges.
Reasonable values of lr_mult are between 1.01 and 1.05.
If max_epochs is supplied, lr_mult argument is ignored and computed automatically.
start_lr (float): smallest lr to start simulation
lr_mult (float): multiplication factor to increase LR.
Ignored if max_epochs is supplied.
max_epochs (int): maximum number of epochs to simulate.
lr_mult is ignored if max_epoch is supplied.
Default is None. Set max_epochs to an integer
(e.g., 5) if lr_find is taking too long
and running for more epochs than desired.
class_weight(dict): class_weight parameter passed to
for imbalanced datasets.
stop_factor(int): factor used to determine threshold that loss
must exceed to stop training simulation.
Increase this if loss is erratic and lr_find
exits too early.
show_plot (bool): If True, automatically invoke lr_plot
restore_weights_only(bool): If True, when training simulation is complete,
the model weights only are restored, but not
the original optimizer weights.
In at least a few cases, this seems to improve performance
when actual training begins. Further investigation is needed,
so it is False by default.
verbose (bool): specifies how much output to print
# dep_fix: bug in TF 2.2 and 2.3
if version.parse(tf.__version__) > version.parse("2.1") and version.parse(
) < version.parse("2.4"):
if max_epochs is None:
raise ValueError(
"Due to a bug in TensorFlow 2.2 and 2.3, the max_epochs argument is temporarily required. "
+ "Please re-run with max_epochs (e.g., max_epochs=5). \n"
+ "More info:"
"simulating training for different learning rates... this may take a few moments...",
# save current weights and temporarily restore original weights
# dep_fix: temporarily use save_model instead of save_weights as default due to
_weights_only = True
if restore_weights_only:
new_file, weightfile = tempfile.mkstemp()
temp_folder = tempfile.mkdtemp()
# compute steps_per_epoch
num_samples = U.nsamples_from_data(self.train_data)
bs = (
if hasattr(self.train_data, "batch_size")
else self.batch_size
if U.is_iter(self.train_data):
use_gen = True
steps_per_epoch = num_samples // bs
use_gen = False
steps_per_epoch = np.ceil(num_samples / bs)
# check steps_per_epoch
if steps_per_epoch <= 64 and max_epochs is None:
"max_epochs is being set to 5 since steps per epoch is small. "
+ "If you wish to estimate LR using more epochs, set max_epochs manually."
max_epochs = 5
# track and plot learning rates
self.lr_finder = LRFinder(self.model, stop_factor=stop_factor)
except KeyboardInterrupt:
# re-load current weights
# self.model.load_weights(weightfile)
# re-load current weights
# dep_fix: temporarily use load_model instead of load_weights as default due to
if restore_weights_only:
# instructions to invoker
U.vprint("\n", verbose=verbose)
U.vprint("done.", verbose=verbose)
if show_plot:
"Visually inspect loss plot and select learning rate associated with falling loss",
"Please invoke the Learner.lr_plot() method to visually inspect "
"the loss plot to help identify the maximal learning rate "
"associated with falling loss.",
def lr_estimate(self):
    """
    Return numerical estimates of lr using different methods:
      1. lr associated with minimum numerical gradient (None if gradient computation fails)
      2. lr associated with minimum loss divided by 10
      3. lr associated with longest valley
    Since none of these methods are fool-proof and can
    potentially return bad estimates, it is recommended that you
    examine the plot generated by lr_plot to estimate the learning rate.

    Returns:
      The value returned by self.lr_finder.estimate_lr().
      NOTE(review): previously documented as a tuple of the form
      (float, float) although three methods are listed - confirm the
      arity against LRFinder.estimate_lr.
    Raises:
      ValueError: if self.lr_finder is None or lr_find has not been called.
    """
    finder = self.lr_finder
    if finder is None or not finder.find_called():
        raise ValueError("Please call lr_find first.")
    return finder.estimate_lr()
def lr_plot(
self, n_skip_beginning=10, n_skip_end=5, suggest=False, return_fig=False
Plots the loss vs. learning rate to help identify
The maximal learning rate associated with a falling loss.
The n_skip_beginning and n_skip_end arguments can be used
to "zoom in" on the plot.
n_skip_beginning(int): number of batches to skip on the left.
n_skip_end(int): number of batches to skip on the right.
suggest(bool): will highlight numerical estimate
of best lr if True - methods adapted from fastai
return_fig(bool): If True, return matplotlib.figure.Figure
matplotlib.figure.Figure if return_fig else None
# dep_fix: bug in TF 2.2 and 2.3
if version.parse(tf.__version__) > version.parse("2.1") and version.parse(
) < version.parse("2.4"):
if n_skip_end == 5:
n_skip_end = 10
if self.lr_finder is None or not self.lr_finder.find_called():
raise ValueError("Please call lr_find first.")
return self.lr_finder.plot_loss(
def plot(self, plot_type="loss", return_fig=False):
plots training history
plot_type (str): A valid value in tf.keras History. Either a built-in value {'loss', 'lr', 'momentum'} or
other values previously specified by user. For instance, if 'mae' and/or 'mse' is previously specified as metrics
when creating model, then these values can also be specified.
return_fig(bool): If True, return matplotlib.figure.Figure
matplotlib.figure.Figure if return_fig else None
if self.history is None:
raise Exception("No training history - did you train the model yet?")
if not isinstance(plot_type, str):
raise ValueError("plot_type must be str/string")
fig = None
if plot_type == "loss":
if "val_loss" in self.history.history:
legend_items = ["train", "validation"]
legend_items = ["train"]
plt.title("Model Loss")
plt.legend(legend_items, loc="upper left")
elif plot_type == "lr":
if "lr" not in self.history.history:
raise ValueError(
"no lr in history: are you sure you used autofit or fit_onecycle to train?"
plt.title("LR Schedule")
elif plot_type == "momentum":
if "momentum" not in self.history.history:
raise ValueError(
"no momentum history: are you sure you used autofit or fit_onecycle to train?"
plt.title("Momentum Schedule")
if plot_type not in self.history.history:
raise ValueError(
f"no {plot_type} in history: are you sure {plot_type} exists in history?"
val_key = f"val_{plot_type}"
if val_key in self.history.history:
legend_items = ["train", "validation"]
f"Validation value for {plot_type} wasn't found in history"
legend_items = ["train"]
plt.title(f"History of {plot_type}")
plt.legend(legend_items, loc="upper left")
fig = plt.gcf()
if return_fig:
return fig
def print_layers(self, show_wd=False):
prints the layers of the model along with indices
if show_wd:
"set_weight_decay now uses AdamWeightDecay instead of kernel_regularizers."
for i, layer in enumerate(self.model.layers):
if show_wd and hasattr(layer, "kernel_regularizer"):
reg = layer.kernel_regularizer
if hasattr(reg, "l2"):
wd = reg.l2
elif hasattr(reg, "l1"):
wd = reg.l1
wd = None
print("%s (trainable=%s, wd=%s) : %s" % (i, layer.trainable, wd, layer))
print("%s (trainable=%s) : %s" % (i, layer.trainable, layer))
def layer_output(self, layer_id, example_id=0, use_val=False):
    """
    Placeholder that should be implemented in a subclass.

    Raises:
      NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
def set_lr(self, lr):
K.set_value(, lr)
def _check_cycles(self, n_cycles, cycle_len, cycle_mult):
if type(n_cycles) != type(1) or n_cycles < 1:
raise ValueError("n_cycles must be >= 1")
if type(cycle_mult) != type(1) or cycle_mult < 1:
raise ValueError("cycle_mult must by >= 1")
if cycle_len is not None:
if type(cycle_len) != type(1) or cycle_len < 1:
raise ValueError("cycle_len must either be None or >= 1")
# calculate number of epochs
if cycle_len is None:
epochs = n_cycles
epochs = 0
tmp_cycle_len = cycle_len
for i in range(n_cycles):
epochs += tmp_cycle_len
tmp_cycle_len *= cycle_mult
return epochs
def _cb_sgdr(
self, max_lr, steps_per_epoch, cycle_len, cycle_mult, lr_decay=1.0, callbacks=[]
if callbacks and "SGDRScheduler" in [type(cb).__name__ for cb in callbacks]:
return callbacks
# configuration
min_lr = 1e-9
if max_lr <= min_lr:
min_lr = max_lr / 10
# use learning_rate schedule
if cycle_len is not None:
if not isinstance(callbacks, list):
callbacks = []
from .lroptimize.sgdr import SGDRScheduler
schedule = SGDRScheduler(
if not callbacks:
callbacks = None
return callbacks
def _cb_checkpoint(self, folder, callbacks=[]):
if callbacks and "ModelCheckpoint" in [type(cb).__name__ for cb in callbacks]:
return callbacks
if folder is not None:
os.makedirs(folder, exist_ok=True)
if not isinstance(callbacks, list):
callbacks = []
if self.val_data is not None:
filepath = os.path.join(
folder, "weights-{epoch:02d}-{val_loss:.2f}.hdf5"
filepath = os.path.join(folder, "weights-{epoch:02d}.hdf5")
filepath, save_best_only=False, save_weights_only=True
if not callbacks:
callbacks = None
return callbacks
def _cb_earlystopping(self, early_stopping, callbacks=[]):
if callbacks and "EarlyStopping" in [type(cb).__name__ for cb in callbacks]:
return callbacks
if early_stopping:
if not isinstance(callbacks, list):
callbacks = []
# if StrictVersion(keras.__version__) >= StrictVersion('2.2.3'):
except TypeError:
The early_stopping=True argument relies on EarlyStopping.restore_best_weights,
which is only supported on Keras 2.2.3 or greater.
For now, we are falling back to EarlyStopping.restore_best_weights=False.
Please use checkpoint_folder option in fit() to restore best weights."""
if not callbacks:
callbacks = None
return callbacks
def _prepare(self, data, train=True):
Subclasses can override this method if data
needs to be specially-prepared prior to invoking fit methods
data: dataset
train(bool): If True, prepare for training. Otherwise, prepare for evaluation.
if data is None:
return None
if hasattr(data, "to_tfdataset"):
return data.to_tfdataset(train=train)
return data
def fit(self, lr, n_cycles, cycle_len=None, cycle_mult=1, batch_size=U.DEFAULT_BS):
def fit_onecycle(
Train model using a version of Leslie Smith's 1cycle policy.
This method can be used with any optimizer. Thus,
cyclical momentum is not currently implemented.
lr (float): (maximum) learning rate.
It is recommended that you estimate lr yourself by
running lr_finder (and lr_plot) and visually inspect plot
for dramatic loss drop.
epochs (int): Number of epochs.
checkpoint_folder (string): Folder path in which to save the model weights
for each epoch.
File name will be of the form:
cycle_momentum (bool): If True and optimizer is Adam, Nadam, or Adamax, momentum of
optimizer will be cycled between 0.95 and 0.85 as described in
Only takes effect if Adam, Nadam, or Adamax optimizer is used.
max_momentum(float): Maximum momentum to use if cycle_momentum=True
min_momentum(float): minimum momentum to use if cycle_momentum=True
class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float)
callbacks (list): list of Callback instances to employ during training
steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used.
Ignored unless training dataset is generator.
verbose (bool): verbose mode
if not self._is_adamlike() and cycle_momentum:
"cyclical momentum has been disabled because "
+ 'optimizer is not "Adam-like" with beta_1 param'
cycle_momentum = False
num_samples = U.nsamples_from_data(self.train_data)
if steps_per_epoch is None:
steps_per_epoch = math.ceil(num_samples / self.batch_size)
# setup callbacks for learning rates and early stopping
if not callbacks:
kcallbacks = []
kcallbacks = callbacks[:]
if cycle_momentum:
max_momentum = max_momentum
min_momentum = min_momentum
max_momentum = None
min_momentum = None
from .lroptimize.triangular import CyclicLR
clr = CyclicLR(
base_lr=lr / 10,
step_size=math.ceil((steps_per_epoch * epochs) / 2),
# start training
policy = "onecycle"
U.vprint("\n", verbose=verbose)
"begin training using %s policy with max lr of %s..." % (policy, lr),
hist =
hist.history["lr"] = clr.history["lr"]
hist.history["iterations"] = clr.history["iterations"]
if cycle_momentum:
hist.history["momentum"] = clr.history["momentum"]
self.history = hist
return hist
def autofit(
Automatically train model using a default learning rate schedule shown to work well
in practice. By default, this method currently employs a triangular learning
rate policy (
During each epoch, this learning rate policy varies the learning rate from lr/10 to lr
and then back to a low learning rate that is near-zero.
If epochs is None, then early_stopping and reduce_on_plateau are automatically
set to 5 and 2, respectively.
lr (float): optional initial learning rate. If missing,
lr will be estimated automatically.
It is recommended that you estimate lr yourself by
running lr_finder (and lr_plot) and visually inspect plot
for dramatic loss drop.
epochs (int): Number of epochs. If None, training will continue until
validation loss no longer improves after 5 epochs.
early_stopping (int): If not None, training will automatically stop after this many
epochs of no improvement in validation loss.
Upon completion, model will be loaded with weights from epoch
with lowest validation loss.
NOTE: If reduce_on_plateau is also enabled, then
early_stopping must be greater than reduce_on_plateau.
Example: early_stopping=6, reduce_on_plateau=3.
reduce_on_plateau (int): If not None, will lower learning rate
when validation loss fails to improve after
the specified number of epochs.
NOTE: If early_stopping is enabled, then
reduce_on_plateau must be less than early_stopping.
Example: early_stopping=6, reduce_on_plateau=3.
reduce_factor (int): Learning rate is reduced by this factor on plateau.
Only takes effect if reduce_on_plateau > 0.
cycle_momentum (bool): If True and optimizer is Adam, Nadam, or Adamax, momentum of
optimizer will be cycled between 0.95 and 0.85 as described in
Only takes effect if Adam, Nadam, or Adamax optimizer is used.
max_momentum(float): maximum momentum to use when cycle_momentum=True
min_momentum(float): minimum momentum to use when cycle_momentum=True
checkpoint_folder (string): Folder path in which to save the model weights
for each epoch.
File name will be of the form:
monitor (str): what metric to monitor for early_stopping
and reduce_on_plateau. Defaults to 'val_loss'.
Only used if early_stopping or reduce_on_plateau
is enabled.
class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float)
callbacks (list): list of Callback instances to employ during training
steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used.
Ignored unless training dataset is generator.
verbose (bool): verbose mode
# check optimizer
if not self._is_adamlike() and cycle_momentum:
"cyclical momentum has been disabled because "
+ 'optimizer is not "Adam-like" with beta_1 param'
cycle_momentum = False
# setup learning rate policy
num_samples = U.nsamples_from_data(self.train_data)
if steps_per_epoch is None:
steps_per_epoch = math.ceil(num_samples / self.batch_size)
step_size = math.ceil(steps_per_epoch / 2)
# handle missing epochs
if epochs is None:
epochs = 1024
if not early_stopping:
early_stopping = U.DEFAULT_ES
"early_stopping automatically enabled at patience=%s"
if not reduce_on_plateau:
reduce_on_plateau = U.DEFAULT_ROP
"reduce_on_plateau automatically enabled at patience=%s"
if (
and early_stopping
and (reduce_on_plateau > early_stopping)
"reduce_on_plateau=%s and is greater than " % (reduce_on_plateau)
+ "early_stopping=%s. " % (early_stopping)
+ "Either reduce reduce_on_plateau or set early_stopping "
+ "to be higher."
# check monitor
if reduce_on_plateau is not None or early_stopping is not None:
if monitor.startswith("val_") and self.val_data is None:
raise ValueError(
"monitor is %s but no val_data was supplied.\nChange monitor or supply val_data to get_learner function."
% monitor
if monitor != "val_loss" and monitor not in self._monitor_metrics:
raise ValueError(
"monitor must be one of {%s}" % (self._monitor_metrics)
# setup callbacks for learning rates and early stopping
if not callbacks:
kcallbacks = []
kcallbacks = callbacks[:]
if cycle_momentum:
max_momentum = max_momentum
min_momentum = min_momentum
max_momentum = None
min_momentum = None
from .lroptimize.triangular import CyclicLR
clr = CyclicLR(
base_lr=lr / 10,
if early_stopping:
# start training
U.vprint("\n", verbose=verbose)
policy = "triangular learning rate"
"begin training using %s policy with max lr of %s..." % (policy, lr),
hist =
hist.history["lr"] = clr.history["lr"]
hist.history["iterations"] = clr.history["iterations"]
if cycle_momentum:
hist.history["momentum"] = clr.history["momentum"]
self.history = hist
return hist
def ground_truth(self, val_data=None):
    """
    Return the targets of the validation data via U.y_from_data.

    Args:
      val_data: optional validation data to use instead of self.val_data.
    Returns:
      The result of U.y_from_data on the selected validation data.
    Raises:
      Exception: if the selected validation data is falsy (e.g. None).
    """
    val = val_data if val_data is not None else self.val_data
    # Truthiness check kept as in the original: empty data is rejected as well as None.
    if not val:
        raise Exception("val_data must be supplied to get_learner or ground_truth")
    return U.y_from_data(val)
def predict(self, val_data=None):
Makes predictions on validation set
if val_data is not None:
val = val_data
val = self.val_data
if val is None:
raise Exception("val_data must be supplied to get_learner or predict")
if U.is_iter(val):
if hasattr(val, "reset"):
steps = np.ceil(U.nsamples_from_data(val) / val.batch_size)
# *_generator methods are deprecated from TF 2.1.0
# result = self.model.predict_generator(self._prepare(val, train=False),
# steps=steps)
result = self.model.predict(self._prepare(val, train=False), steps=steps)
return result
return self.model.predict(val[0], batch_size=self.eval_batch_size)
class ArrayLearner(Learner):
Main class used to tune and train Keras models
using Array data. An object of this class should be instantiated
via the ktrain.get_learner method instead of directly.
Main parameters are:
model (Model): A compiled instance of
train_data (ndarray): A tuple of (x_train, y_train), where x_train and
y_train are numpy.ndarrays.
val_data (ndarray): A tuple of (x_test, y_test), where x_test and
y_test are numpy.ndarrays.
def __init__(
model, workers=workers, use_multiprocessing=use_multiprocessing
self.train_data = train_data
self.val_data = val_data
self.batch_size = batch_size
self.eval_batch_size = eval_batch_size
def fit(
Trains the model. By default, fit is simply a wrapper for
When cycle_len parameter is supplied, an SGDR learning rate schedule is used.
Trains the model.
lr (float): learning rate
n_cycles (int): n_cycles
cycle_len (int): If not None, decay learning rate over <cycle_len>
epochs until restarting/resetting learning rate to <lr>.
If None, lr remains constant
cycle_mult (int): Increase cycle_len by factor of cycle_mult.
This will gradually elongate the cycle.
Has no effect if cycle_len is None.
lr_decay(float): rate of decay of learning rate each cycle
checkpoint_folder (string): Folder path in which to save the model weights
for each epoch.
File name will be of the form:
early_stopping (int): If not None, training will automatically stop after this many
epochs of no improvement in validation loss.
Upon completion, model will be loaded with weights from epoch
with lowest validation loss.
callbacks (list): list of Callback instances to employ during training
class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float)
steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used.
Ignored unless training dataset is generator (and in ArrayLearner instances).
verbose (bool): whether or not to show progress bar
# check early_stopping
if self.val_data is None and early_stopping is not None:
raise ValueError(
"early_stopping monitors val_loss but validation data not set"
# setup data
x_train = self.train_data[0]
y_train = self.train_data[1]
validation = None
if self.val_data:
validation = (self.val_data[0], self.val_data[1])
# setup learning rate schedule
epochs = self._check_cycles(n_cycles, cycle_len, cycle_mult)
# set call backs
kcallbacks = callbacks if callbacks else None
kcallbacks = self._cb_sgdr(
np.ceil(len(x_train) / self.batch_size),
kcallbacks = self._cb_checkpoint(checkpoint_folder, callbacks=kcallbacks)
kcallbacks = self._cb_earlystopping(early_stopping, callbacks=kcallbacks)
sgdr = (
[cb for cb in kcallbacks if type(cb).__name__ == "SGDRScheduler"]
if kcallbacks
else None
sgdr = sgdr[0] if sgdr else None
# train model
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message=".*Check your callbacks.*")
hist =
self._prepare(y_train, train=False),
if sgdr is not None:
hist.history["lr"] = sgdr.history["lr"]
self.history = hist
if early_stopping:
"Weights from best epoch have been loaded into model.", verbose=verbose
# loss, acc = self.model.evaluate(self.val_data[0], self.val_data[1])
# U.vprint('\n', verbose=verbose)
# U.vprint('Early stopping due to no further improvement.', verbose=verbose)
# U.vprint('final loss:%s, final score:%s' % (loss, acc), verbose=verbose)
return hist
def layer_output(self, layer_id, example_id=0, use_val=False):
Prints output of layer with index <layer_id> to help debug models.
Uses first example (example_id=0) from training set, by default.
inp = self.model.layers[0].input
outp = self.model.layers[layer_id].output
f_out = K.function([inp], [outp])
if not use_val:
example = self.train_data[0][example_id]
example = self.val_data[0][example_id]
layer_out = f_out(
return layer_out
def view_top_losses(self, n=4, preproc=None, val_data=None):
Views observations with top losses in validation set.
Typically over-ridden by Learner subclasses.
n(int or tuple): a range to select in form of int or tuple
e.g., n=8 is treated as n=(0,8)
preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
For some data like text data, a preprocessor
is required to undo the pre-processing
to correctly view raw data.
val_data: optional val_data to use instead of self.val_data
list of n tuples where first element is either
filepath or id of validation example and second element
is loss.
val = self._check_val(val_data)
# get top losses and associated data
tups = self.top_losses(n=n, val_data=val, preproc=preproc)
# get multilabel status and class names
classes = preproc.get_classes() if preproc is not None else None
# iterate through losses
for tup in tups:
# get data
idx = tup[0]
loss = tup[1]
truth = tup[2]
pred = tup[3]
obs = val[0][idx]
join_char = " "
if preproc is not None:
obs = preproc.undo(obs)
if preproc is not None and isinstance(preproc, TextPreprocessor):
if preproc.is_nospace_lang():
join_char = ""
if type(obs) == str:
obs = join_char.join(obs.split()[:512])
"id:%s | loss:%s | true:%s | pred:%s)\n"
% (idx, round(loss, 2), truth, pred)
class GenLearner(Learner):
Main class used to tune and train Keras models
using a Keras generator (e.g., DirectoryIterator).
Objects of this class should be instantiated using the
ktrain.get_learner function, rather than directly.
Main parameters are:
model (Model): A compiled instance of
train_data (Iterator): a Iterator instance for training set
val_data (Iterator): A Iterator instance for validation set
def __init__(
model, workers=workers, use_multiprocessing=use_multiprocessing
self.train_data = train_data
self.val_data = val_data
self.batch_size = batch_size
self.eval_batch_size = eval_batch_size
if self.train_data:
self.train_data.batch_size = batch_size
if self.val_data:
self.val_data.batch_size = eval_batch_size
def fit(
Trains the model. By default, fit is simply a wrapper for (for generators/sequences).
When cycle_len parameter is supplied, an SGDR learning rate schedule is used.
lr (float): learning rate
n_cycles (int): n_cycles
cycle_len (int): If not None, decay learning rate over <cycle_len>
epochs until restarting/resetting learning rate to <lr>.
If None, lr remains constant
cycle_mult (int): Increase cycle_len by factor of cycle_mult.
This will gradually elongate the cycle.
Has no effect if cycle_len is None.
lr_decay (float): rate of decay of learning reach each cycle.
Has no effect if cycle_len is None
checkpoint_folder (string): Folder path in which to save the model weights
for each epoch.
File name will be of the form:
early_stopping (int): If not None, training will automatically stop after this many
epochs of no improvement in validation loss.
Upon completion, model will be loaded with weights from epoch
with lowest validation loss.
class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float)
callbacks (list): list of Callback instances to employ during training
steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used.
verbose (boolean): whether or not to print progress bar
# check early_stopping
if self.val_data is None and early_stopping is not None:
raise ValueError(
"early_stopping monitors val_loss but validation data not set"
# handle callbacks
num_samples = U.nsamples_from_data(self.train_data)
train_bs = (
if hasattr(self.train_data, "batch_size")
else self.batch_size
if steps_per_epoch is None:
steps_per_epoch = math.ceil(num_samples / train_bs)
validation_steps = None
if self.val_data is not None:
val_bs = (
if hasattr(self.val_data, "batch_size")
else self.batch_size
validation_steps = math.ceil(U.nsamples_from_data(self.val_data) / val_bs)
epochs = self._check_cycles(n_cycles, cycle_len, cycle_mult)
# set call backs
kcallbacks = callbacks if callbacks else None
kcallbacks = self._cb_sgdr(
lr, steps_per_epoch, cycle_len, cycle_mult, lr_decay, callbacks=kcallbacks
kcallbacks = self._cb_checkpoint(checkpoint_folder, callbacks=kcallbacks)
kcallbacks = self._cb_earlystopping(early_stopping, callbacks=kcallbacks)
sgdr = (
[cb for cb in kcallbacks if type(cb).__name__ == "SGDRScheduler"]
if kcallbacks
else None
sgdr = sgdr[0] if sgdr else None
# if kcallbacks: print([type(cb).__name__ for cb in kcallbacks])
# MNIST times per epoch on Titan V
# workers=4, usemp=True 9 sec.
# workers=1, usemp=True 12 sec.
# workers=1, usemp=False 16 sec.
# workers=4, usemp=False 30+ sec.
# print(self.workers)
# print(self.use_multiprocessing)
# train model
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message=".*Check your callbacks.*")
fit_fn =
hist = fit_fn(
validation_data=self._prepare(self.val_data, train=False),
if sgdr is not None:
hist.history["lr"] = sgdr.history["lr"]
self.history = hist
if early_stopping:
"Weights from best epoch have been loaded into model.", verbose=verbose
# loss, acc = self.model.evaluate_generator(self.val_data)
# U.vprint('\n', verbose=verbose)
# U.vprint('Early stopping due to no further improvement.', verbose=verbose)
# U.vprint('final loss:%s, final score:%s' % (loss, acc), verbose=verbose)
return hist
def layer_output(self, layer_id, example_id=0, batch_id=0, use_val=False):
Prints output of layer with index <layer_id> to help debug models.
Uses first example (example_id=0) from first batch from training set, by default.
inp = self.model.layers[0].input
outp = self.model.layers[layer_id].output
f_out = K.function([inp], [outp])
if not use_val:
example = self.train_data[0][batch_id][example_id]
example = self.val_data[0][batch_id][example_id]
layer_out = f_out(
return layer_out
# def view_top_losses(self, n=4, preproc=None, val_data=None):
# """
# Views observations with top losses in validation set.
# Musta be overridden by Learner subclasses.
# """
# raise NotImplementedError('view_top_losses must be overriden by GenLearner subclass')
def view_top_losses(self, n=4, preproc=None, val_data=None):
Views observations with top losses in validation set.
Typically over-ridden by Learner subclasses.
n(int or tuple): a range to select in form of int or tuple
e.g., n=8 is treated as n=(0,8)
preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
For some data like text data, a preprocessor
is required to undo the pre-processing
to correctly view raw data.
val_data: optional val_data to use instead of self.val_data
list of n tuples where first element is either
filepath or id of validation example and second element
is loss.
val = self._check_val(val_data)
# get top losses and associated data
tups = self.top_losses(n=n, val_data=val, preproc=preproc)
# get multilabel status and class names
classes = preproc.get_classes() if preproc is not None else None
# iterate through losses
for tup in tups:
# get data
idx = tup[0]
loss = tup[1]
truth = tup[2]
pred = tup[3]
"id:%s | loss:%s | true:%s | pred:%s)\n"
% (idx, round(loss, 2), truth, pred)
# ------------------------------------------------------------------------------
# Predictor functions
# ------------------------------------------------------------------------------
def get_predictor(model, preproc, batch_size=U.DEFAULT_BS):
    """
    ```
    Returns a Predictor instance that can be used to make predictions on
    unlabeled examples.  Can be saved to disk and reloaded as part of a
    larger application.

    Args
        model (Model):        A compiled instance of keras.Model
        preproc(Preprocessor):   An instance of ImagePreprocessor, TextPreprocessor,
                                 NERPreprocessor, NodePreprocessor, LinkPreprocessor
                                 or TabularPreprocessor.
                                 These instances are returned from the data loading
                                 functions in the ktrain vision and text modules:
                                 ktrain.text.texts_from_folder
                                 ktrain.text.texts_from_csv
                                 ktrain.text.ner.entities_from_csv
        batch_size(int):    batch size to use.  default:32
    Returns:
        the Predictor subclass instance matching the type of preproc
    Raises:
        ValueError: if model is not a keras.Model or preproc is not one of the
                    supported preprocessor types
    ```
    """

    # check arguments
    if not isinstance(model, keras.Model):
        raise ValueError("model must be of instance keras.Model")
    if not isinstance(
        preproc,
        (
            ImagePreprocessor,
            TextPreprocessor,
            NERPreprocessor,
            NodePreprocessor,
            LinkPreprocessor,
            TabularPreprocessor,
        ),
    ):
        raise ValueError("preproc must be instance of ktrain.preprocessor.Preprocessor")
    if isinstance(preproc, ImagePreprocessor):
        return ImagePredictor(model, preproc, batch_size=batch_size)
    elif isinstance(preproc, TextPreprocessor):
        return TextPredictor(model, preproc, batch_size=batch_size)
    elif isinstance(preproc, NERPreprocessor):
        return NERPredictor(model, preproc, batch_size=batch_size)
    elif isinstance(preproc, NodePreprocessor):
        return NodePredictor(model, preproc, batch_size=batch_size)
    elif isinstance(preproc, LinkPreprocessor):
        return LinkPredictor(model, preproc, batch_size=batch_size)
    elif isinstance(preproc, TabularPreprocessor):
        return TabularPredictor(model, preproc, batch_size=batch_size)
    else:
        # defensive only: the type check above already rejects everything else
        raise Exception("preproc of type %s not currently supported" % (type(preproc)))
def load_predictor(fpath, batch_size=U.DEFAULT_BS, custom_objects=None):
    """
    ```
    Loads a previously saved Predictor instance

    Args
      fpath(str): predictor path name.
                  From v0.16.x, this is always the path to a folder.
                  Pre-v0.16.x, this is the base name used to save model and .preproc instance.
      batch_size(int): batch size to use for predictions. default:32
      custom_objects(dict): custom objects required to load model.
                            This is useful if you compiled the model with a custom loss function, for example.
                            For models included with ktrain as is, this is populated automatically
                            and can be disregarded.
    Returns:
      the Predictor built by get_predictor for the loaded model and preprocessor
    Raises:
      Exception: if the .preproc file cannot be loaded from either location
                 (the underlying error is chained as the cause), or if the
                 stored image preprocessing function name is not recognized
      ValueError: if the loaded model or preprocessor has an unsupported type
    ```
    """

    # load the preprocessor
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file, so only predictor files from trusted sources should be loaded.
    new_location = os.path.join(fpath, U.PREPROC_NAME)
    try:
        with open(new_location, "rb") as f:
            preproc = pickle.load(f)
    except Exception:
        # pre-v0.16.x layout: the preprocessor is stored as <fpath>.preproc
        old_location = fpath + ".preproc"
        try:
            with open(old_location, "rb") as f:
                preproc = pickle.load(f)
        except Exception as err:
            raise Exception(
                "Failed to load .preproc file in either the post v0.16.x loction (%s) or pre v0.16.x location (%s)"
                % (new_location, old_location)
            ) from err

    # load the model
    model = _load_model(fpath, preproc=preproc, custom_objects=custom_objects)

    # preprocessing functions in ImageDataGenerators are not pickable
    # so, we must reconstruct
    if hasattr(preproc, "datagen") and hasattr(preproc.datagen, "ktrain_preproc"):
        fn_name = preproc.datagen.ktrain_preproc
        if fn_name == "resnet50":
            preproc.datagen.preprocessing_function = (
                keras.applications.resnet50.preprocess_input
            )
        elif fn_name == "mobilenet":
            preproc.datagen.preprocessing_function = (
                keras.applications.mobilenet.preprocess_input
            )
        elif fn_name == "mobilenetv3":
            preproc.datagen.preprocessing_function = (
                keras.applications.mobilenet_v3.preprocess_input
            )
        elif fn_name == "inception":
            preproc.datagen.preprocessing_function = (
                keras.applications.inception_v3.preprocess_input
            )
        elif fn_name == "efficientnet":
            preproc.datagen.preprocessing_function = (
                keras.applications.efficientnet.preprocess_input
            )
        else:
            raise Exception("Uknown preprocessing_function name: %s" % (fn_name))

    # return the appropriate predictor; get_predictor performs the same
    # argument checks and type dispatch that used to be duplicated here
    return get_predictor(model, preproc, batch_size=batch_size)
# ----------------------------------------
# Utility Functions
# ----------------------------------------
def release_gpu_memory(device=0):
    """
    ```
    Release GPU memory allocated by Tensorflow

    Clears the Keras backend session, then selects the given CUDA device
    through numba and closes it.

    Args:
      device(int): device index passed to numba's cuda.select_device. default:0
    ```
    """
    # imported lazily so numba is only needed when this function is called
    from numba import cuda

    K.clear_session()
    cuda.select_device(device)
    cuda.close()
    return
def _load_model(fpath, preproc=None, train_data=None, custom_objects=None):
if not preproc and not train_data:
raise ValueError("Either preproc or train_data is required.")
if (preproc and isinstance(preproc, TransformersPreprocessor)) or (
train_data and U.is_huggingface(data=train_data)
if preproc:
model = preproc.get_model(fpath=fpath)
# if model_name is local_path, update it to reflect current predictor folder
# in case learner was trained with local path on different machine
# TODO: support this for Windows paths
if preproc.model_name.startswith(os.sep):
preproc.model_name = fpath
model = TransformersPreprocessor.load_model_and_configure_from_data(
fpath, train_data
return model
elif (
and (
isinstance(preproc, BERTPreprocessor)
or type(preproc).__name__ == "BERTPreprocessor"
or train_data
and U.bert_data_tuple(train_data)
# custom BERT model
if isinstance(custom_objects, dict):
custom_objects = keras_bert.get_custom_objects()
elif (
and (
isinstance(preproc, NERPreprocessor)
or type(preproc).__name__ == "NERPreprocessor"
or train_data
and U.is_ner(data=train_data)
from .text.ner.anago.layers import CRF, crf_loss
custom_objects = {"CRF": CRF, "crf_loss": crf_loss}
# save old te_model as backup
if preproc:
old_te_model = preproc.p.te_model
# load TransformerEmbedding model from fpath/hf folder
# if model_name is local_path, update it to reflect current predictor folder, since
# all model/tokenizer/config files should have been saved there by
preproc.p.te_model = (
os.path.join(fpath, "hf") if preproc.p.te_model else preproc.p.te_model
if preproc.p.te_model:
# te_model should point fpath/hf folder
preproc.p.te_model, layers=preproc.p.te_layers
# fall back to old model id or location if error for backwards compatibility
f"could not load TransformerEmbedding model from {preproc.p.te_model} - trying {old_te_model}"
preproc.p.te_model = old_te_model
preproc.p.te_model, layers=preproc.p.te_layers
elif (
and (
isinstance(preproc, NodePreprocessor)
or type(preproc).__name__ == "NodePreprocessor"
or train_data
and U.is_nodeclass(data=train_data)
from stellargraph.layer import MeanAggregator
custom_objects = {"MeanAggregator": MeanAggregator}
elif (
and (
isinstance(preproc, LinkPreprocessor)
or type(preproc).__name__ == "LinkPreprocessor"
or train_data
and U.is_linkpred(data=train_data)
from stellargraph.layer import MeanAggregator
custom_objects = {"MeanAggregator": MeanAggregator}
custom_objects = {} if custom_objects is None else custom_objects
from .lroptimize.optimization import AdamWeightDecay
custom_objects["AdamWeightDecay"] = AdamWeightDecay
model = keras.models.load_model(
os.path.join(fpath, U.MODEL_NAME), custom_objects=custom_objects
# pre-0.16: model fpath was file name of model not folder for non-Transformer models
# warnings.warn('could not load model as %s - attempting to load model as %s' % (os.path.join(fpath, U.MODEL_NAME), fpath))
model = keras.models.load_model(fpath, custom_objects=custom_objects)
# for bilstm models without CRF layer on TF2 where CRF is not supported
model = keras.models.load_model(
fpath, custom_objects={"AdamWeightDecay": AdamWeightDecay}
except Exception as e:
"Call to keras.models.load_model failed. Try manually invoking this function to investigate error and report issue if necessary."
raise Exception("Error detected: %s" % (e))
# see issue
if hasattr(model, "_make_predict_function"):
return model
def get_predictor(model, preproc, batch_size=32)
Returns a Predictor instance that can be used to make predictions on unlabeled examples. Can be saved to disk and reloaded as part of a larger application. Args: model (Model): A compiled instance of keras.Model. preproc (Preprocessor): An instance of ImagePreprocessor, TextPreprocessor, NERPreprocessor, NodePreprocessor, LinkPreprocessor, or TabularPreprocessor. These instances are returned from the data loading functions in the ktrain vision and text modules, e.g.: ktrain.text.texts_from_folder, ktrain.text.texts_from_csv, ktrain.text.ner.entities_from_csv. batch_size (int): batch size to use. default: 32
Expand source code
def get_predictor(model, preproc, batch_size=U.DEFAULT_BS):
    """
    ```
    Builds the Predictor that matches the type of the supplied preprocessor.
    The result can be used to make predictions on unlabeled examples and can
    be saved to disk and reloaded as part of a larger application.

    Args
        model (Model):        A compiled instance of keras.Model
        preproc(Preprocessor):   An instance of one of the preprocessor types
                                 listed in the dispatch table below.
                                 These instances are returned from the data loading
                                 functions in the ktrain vision and text modules:
                                 ktrain.text.texts_from_folder
                                 ktrain.text.texts_from_csv
                                 ktrain.text.ner.entities_from_csv
        batch_size(int):    batch size to use.  default:32
    Returns:
        the Predictor subclass instance matching the type of preproc
    Raises:
        ValueError: if model is not a keras.Model or preproc is not one of the
                    supported preprocessor types
    ```
    """
    # check arguments
    if not isinstance(model, keras.Model):
        raise ValueError("model must be of instance keras.Model")

    # (preprocessor type, predictor type) pairs; the order is the order in
    # which the types are tested
    dispatch = (
        (ImagePreprocessor, ImagePredictor),
        (TextPreprocessor, TextPredictor),
        (NERPreprocessor, NERPredictor),
        (NodePreprocessor, NodePredictor),
        (LinkPreprocessor, LinkPredictor),
        (TabularPreprocessor, TabularPredictor),
    )
    supported_types = tuple(preproc_cls for preproc_cls, _ in dispatch)
    if not isinstance(preproc, supported_types):
        raise ValueError("preproc must be instance of ktrain.preprocessor.Preprocessor")

    for preproc_cls, predictor_cls in dispatch:
        if isinstance(preproc, preproc_cls):
            return predictor_cls(model, preproc, batch_size=batch_size)

    # defensive only: the type check above already rejects everything else
    raise Exception("preproc of type %s not currently supported" % (type(preproc)))
def load_predictor(fpath, batch_size=32, custom_objects=None)
Loads a previously saved Predictor instance. Args: fpath (str): predictor path name (the value supplied to predictor.save). From v0.16.x, this is always the path to a folder. Pre-v0.16.x, this is the base name used to save the model and .preproc instance. batch_size (int): batch size to use for predictions. default: 32. custom_objects (dict): custom objects required to load the model. This is useful if you compiled the model with a custom loss function, for example. For models included with ktrain as is, this is populated automatically and can be disregarded.
Expand source code
def load_predictor(fpath, batch_size=U.DEFAULT_BS, custom_objects=None):
    """
    ```
    Loads a previously saved Predictor instance

    Args
      fpath(str): predictor path name.
                  From v0.16.x, this is always the path to a folder.
                  Pre-v0.16.x, this is the base name used to save model and .preproc instance.
      batch_size(int): batch size to use for predictions. default:32
      custom_objects(dict): custom objects required to load model.
                            This is useful if you compiled the model with a custom loss function, for example.
                            For models included with ktrain as is, this is populated automatically
                            and can be disregarded.
    Returns:
      the Predictor built by get_predictor for the loaded model and preprocessor
    Raises:
      Exception: if the .preproc file cannot be loaded from either location
                 (the underlying error is chained as the cause), or if the
                 stored image preprocessing function name is not recognized
      ValueError: if the loaded model or preprocessor has an unsupported type
    ```
    """

    # load the preprocessor
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file, so only predictor files from trusted sources should be loaded.
    new_location = os.path.join(fpath, U.PREPROC_NAME)
    try:
        with open(new_location, "rb") as f:
            preproc = pickle.load(f)
    except Exception:
        # pre-v0.16.x layout: the preprocessor is stored as <fpath>.preproc
        old_location = fpath + ".preproc"
        try:
            with open(old_location, "rb") as f:
                preproc = pickle.load(f)
        except Exception as err:
            raise Exception(
                "Failed to load .preproc file in either the post v0.16.x loction (%s) or pre v0.16.x location (%s)"
                % (new_location, old_location)
            ) from err

    # load the model
    model = _load_model(fpath, preproc=preproc, custom_objects=custom_objects)

    # preprocessing functions in ImageDataGenerators are not pickable
    # so, we must reconstruct
    if hasattr(preproc, "datagen") and hasattr(preproc.datagen, "ktrain_preproc"):
        fn_name = preproc.datagen.ktrain_preproc
        if fn_name == "resnet50":
            preproc.datagen.preprocessing_function = (
                keras.applications.resnet50.preprocess_input
            )
        elif fn_name == "mobilenet":
            preproc.datagen.preprocessing_function = (
                keras.applications.mobilenet.preprocess_input
            )
        elif fn_name == "mobilenetv3":
            preproc.datagen.preprocessing_function = (
                keras.applications.mobilenet_v3.preprocess_input
            )
        elif fn_name == "inception":
            preproc.datagen.preprocessing_function = (
                keras.applications.inception_v3.preprocess_input
            )
        elif fn_name == "efficientnet":
            preproc.datagen.preprocessing_function = (
                keras.applications.efficientnet.preprocess_input
            )
        else:
            raise Exception("Uknown preprocessing_function name: %s" % (fn_name))

    # return the appropriate predictor; get_predictor performs the same
    # argument checks and type dispatch that used to be duplicated here
    return get_predictor(model, preproc, batch_size=batch_size)
def release_gpu_memory(device=0)
Release GPU memory allocated by TensorFlow. Source: (link missing from this extract)
Expand source code
def release_gpu_memory(device=0):
    """
    ```
    Frees GPU memory held by Tensorflow: the Keras backend session is
    cleared, then the given CUDA device is selected through numba and closed.

    Args:
      device(int): device index passed to numba's cuda.select_device. default:0
    ```
    """
    # numba is imported lazily, only when the function is actually called
    from numba import cuda as numba_cuda

    K.clear_session()
    numba_cuda.select_device(device)
    numba_cuda.close()
class ArrayLearner (model, train_data=None, val_data=None, batch_size=32, eval_batch_size=32, workers=1, use_multiprocessing=False)
Main class used to tune and train Keras models using Array data. Objects of this class should be instantiated via the ktrain.get_learner method instead of directly. Main parameters are: model (Model): A compiled instance of keras.Model. train_data (ndarray): A tuple of (x_train, y_train), where x_train and y_train are numpy.ndarrays. val_data (ndarray): A tuple of (x_test, y_test), where x_test and y_test are numpy.ndarrays.
Expand source code
class ArrayLearner(Learner):
    """
    ```
    Main class used to tune and train Keras models
    using Array data.  Objects of this class should be instantiated
    via the ktrain.get_learner method instead of directly.
    Main parameters are:

    model (Model):        A compiled instance of keras.Model
    train_data (ndarray): A tuple of (x_train, y_train), where x_train and
                          y_train are numpy.ndarrays.
    val_data (ndarray):   A tuple of (x_test, y_test), where x_test and
                          y_test are numpy.ndarrays.
    ```
    """

    def __init__(
        self,
        model,
        train_data=None,
        val_data=None,
        batch_size=U.DEFAULT_BS,
        eval_batch_size=U.DEFAULT_BS,
        workers=1,
        use_multiprocessing=False,
    ):
        """
        ```
        Stores the datasets and batch sizes on the instance.
        model, workers and use_multiprocessing are forwarded to Learner.__init__.
        ```
        """
        super().__init__(
            model, workers=workers, use_multiprocessing=use_multiprocessing
        )
        self.train_data = train_data
        self.val_data = val_data
        self.batch_size = batch_size
        self.eval_batch_size = eval_batch_size

    def fit(
        self,
        lr,
        n_cycles,
        cycle_len=None,
        cycle_mult=1,
        lr_decay=1,
        checkpoint_folder=None,
        early_stopping=None,
        verbose=1,
        class_weight=None,
        callbacks=[],
        steps_per_epoch=None,
    ):
        """
        ```
        Trains the model. By default, fit is simply a wrapper for model.fit.
        When cycle_len parameter is supplied, an SGDR learning rate schedule is used.

        Args:
            lr (float): learning rate
            n_cycles (int):  n_cycles
            cycle_len (int): If not None, decay learning rate over <cycle_len>
                             epochs until restarting/resetting learning rate to <lr>.
                             If None, lr remains constant
            cycle_mult (int): Increase cycle_len by factor of cycle_mult.
                              This will gradually elongate the cycle.
                              Has no effect if cycle_len is None.
            lr_decay(float): rate of decay of learning rate each cycle
            checkpoint_folder (string): Folder path in which to save the model weights
                                        for each epoch.
                                        File name will be of the form:
                                        weights-{epoch:02d}-{val_loss:.2f}.hdf5
            early_stopping (int):     If not None, training will automatically stop after this many
                                      epochs of no improvement in validation loss.
                                      Upon completion, model will be loaded with weights from epoch
                                      with lowest validation loss.
            verbose (bool):           whether or not to show progress bar
            class_weight (dict):      Optional dictionary mapping class indices (integers) to a weight (float)
            callbacks (list):         list of Callback instances to employ during training
            steps_per_epoch(int):     accepted but not used by this method
        Returns:
            the history object returned by model.fit; when an SGDRScheduler
            callback is active, history["lr"] is filled from that callback
        Raises:
            ValueError: if early_stopping is requested without validation data
        ```
        """
        # check early_stopping
        if self.val_data is None and early_stopping is not None:
            raise ValueError(
                "early_stopping monitors val_loss but validation data not set"
            )

        # setup data
        x_train = self.train_data[0]
        y_train = self.train_data[1]
        validation = None
        if self.val_data:
            validation = (self.val_data[0], self.val_data[1])

        # setup learning rate schedule
        epochs = self._check_cycles(n_cycles, cycle_len, cycle_mult)
        self.set_lr(lr)

        # set call backs
        # (an empty list is normalized to None, so the shared default list
        # object is never handed to the helpers below)
        kcallbacks = callbacks if callbacks else None
        kcallbacks = self._cb_sgdr(
            lr,
            np.ceil(len(x_train) / self.batch_size),
            cycle_len,
            cycle_mult,
            lr_decay,
            callbacks=kcallbacks,
        )
        kcallbacks = self._cb_checkpoint(checkpoint_folder, callbacks=kcallbacks)
        kcallbacks = self._cb_earlystopping(early_stopping, callbacks=kcallbacks)
        # first SGDRScheduler callback, if any; matched by class name
        sgdr = next(
            (cb for cb in kcallbacks or [] if type(cb).__name__ == "SGDRScheduler"),
            None,
        )

        # train model
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message=".*Check your callbacks.*")
            hist = self.model.fit(
                self._prepare(x_train),
                self._prepare(y_train, train=False),
                batch_size=self.batch_size,
                epochs=epochs,
                validation_data=validation,
                verbose=verbose,
                shuffle=True,
                class_weight=class_weight,
                callbacks=kcallbacks,
            )
        if sgdr is not None:
            hist.history["lr"] = sgdr.history["lr"]
        self.history = hist

        if early_stopping:
            U.vprint(
                "Weights from best epoch have been loaded into model.", verbose=verbose
            )
        return hist

    def layer_output(self, layer_id, example_id=0, use_val=False):
        """
        ```
        Returns output of layer with index <layer_id> to help debug models.
        Uses first example (example_id=0) from training set, by default.

        Args:
            layer_id (int): index into self.model.layers
            example_id (int): index of the example to feed through the model
            use_val (bool): if True, take the example from val_data instead of train_data
        Returns:
            output of the selected layer for the single selected example
        ```
        """
        inp = self.model.layers[0].input
        outp = self.model.layers[layer_id].output
        f_out = K.function([inp], [outp])
        data = self.val_data if use_val else self.train_data
        example = data[0][example_id]
        # wrap the single example in a batch of size one
        layer_out = f_out([np.array([example])])[0]
        return layer_out

    def view_top_losses(self, n=4, preproc=None, val_data=None):
        """
        ```
        Views observations with top losses in validation set.
        Typically over-ridden by Learner subclasses.
        Args:
         n(int or tuple): a range to select in form of int or tuple
                          e.g., n=8 is treated as n=(0,8)
         preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
                                 For some data like text data, a preprocessor
                                 is required to undo the pre-processing
                                 to correctly view raw data.
         val_data:  optional val_data to use instead of self.val_data
        Returns:
            None. For each selected example, the id, loss, true value,
            prediction and the observation itself are printed.
        ```
        """
        val = self._check_val(val_data)

        # get top losses and associated data
        tups = self.top_losses(n=n, val_data=val, preproc=preproc)

        # get multilabel status and class names
        classes = preproc.get_classes() if preproc is not None else None

        # tokens are re-joined without spaces for languages that do not use them
        join_char = " "
        if isinstance(preproc, TextPreprocessor) and preproc.is_nospace_lang():
            join_char = ""

        # iterate through losses
        for tup in tups:
            idx, loss, truth, pred = tup[0], tup[1], tup[2], tup[3]
            obs = val[0][idx]
            if preproc is not None:
                obs = preproc.undo(obs)
            if isinstance(obs, str):
                # show at most the first 512 whitespace-separated tokens
                obs = join_char.join(obs.split()[:512])
            print("----------")
            print(
                "id:%s | loss:%s | true:%s | pred:%s)\n"
                % (idx, round(loss, 2), truth, pred)
            )
            print(obs)
        return
- Learner
- abc.ABC
def fit(self, lr, n_cycles, cycle_len=None, cycle_mult=1, lr_decay=1, checkpoint_folder=None, early_stopping=None, verbose=1, class_weight=None, callbacks=[], steps_per_epoch=None)
Trains the model. By default, fit is simply a wrapper for model.fit. When the cycle_len parameter is supplied, an SGDR learning rate schedule is used. lr (float): learning rate. n_cycles (int): number of cycles. cycle_len (int): If not None, decay learning rate over <cycle_len> epochs until restarting/resetting learning rate to <lr>. If None, lr remains constant. cycle_mult (int): Increase cycle_len by factor of cycle_mult. This will gradually elongate the cycle. Has no effect if cycle_len is None. lr_decay (float): rate of decay of learning rate each cycle. checkpoint_folder (string): Folder path in which to save the model weights for each epoch. File name will be of the form: weights-{epoch:02d}-{val_loss:.2f}.hdf5. early_stopping (int): If not None, training will automatically stop after this many epochs of no improvement in validation loss. Upon completion, model will be loaded with weights from epoch with lowest validation loss. callbacks (list): list of Callback instances to employ during training. class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float). steps_per_epoch (int): Steps per epoch. If None, then math.ceil(num_samples/batch_size) is used. Ignored unless the training dataset is a generator (i.e., it is ignored in ArrayLearner instances). verbose (bool): whether or not to show progress bar.
Expand source code
def fit(
    self,
    lr,
    n_cycles,
    cycle_len=None,
    cycle_mult=1,
    lr_decay=1,
    checkpoint_folder=None,
    early_stopping=None,
    verbose=1,
    class_weight=None,
    callbacks=[],
    steps_per_epoch=None,
):
    """
    ```
    Trains the model. By default, fit is simply a wrapper for model.fit.
    When cycle_len parameter is supplied, an SGDR learning rate schedule is used.

    Args:
        lr (float): learning rate
        n_cycles (int): n_cycles
        cycle_len (int): If not None, decay learning rate over <cycle_len>
                         epochs until restarting/resetting learning rate to <lr>.
                         If None, lr remains constant
        cycle_mult (int): Increase cycle_len by factor of cycle_mult.
                          This will gradually elongate the cycle.
                          Has no effect if cycle_len is None.
        lr_decay(float): rate of decay of learning rate each cycle
        checkpoint_folder (string): Folder path in which to save the model weights
                                    for each epoch.
                                    File name will be of the form:
                                    weights-{epoch:02d}-{val_loss:.2f}.hdf5
        early_stopping (int): If not None, training will automatically stop after this many
                              epochs of no improvement in validation loss.
                              Upon completion, model will be loaded with weights from epoch
                              with lowest validation loss.
        verbose (bool): whether or not to show progress bar
        class_weight (dict): Optional dictionary mapping class indices (integers)
                             to a weight (float)
        callbacks (list): list of Callback instances to employ during training
        steps_per_epoch(int): Accepted for signature parity with generator-based
                              learners; it is not used by this implementation.
    Returns:
        The history object returned by model.fit. When an SGDR schedule was
        active, its learning-rate trace is stored under history["lr"].
    Raises:
        ValueError: if early_stopping is requested but no validation data is set.
    ```
    """
    # check early_stopping
    if self.val_data is None and early_stopping is not None:
        raise ValueError(
            "early_stopping monitors val_loss but validation data not set"
        )

    # setup data
    x_train = self.train_data[0]
    y_train = self.train_data[1]
    validation = None
    if self.val_data:
        validation = (self.val_data[0], self.val_data[1])

    # setup learning rate schedule
    epochs = self._check_cycles(n_cycles, cycle_len, cycle_mult)
    self.set_lr(lr)

    # set call backs
    # An empty list (including the shared default) is normalised to None, so
    # the default argument object itself is never handed to the helpers below.
    kcallbacks = callbacks if callbacks else None
    kcallbacks = self._cb_sgdr(
        lr,
        np.ceil(len(x_train) / self.batch_size),
        cycle_len,
        cycle_mult,
        lr_decay,
        callbacks=kcallbacks,
    )
    kcallbacks = self._cb_checkpoint(checkpoint_folder, callbacks=kcallbacks)
    kcallbacks = self._cb_earlystopping(early_stopping, callbacks=kcallbacks)

    # locate the SGDR scheduler (if one was installed) to copy its lr trace later
    sgdr = (
        [cb for cb in kcallbacks if type(cb).__name__ == "SGDRScheduler"]
        if kcallbacks
        else None
    )
    sgdr = sgdr[0] if sgdr else None

    # train model
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=".*Check your callbacks.*")
        hist = self.model.fit(
            self._prepare(x_train),
            self._prepare(y_train, train=False),
            batch_size=self.batch_size,
            epochs=epochs,
            validation_data=validation,
            verbose=verbose,
            shuffle=True,
            class_weight=class_weight,
            callbacks=kcallbacks,
        )
    if sgdr is not None:
        hist.history["lr"] = sgdr.history["lr"]
    self.history = hist

    if early_stopping:
        U.vprint(
            "Weights from best epoch have been loaded into model.", verbose=verbose
        )

    return hist
def layer_output(self, layer_id, example_id=0, use_val=False)
Prints output of layer with index <layer_id> to help debug models. Uses first example (example_id=0) from training set, by default.
Expand source code
def layer_output(self, layer_id, example_id=0, use_val=False):
    """
    ```
    Computes and returns the output of the layer with index <layer_id>
    to help debug models.
    Uses first example (example_id=0) from training set, by default.
    Set use_val=True to take the example from the validation set instead.
    ```
    """
    # backend function mapping the first layer's input to the requested layer's output
    model_input = self.model.layers[0].input
    target_output = self.model.layers[layer_id].output
    run_layer = K.function([model_input], [target_output])

    # pick the dataset, then the single example inside its inputs (element 0)
    dataset = self.val_data if use_val else self.train_data
    sample = dataset[0][example_id]

    # wrap the example in a batch of size one before evaluating
    single_batch = np.array([sample])
    outputs = run_layer([single_batch])
    return outputs[0]
def view_top_losses(self, n=4, preproc=None, val_data=None)
Views observations with top losses in validation set. Typically over-ridden by Learner subclasses. Args: n(int or tuple): a range to select in form of int or tuple e.g., n=8 is treated as n=(0,8) preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor. For some data like text data, a preprocessor is required to undo the pre-processing to correctly view raw data. val_data: optional val_data to use instead of self.val_data Returns: list of n tuples where first element is either filepath or id of validation example and second element is loss.
Expand source code
def view_top_losses(self, n=4, preproc=None, val_data=None):
    """
    ```
    Views observations with top losses in validation set.
    Typically over-ridden by Learner subclasses.
    Args:
     n(int or tuple): a range to select in form of int or tuple
                      e.g., n=8 is treated as n=(0,8)
     preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
                             For some data like text data, a preprocessor
                             is required to undo the pre-processing
                             to correctly view raw data.
     val_data:  optional val_data to use instead of self.val_data
    Returns:
        None. For every selected example, the id, loss, true label and
        prediction are printed, followed by the observation itself.
    ```
    """
    val = self._check_val(val_data)

    # get top losses and associated data
    tups = self.top_losses(n=n, val_data=val, preproc=preproc)

    # Text in languages written without spaces is re-joined without a separator.
    # This depends only on the preprocessor, so it is decided once up front.
    join_char = " "
    if preproc is not None and isinstance(preproc, TextPreprocessor):
        if preproc.is_nospace_lang():
            join_char = ""

    # iterate through losses
    for tup in tups:
        # each tuple is unpacked positionally as (id, loss, truth, prediction)
        idx = tup[0]
        loss = tup[1]
        truth = tup[2]
        pred = tup[3]

        obs = val[0][idx]
        if preproc is not None:
            obs = preproc.undo(obs)
        # isinstance also covers str subclasses (e.g. numpy string scalars);
        # whitespace is normalised and only the first 512 tokens are shown
        if isinstance(obs, str):
            obs = join_char.join(obs.split()[:512])
        print("----------")
        print(
            "id:%s | loss:%s | true:%s | pred:%s)\n"
            % (idx, round(loss, 2), truth, pred)
        )
        print(obs)
    return
Inherited members
class GenLearner (model, train_data=None, val_data=None, batch_size=32, eval_batch_size=32, workers=1, use_multiprocessing=False)
Main class used to tune and train Keras models using a Keras generator (e.g., DirectoryIterator). Objects of this class should be instantiated using the ktrain.get_learner function, rather than directly. Main parameters are: model (Model): A compiled instance of keras.Model train_data (Iterator): an Iterator instance for training set val_data (Iterator): an Iterator instance for validation set
Expand source code
class GenLearner(Learner):
    """
    ```
    Main class used to tune and train Keras models
    using a Keras generator (e.g., DirectoryIterator).
    Objects of this class should be instantiated using the
    ktrain.get_learner function, rather than directly.

    Main parameters are:

    model (Model): A compiled instance of keras.Model
    train_data (Iterator): an Iterator instance for training set
    val_data (Iterator): an Iterator instance for validation set
    batch_size (int): batch size assigned to train_data
    eval_batch_size (int): batch size assigned to val_data
    ```
    """

    def __init__(
        self,
        model,
        train_data=None,
        val_data=None,
        batch_size=U.DEFAULT_BS,
        eval_batch_size=U.DEFAULT_BS,
        workers=1,
        use_multiprocessing=False,
    ):
        super().__init__(
            model, workers=workers, use_multiprocessing=use_multiprocessing
        )
        self.train_data = train_data
        self.val_data = val_data
        self.batch_size = batch_size
        self.eval_batch_size = eval_batch_size
        # push the requested batch sizes onto the data objects themselves
        if self.train_data:
            self.train_data.batch_size = batch_size
        if self.val_data:
            self.val_data.batch_size = eval_batch_size
        return

    def fit(
        self,
        lr,
        n_cycles,
        cycle_len=None,
        cycle_mult=1,
        lr_decay=1.0,
        checkpoint_folder=None,
        early_stopping=None,
        class_weight=None,
        callbacks=[],
        steps_per_epoch=None,
        verbose=1,
    ):
        """
        ```
        Trains the model. By default, fit is simply a wrapper for model.fit
        (for generators/sequences).
        When cycle_len parameter is supplied, an SGDR learning rate schedule is used.

        Args:
            lr (float): learning rate
            n_cycles (int): n_cycles
            cycle_len (int): If not None, decay learning rate over <cycle_len>
                             epochs until restarting/resetting learning rate to <lr>.
                             If None, lr remains constant
            cycle_mult (int): Increase cycle_len by factor of cycle_mult.
                              This will gradually elongate the cycle.
                              Has no effect if cycle_len is None.
            lr_decay (float): rate of decay of learning rate each cycle.
                              Has no effect if cycle_len is None
            checkpoint_folder (string): Folder path in which to save the model weights
                                        for each epoch.
                                        File name will be of the form:
                                        weights-{epoch:02d}-{val_loss:.2f}.hdf5
            early_stopping (int): If not None, training will automatically stop after this many
                                  epochs of no improvement in validation loss.
                                  Upon completion, model will be loaded with weights from epoch
                                  with lowest validation loss.
            class_weight (dict): Optional dictionary mapping class indices (integers)
                                 to a weight (float)
            callbacks (list): list of Callback instances to employ during training
            steps_per_epoch(int): Steps per epoch. If None, then,
                                  math.ceil(num_samples/batch_size) is used.
            verbose (boolean): whether or not to print progress bar
        Returns:
            The history object returned by model.fit. When an SGDR schedule was
            active, its learning-rate trace is stored under history["lr"].
        Raises:
            ValueError: if early_stopping is requested but no validation data is set.
        ```
        """
        # check early_stopping
        if self.val_data is None and early_stopping is not None:
            raise ValueError(
                "early_stopping monitors val_loss but validation data not set"
            )

        # derive step counts from the dataset sizes and batch sizes
        num_samples = U.nsamples_from_data(self.train_data)
        train_bs = (
            self.train_data.batch_size
            if hasattr(self.train_data, "batch_size")
            else self.batch_size
        )
        if steps_per_epoch is None:
            steps_per_epoch = math.ceil(num_samples / train_bs)
        validation_steps = None
        if self.val_data is not None:
            # NOTE(review): the fallback is self.batch_size, not
            # self.eval_batch_size -- confirm this is intended.
            val_bs = (
                self.val_data.batch_size
                if hasattr(self.val_data, "batch_size")
                else self.batch_size
            )
            validation_steps = math.ceil(U.nsamples_from_data(self.val_data) / val_bs)

        epochs = self._check_cycles(n_cycles, cycle_len, cycle_mult)
        self.set_lr(lr)

        # set call backs
        # An empty list (including the shared default) is normalised to None, so
        # the default argument object itself is never handed to the helpers below.
        kcallbacks = callbacks if callbacks else None
        kcallbacks = self._cb_sgdr(
            lr, steps_per_epoch, cycle_len, cycle_mult, lr_decay, callbacks=kcallbacks
        )
        kcallbacks = self._cb_checkpoint(checkpoint_folder, callbacks=kcallbacks)
        kcallbacks = self._cb_earlystopping(early_stopping, callbacks=kcallbacks)

        # locate the SGDR scheduler (if one was installed) to copy its lr trace later
        sgdr = (
            [cb for cb in kcallbacks if type(cb).__name__ == "SGDRScheduler"]
            if kcallbacks
            else None
        )
        sgdr = sgdr[0] if sgdr else None

        # MNIST times per epoch on Titan V
        # workers=4, usemp=True 9 sec.
        # workers=1, usemp=True 12 sec.
        # workers=1, usemp=False 16 sec.
        # workers=4, usemp=False 30+ sec.

        # train model
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message=".*Check your callbacks.*")
            hist = self.model.fit(
                self._prepare(self.train_data),
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps,
                epochs=epochs,
                validation_data=self._prepare(self.val_data, train=False),
                workers=self.workers,
                use_multiprocessing=self.use_multiprocessing,
                verbose=verbose,
                shuffle=True,
                class_weight=class_weight,
                callbacks=kcallbacks,
            )
        if sgdr is not None:
            hist.history["lr"] = sgdr.history["lr"]
        self.history = hist

        if early_stopping:
            U.vprint(
                "Weights from best epoch have been loaded into model.", verbose=verbose
            )
        return hist

    def layer_output(self, layer_id, example_id=0, batch_id=0, use_val=False):
        """
        ```
        Computes and returns the output of the layer with index <layer_id>
        to help debug models.
        Uses first example (example_id=0) from first batch (batch_id=0)
        from training set, by default.
        Set use_val=True to take the example from the validation set instead.
        ```
        """
        inp = self.model.layers[0].input
        outp = self.model.layers[layer_id].output
        f_out = K.function([inp], [outp])

        # Index the generator by batch first, then take the inputs (element 0
        # of the batch) and finally the example. Indexing [0][batch_id] would
        # instead select inputs vs. targets inside the first batch.
        data = self.val_data if use_val else self.train_data
        example = data[batch_id][0][example_id]

        # wrap the example in a batch of size one before evaluating
        layer_out = f_out([np.array([example])])[0]
        return layer_out

    def view_top_losses(self, n=4, preproc=None, val_data=None):
        """
        ```
        Views observations with top losses in validation set.
        Typically over-ridden by Learner subclasses.
        Args:
         n(int or tuple): a range to select in form of int or tuple
                          e.g., n=8 is treated as n=(0,8)
         preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
                                 It is only forwarded to self.top_losses here.
         val_data:  optional val_data to use instead of self.val_data
        Returns:
            None. For every selected example, the id, loss, true label and
            prediction are printed.
        ```
        """
        val = self._check_val(val_data)

        # get top losses and associated data
        tups = self.top_losses(n=n, val_data=val, preproc=preproc)

        # iterate through losses
        for tup in tups:
            # each tuple is unpacked positionally as (id, loss, truth, prediction)
            idx = tup[0]
            loss = tup[1]
            truth = tup[2]
            pred = tup[3]

            print("----------")
            print(
                "id:%s | loss:%s | true:%s | pred:%s)\n"
                % (idx, round(loss, 2), truth, pred)
            )
        return
- Learner
- abc.ABC
def fit(self, lr, n_cycles, cycle_len=None, cycle_mult=1, lr_decay=1.0, checkpoint_folder=None, early_stopping=None, class_weight=None, callbacks=[], steps_per_epoch=None, verbose=1)
Trains the model. By default, fit is simply a wrapper for model.fit (for generators/sequences). When cycle_len parameter is supplied, an SGDR learning rate schedule is used. lr (float): learning rate n_cycles (int): n_cycles cycle_len (int): If not None, decay learning rate over <cycle_len> epochs until restarting/resetting learning rate to <lr>. If None, lr remains constant cycle_mult (int): Increase cycle_len by factor of cycle_mult. This will gradually elongate the cycle. Has no effect if cycle_len is None. lr_decay (float): rate of decay of learning rate each cycle. Has no effect if cycle_len is None checkpoint_folder (string): Folder path in which to save the model weights for each epoch. File name will be of the form: weights-{epoch:02d}-{val_loss:.2f}.hdf5 early_stopping (int): If not None, training will automatically stop after this many epochs of no improvement in validation loss. Upon completion, model will be loaded with weights from epoch with lowest validation loss. class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float) callbacks (list): list of Callback instances to employ during training steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used. verbose (boolean): whether or not to print progress bar
Expand source code
def fit(
    self,
    lr,
    n_cycles,
    cycle_len=None,
    cycle_mult=1,
    lr_decay=1.0,
    checkpoint_folder=None,
    early_stopping=None,
    class_weight=None,
    callbacks=[],
    steps_per_epoch=None,
    verbose=1,
):
    """
    ```
    Trains the model. By default, fit is simply a wrapper for model.fit
    (for generators/sequences).
    When cycle_len parameter is supplied, an SGDR learning rate schedule is used.

    Args:
        lr (float): learning rate
        n_cycles (int): n_cycles
        cycle_len (int): If not None, decay learning rate over <cycle_len>
                         epochs until restarting/resetting learning rate to <lr>.
                         If None, lr remains constant
        cycle_mult (int): Increase cycle_len by factor of cycle_mult.
                          This will gradually elongate the cycle.
                          Has no effect if cycle_len is None.
        lr_decay (float): rate of decay of learning rate each cycle.
                          Has no effect if cycle_len is None
        checkpoint_folder (string): Folder path in which to save the model weights
                                    for each epoch.
                                    File name will be of the form:
                                    weights-{epoch:02d}-{val_loss:.2f}.hdf5
        early_stopping (int): If not None, training will automatically stop after this many
                              epochs of no improvement in validation loss.
                              Upon completion, model will be loaded with weights from epoch
                              with lowest validation loss.
        class_weight (dict): Optional dictionary mapping class indices (integers)
                             to a weight (float)
        callbacks (list): list of Callback instances to employ during training
        steps_per_epoch(int): Steps per epoch. If None, then,
                              math.ceil(num_samples/batch_size) is used.
        verbose (boolean): whether or not to print progress bar
    Returns:
        The history object returned by model.fit. When an SGDR schedule was
        active, its learning-rate trace is stored under history["lr"].
    Raises:
        ValueError: if early_stopping is requested but no validation data is set.
    ```
    """
    # check early_stopping
    if self.val_data is None and early_stopping is not None:
        raise ValueError(
            "early_stopping monitors val_loss but validation data not set"
        )

    # derive step counts from the dataset sizes and batch sizes
    num_samples = U.nsamples_from_data(self.train_data)
    train_bs = (
        self.train_data.batch_size
        if hasattr(self.train_data, "batch_size")
        else self.batch_size
    )
    if steps_per_epoch is None:
        steps_per_epoch = math.ceil(num_samples / train_bs)
    validation_steps = None
    if self.val_data is not None:
        # NOTE(review): the fallback is self.batch_size, not
        # self.eval_batch_size -- confirm this is intended.
        val_bs = (
            self.val_data.batch_size
            if hasattr(self.val_data, "batch_size")
            else self.batch_size
        )
        validation_steps = math.ceil(U.nsamples_from_data(self.val_data) / val_bs)

    epochs = self._check_cycles(n_cycles, cycle_len, cycle_mult)
    self.set_lr(lr)

    # set call backs
    # An empty list (including the shared default) is normalised to None, so
    # the default argument object itself is never handed to the helpers below.
    kcallbacks = callbacks if callbacks else None
    kcallbacks = self._cb_sgdr(
        lr, steps_per_epoch, cycle_len, cycle_mult, lr_decay, callbacks=kcallbacks
    )
    kcallbacks = self._cb_checkpoint(checkpoint_folder, callbacks=kcallbacks)
    kcallbacks = self._cb_earlystopping(early_stopping, callbacks=kcallbacks)

    # locate the SGDR scheduler (if one was installed) to copy its lr trace later
    sgdr = (
        [cb for cb in kcallbacks if type(cb).__name__ == "SGDRScheduler"]
        if kcallbacks
        else None
    )
    sgdr = sgdr[0] if sgdr else None

    # MNIST times per epoch on Titan V
    # workers=4, usemp=True 9 sec.
    # workers=1, usemp=True 12 sec.
    # workers=1, usemp=False 16 sec.
    # workers=4, usemp=False 30+ sec.

    # train model
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=".*Check your callbacks.*")
        hist = self.model.fit(
            self._prepare(self.train_data),
            steps_per_epoch=steps_per_epoch,
            validation_steps=validation_steps,
            epochs=epochs,
            validation_data=self._prepare(self.val_data, train=False),
            workers=self.workers,
            use_multiprocessing=self.use_multiprocessing,
            verbose=verbose,
            shuffle=True,
            class_weight=class_weight,
            callbacks=kcallbacks,
        )
    if sgdr is not None:
        hist.history["lr"] = sgdr.history["lr"]
    self.history = hist

    if early_stopping:
        U.vprint(
            "Weights from best epoch have been loaded into model.", verbose=verbose
        )
    return hist
def layer_output(self, layer_id, example_id=0, batch_id=0, use_val=False)
Prints output of layer with index <layer_id> to help debug models. Uses first example (example_id=0) from first batch from training set, by default.
Expand source code
def layer_output(self, layer_id, example_id=0, batch_id=0, use_val=False):
    """
    ```
    Computes and returns the output of the layer with index <layer_id>
    to help debug models.
    Uses first example (example_id=0) from first batch (batch_id=0)
    from training set, by default.
    Set use_val=True to take the example from the validation set instead.
    ```
    """
    inp = self.model.layers[0].input
    outp = self.model.layers[layer_id].output
    f_out = K.function([inp], [outp])

    # Index the generator by batch first, then take the inputs (element 0
    # of the batch) and finally the example. Indexing [0][batch_id] would
    # instead select inputs vs. targets inside the first batch.
    data = self.val_data if use_val else self.train_data
    example = data[batch_id][0][example_id]

    # wrap the example in a batch of size one before evaluating
    layer_out = f_out([np.array([example])])[0]
    return layer_out
def view_top_losses(self, n=4, preproc=None, val_data=None)
Views observations with top losses in validation set. Typically over-ridden by Learner subclasses. Args: n(int or tuple): a range to select in form of int or tuple e.g., n=8 is treated as n=(0,8) preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor. For some data like text data, a preprocessor is required to undo the pre-processing to correctly view raw data. val_data: optional val_data to use instead of self.val_data Returns: list of n tuples where first element is either filepath or id of validation example and second element is loss.
Expand source code
def view_top_losses(self, n=4, preproc=None, val_data=None):
    """
    ```
    Views observations with top losses in validation set.
    Typically over-ridden by Learner subclasses.
    Args:
     n(int or tuple): a range to select in form of int or tuple
                      e.g., n=8 is treated as n=(0,8)
     preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
                             It is only forwarded to self.top_losses here.
     val_data:  optional val_data to use instead of self.val_data
    Returns:
        None. For every selected example, the id, loss, true label and
        prediction are printed.
    ```
    """
    val = self._check_val(val_data)

    # get top losses and associated data
    tups = self.top_losses(n=n, val_data=val, preproc=preproc)

    # iterate through losses
    for tup in tups:
        # each tuple is unpacked positionally as (id, loss, truth, prediction)
        idx = tup[0]
        loss = tup[1]
        truth = tup[2]
        pred = tup[3]

        print("----------")
        print(
            "id:%s | loss:%s | true:%s | pred:%s)\n"
            % (idx, round(loss, 2), truth, pred)
        )
    return
Inherited members
class Learner (model, workers=1, use_multiprocessing=False)
Abstract class used to tune and train Keras models. The fit method is an abstract method and must be implemented by subclasses.
Expand source code
class Learner(ABC):
    """
    ```
    Abstract class used to tune and train Keras models. The fit method is
    an abstract method and must be implemented by subclasses.
    ```
    """

    def __init__(self, model, workers=1, use_multiprocessing=False):
        if not isinstance(model, keras.Model):
            raise ValueError("model must be of instance keras.Model")
        self.model = model
        self.lr_finder = LRFinder(self.model)
        self.workers = workers
        self.use_multiprocessing = use_multiprocessing
        self.history = None

        # save original weights of model so that reset_weights can restore them
        try:
            fd, weightfile = tempfile.mkstemp()
            # mkstemp returns an open descriptor; only the path is needed
            os.close(fd)
            self.model.save_weights(weightfile)
            self._original_weights = weightfile
        except Exception as e:
            warnings.warn("Could not save original model weights: %s" % (e))
            self._original_weights = None

    @property
    def _monitor_metrics(self):
        """
        ```
        Names that can be monitored during training: "loss", the metrics
        reported by U.metrics_from_model and, when validation data is
        present, the "val_"-prefixed version of each.
        ```
        """
        metrics = ["loss"]
        try:
            m = U.metrics_from_model(self.model)
            if isinstance(m, list):
                metrics.extend(m)
        except Exception:
            # best effort: fall back to monitoring the loss only
            pass
        if self.val_data is not None:
            for m in metrics[:]:
                metrics.append("val_%s" % (m))
        return metrics

    def get_weight_decay(self):
        """
        ```
        Get current weight decay rate.
        Returns None unless the optimizer is an AdamWeightDecay instance.
        ```
        """
        if type(self.model.optimizer).__name__ == "AdamWeightDecay":
            return self.model.optimizer.weight_decay_rate
        return None

    def set_weight_decay(self, wd=U.DEFAULT_WD):
        """
        ```
        Sets global weight decay via AdamWeightDecay optimizer
        Args:
          wd(float): weight decay
        Returns:
          None
        ```
        """
        self._recompile(wd=wd)
        return

    def evaluate(
        self,
        test_data=None,
        print_report=True,
        save_path="ktrain_classification_report.csv",
        class_names=[],
    ):
        """
        ```
        alias for self.validate().
        Returns confusion matrix and optionally prints
        a classification report.
        This is currently only supported for binary and multiclass
        classification, not multilabel classification.

        By default, this uses val_data, as supplied to ktrain.get_learner().
        Other validation or test data can be optionally be supplied as argument via <test_data> argument.
        Supply class_names to include labels instead of integer class IDs
        in classification report.
        Args:
          test_data(Dataset|np.ndarray): test or validation data.  If None, self.val_data is used.
          print_report(bool): If True, classification report will be printed. If False, report will be saved to CSV
                              at save_path. Not applicable to regression models.
          save_path(str): Classification report will be saved to this file path/name if print_report=False
                          Not applicable to regression models.
          class_names(list): list of class names to be used in classification report instead of
                             class integer IDs.
        ```
        """
        return self.validate(
            val_data=test_data,
            print_report=print_report,
            save_path=save_path,
            class_names=class_names,
        )

    def validate(
        self,
        val_data=None,
        print_report=True,
        save_path="ktrain_classification_report.csv",
        class_names=[],
    ):
        """
        ```
        Returns confusion matrix and optionally prints
        a classification report.
        For multilabel classification problems, confusion matrices are not
        supported (None is returned), but classification reports are.
        For models that are not classifiers, a list of (metric_name, value)
        tuples is returned instead.

        By default, this uses val_data, as supplied to ktrain.get_learner().
        Other validation or test data can be optionally be supplied as argument.
        Supply class_names to include labels instead of integer class IDs
        in classification report.
        Args:
          val_data(Dataset|np.ndarray): validation data.  If None, self.val_data is used.
          print_report(bool): If True, classification report will be printed. If False, report will be saved to CSV
                              at save_path. Not applicable to regression models.
          save_path(str): Classification report will be saved to this file path/name if print_report=False
          class_names(list): list of class names to be used in classification report instead of
                             class integer IDs.
        ```
        """
        val = val_data if val_data is not None else self.val_data

        classification, multilabel = U.is_classifier(self.model)
        is_multilabel = U.is_multilabel(val) or multilabel
        y_pred = self.predict(val_data=val)
        y_true = self.ground_truth(val_data=val)
        y_pred = np.squeeze(y_pred)
        y_true = np.squeeze(y_true)

        # regression evaluation
        if not classification:
            from sklearn.metrics import mean_absolute_error, mean_squared_error

            regout = []
            metrics = U.metrics_from_model(self.model)
            for m in metrics:
                if m in ["mae", "mean_absolute_error"]:
                    regout.append((m, mean_absolute_error(y_true, y_pred)))
                elif m in ["mse", "mean_squared_error"]:
                    regout.append((m, mean_squared_error(y_true, y_pred)))
            if not regout:
                # none of the model's metrics is handled above
                warnings.warn(
                    "%s is not supported by validate/evaluate - falling back to MAE"
                    % (metrics,)
                )
                regout.append(("mae", mean_absolute_error(y_true, y_pred)))
            return regout

        if len(y_pred.shape) == 1:
            # single output per example: threshold at 0.5
            y_pred = np.where(y_pred > 0.5, 1, 0)
            y_true = np.where(y_true > 0.5, 1, 0)
        elif is_multilabel:
            from sklearn.preprocessing import binarize

            y_pred = binarize(y_pred, threshold=0.5)
        else:
            y_pred = np.argmax(y_pred, axis=1)
            y_true = np.argmax(y_true, axis=1)

        if print_report or save_path is not None:
            if class_names:
                try:
                    class_names = [str(s) for s in class_names]
                except Exception:
                    # keep the names as supplied if they cannot be stringified
                    pass
                report = classification_report(
                    y_true,
                    y_pred,
                    target_names=class_names,
                    output_dict=not print_report,
                )
            else:
                report = classification_report(
                    y_true,
                    y_pred,
                    output_dict=not print_report,
                    zero_division=0,
                )
            if print_report:
                print(report)
            else:
                df = pd.DataFrame(report).transpose()
                df.to_csv(save_path)
                print("classification report saved to: %s" % (save_path))

        if is_multilabel:
            warnings.warn(
                "Confusion matrices do not currently support multilabel classification, so returning None"
            )
            return
        return confusion_matrix(y_true, y_pred)

    def _check_val(self, val_data):
        """
        ```
        Returns val_data if given, otherwise self.val_data.
        Raises an Exception if both are None.
        ```
        """
        val = val_data if val_data is not None else self.val_data
        if val is None:
            raise Exception(
                "val_data must be supplied to get_learner or view_top_losses"
            )
        return val

    def top_losses(self, n=4, val_data=None, preproc=None):
        """
        ```
        Computes losses on validation set sorted by examples with top losses
        Args:
          n(int or tuple): a range to select in form of int or tuple
                           e.g., n=8 is treated as n=(0,8)
          val_data:  optional val_data to use instead of self.val_data
          preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
                                  If supplied, its get_classes() is used to
                                  turn class indices into class names.
        Returns:
          list of tuples (index, loss, truth, prediction) sorted by
          decreasing loss and restricted to the range n.  For single-label
          classifiers only misclassified examples are included.
        ```
        """
        # check validation data and arguments
        val = val_data if val_data is not None else self.val_data
        if val is None:
            raise Exception("val_data must be supplied to get_learner or top_losses")
        if isinstance(n, int):
            n = (0, n)

        classification, multilabel = U.is_classifier(self.model)

        # get predictions and ground truth
        y_pred = self.predict(val_data=val)
        y_true = self.ground_truth(val_data=val)
        y_true = y_true.astype("float32")

        # adjust y_true for regression problems so its rank matches y_pred
        if (
            not classification
            and len(y_true.shape) == 1
            and (len(y_pred.shape) == 2 and y_pred.shape[1] == 1)
        ):
            y_true = np.expand_dims(y_true, -1)

        # compute loss
        L = U.loss_fn_from_model(self.model)
        losses = L(tf.convert_to_tensor(y_true), tf.convert_to_tensor(y_pred))
        if DISABLE_V2_BEHAVIOR:
            losses = tf.Session().run(losses)
        else:
            losses = losses.numpy()

        class_names = [] if preproc is None else preproc.get_classes()
        if preproc is None:
            class_fcn = lambda x: "%s" % (x)
        else:
            class_fcn = lambda x: class_names[x]

        # regression output modifications
        if not classification:
            if len(y_pred.shape) == 2 and y_pred.shape[1] == 1:
                y_pred = np.squeeze(y_pred)
                y_pred = np.around(y_pred, 2)
            if len(y_true.shape) == 2 and y_true.shape[1] == 1:
                y_true = np.squeeze(y_true)
                y_true = np.around(y_true, 2)

        # sort by loss and prune correct classifications, if necessary
        if classification and not multilabel:
            y_pred = np.squeeze(y_pred)
            y_true = np.squeeze(y_true)
            if len(y_pred.shape) == 1:
                y_p = np.where(y_pred > 0.5, 1, 0)
                y_t = np.where(y_true > 0.5, 1, 0)
            else:
                y_p = np.argmax(y_pred, axis=1)
                y_t = np.argmax(y_true, axis=1)
            tups = [
                (i, x, class_fcn(y_t[i]), class_fcn(y_p[i]))
                for i, x in enumerate(losses)
                if y_p[i] != y_t[i]
            ]
        else:
            tups = [
                (i, x, y_true[i], np.around(y_pred[i], 2))
                for i, x in enumerate(losses)
            ]
        tups.sort(key=operator.itemgetter(1), reverse=True)

        # prune by given range
        tups = tups[n[0] : n[1]] if n is not None else tups
        return tups

    def view_top_losses(self, n=4, preproc=None, val_data=None):
        """
        ```
        View observations with top losses in validation set.
        Must be overridden by Learner subclasses.
        ```
        """
        raise NotImplementedError(
            "view_top_losses must be overriden by Learner subclass"
        )

    def _make_model_folder(self, fpath):
        """
        ```
        Creates folder fpath if it does not exist.
        Raises ValueError if fpath is an existing regular file.
        ```
        """
        if os.path.isfile(fpath):
            raise ValueError(
                f"There is an existing file named {fpath}. "
                + "Please use dfferent value for fpath."
            )
        if not os.path.exists(fpath):
            os.makedirs(fpath)

    def save_model(self, fpath):
        """
        ```
        a wrapper to model.save
        Args:
          fpath(str): path to folder in which to save model
        Returns:
          None
        ```
        """
        self._make_model_folder(fpath)
        self.model.save(os.path.join(fpath, U.MODEL_NAME), save_format="h5")
        return

    def load_model(self, fpath, custom_objects=None, **kwargs):
        """
        ```
        loads model from folder.
        Note: **kwargs included for backwards compatibility only, as TransformerTextClassLearner.load_model was removed in v0.18.0.
        Args:
          fpath(str): path to folder containing model
          custom_objects(dict): custom objects required to load model.
                                For models included with ktrain, this is populated automatically
                                and can be disregarded.
        ```
        """
        self.model = _load_model(
            fpath, train_data=self.train_data, custom_objects=custom_objects
        )
        return

    def _is_adamlike(self):
        """
        ```
        checks whether optimizer attached to model is an
        "Adam-like" optimizer with beta_1 parameter.
        ```
        """
        return self.model is not None and hasattr(self.model.optimizer, "beta_1")

    def _recompile(self, wd=None):
        """
        ```
        Recompiles the model with its current loss and metrics.
        If wd > 0, the optimizer from U.get_default_optimizer(wd=wd) is used;
        if wd == 0, U.DEFAULT_OPT is used; if wd is None, the current
        optimizer is kept.
        ```
        """
        metrics = U.metrics_from_model(self.model)
        if wd is not None and wd > 0:
            if type(self.model.optimizer).__name__ != "AdamWeightDecay":
                warnings.warn(
                    "recompiling model to use AdamWeightDecay as opimizer with weight decay of %s"
                    % (wd)
                )
            optimizer = U.get_default_optimizer(wd=wd)
        elif wd is not None and wd == 0:
            optimizer = U.DEFAULT_OPT
        else:
            # wd is None -> don't modify optimizer
            optimizer = self.model.optimizer
        self.model.compile(optimizer=optimizer, loss=self.model.loss, metrics=metrics)
        return

    def set_model(self, model):
        """
        ```
        replace model in this Learner instance
        ```
        """
        if not isinstance(model, keras.Model):
            raise ValueError("model must be of instance keras.Model")
        self.model = model
        self.history = None
        return

    def freeze(self, freeze_range=None):
        """
        ```
        If freeze_range is None, makes all layers trainable=False except last Dense layer.
        If freeze_range is given, freezes the first <freeze_range> layers and
        unfreezes all remaining layers.
        NOTE:      Freeze method does not currently work with
                   multi-GPU models.  If you are using the load_imagemodel method,
                   please use the freeze_layers argument of load_imagemodel
                   to freeze layers.
        Args:
          freeze_range(int): number of layers to freeze
        Returns:
          None
        ```
        """
        if freeze_range is None:
            # freeze everything except last Dense layer
            # first find last dense layer
            dense_id = None
            for i, layer in reversed(list(enumerate(self.model.layers))):
                if isinstance(layer, keras.layers.Dense):
                    dense_id = i
                    break
            if dense_id is None:
                raise Exception("cannot find Dense layer in this model")
            boundary = dense_id
        else:
            # freeze the first freeze_range layers
            if not isinstance(freeze_range, int) or freeze_range < 1:
                raise ValueError("freeze_range must be integer > 0")
            boundary = freeze_range

        for i, layer in enumerate(self.model.layers):
            layer.trainable = i >= boundary
        self._recompile()
        return

    def unfreeze(self, exclude_range=None):
        """
        ```
        Make every layer trainable except the first <exclude_range> layers.
        NOTE:      Unfreeze method does not currently work with
                   multi-GPU models.  If you are using the load_imagemodel method,
                   please use the freeze_layers argument of load_imagemodel
                   to freeze layers.
        ```
        """
        # make all layers trainable
        for layer in self.model.layers:
            layer.trainable = True
        if exclude_range:
            for layer in self.model.layers[:exclude_range]:
                layer.trainable = False
        self._recompile()
        return

    def reset_weights(self, verbose=1):
        """
        ```
        Re-initializes network with original weights.
        Only a warning is issued if the original weights file is unavailable.
        ```
        """
        # _original_weights is None when saving failed in __init__
        if self._original_weights is not None and os.path.isfile(
            self._original_weights
        ):
            self.model.load_weights(self._original_weights)
            self.history = None
            U.vprint("Model weights have been reset.", verbose=verbose)
        else:
            warnings.warn(
                "Weights have not been reset because the original weights file "
                + "(%s) no longer exists." % (self._original_weights)
            )
        return

    def lr_find(
        self,
        start_lr=1e-7,
        lr_mult=1.01,
        max_epochs=None,
        class_weight=None,
        stop_factor=4,
        show_plot=False,
        suggest=False,
        restore_weights_only=False,
        verbose=1,
    ):
        """
        ```
        Plots loss as learning rate is increased.  Highest learning rate
        corresponding to a still falling loss should be chosen.

        If you find the LR finder is running for more epochs than you'd prefer,
        you can set max_epochs (e.g., max_epochs=5) to estimate LR with a
        smaller sample size.

        If lr_mult is supplied and max_epochs is None, LR will increase until loss diverges.
        Reasonable values of lr_mult are between 1.01 and 1.05.

        If max_epochs is supplied, lr_mult argument is ignored and computed automatically.

        Reference: https://arxiv.org/abs/1506.01186

        Args:
            start_lr (float): smallest lr to start simulation
            lr_mult (float): multiplication factor to increase LR.
                             Ignored if max_epochs is supplied.
            max_epochs (int):  maximum number of epochs to simulate.
                               lr_mult is ignored if max_epoch is supplied.
                               Default is None. Set max_epochs to an integer
                               (e.g., 5) if lr_find is taking too long
                               and running for more epochs than desired.
            class_weight(dict): class_weight parameter passed to model.fit
                                for imbalanced datasets.
            stop_factor(int): factor used to determine threshold that loss
                              must exceed to stop training simulation.
                              Increase this if loss is erratic and lr_find
                              exits too early.
            show_plot (bool):  If True, automatically invoke lr_plot
            suggest (bool): forwarded to lr_plot when show_plot is True
            restore_weights_only(bool): If True, when training simulation is complete,
                                        the model weights only are restored, but not the
                                        original optimizer weights.
                                        In at least a few cases, this seems to improve performance
                                        when actual training begins. Further investigation is needed,
                                        so it is False by default.
            verbose (bool): specifies how much output to print
        Returns:
            None
        ```
        """
        # dep_fix: bug in TF 2.2 and 2.3
        if version.parse(tf.__version__) > version.parse("2.1") and version.parse(
            tf.__version__
        ) < version.parse("2.4"):
            if max_epochs is None:
                raise ValueError(
                    "Due to a bug in TensorFlow 2.2 and 2.3, the max_epochs argument is temporarily required. "
                    + "Please re-run with max_epochs (e.g., max_epochs=5). \n"
                    + "More info:"
                )

        U.vprint(
            "simulating training for different learning rates... this may take a few moments...",
            verbose=verbose,
        )

        # save current state so it can be put back after the simulation
        # dep_fix: save_model is used instead of save_weights by default
        weightfile = None
        temp_folder = None
        if restore_weights_only:
            fd, weightfile = tempfile.mkstemp()
            # mkstemp returns an open descriptor; only the path is needed
            os.close(fd)
            self.model.save_weights(weightfile)
        else:
            temp_folder = tempfile.mkdtemp()
            self.save_model(temp_folder)

        def _restore_state():
            # undo the simulation using whichever snapshot was taken above
            if restore_weights_only:
                self.model.load_weights(weightfile)
            else:
                self.load_model(temp_folder)

        # compute steps_per_epoch
        num_samples = U.nsamples_from_data(self.train_data)
        bs = (
            self.train_data.batch_size
            if hasattr(self.train_data, "batch_size")
            else self.batch_size
        )
        if U.is_iter(self.train_data):
            use_gen = True
            steps_per_epoch = num_samples // bs
        else:
            use_gen = False
            steps_per_epoch = np.ceil(num_samples / bs)

        # check steps_per_epoch
        if steps_per_epoch <= 64 and max_epochs is None:
            warnings.warn(
                "max_epochs is being set to 5 since steps per epoch is small. "
                + "If you wish to estimate LR using more epochs, set max_epochs manually."
            )
            max_epochs = 5

        try:
            # track and plot learning rates
            self.lr_finder = LRFinder(self.model, stop_factor=stop_factor)
            self.lr_finder.find(
                self._prepare(self.train_data),
                steps_per_epoch,
                use_gen=use_gen,
                start_lr=start_lr,
                lr_mult=lr_mult,
                max_epochs=max_epochs,
                class_weight=class_weight,
                workers=self.workers,
                use_multiprocessing=self.use_multiprocessing,
                batch_size=self.batch_size,
                verbose=verbose,
            )
        except KeyboardInterrupt:
            # re-load the state saved before the simulation
            _restore_state()
            return

        # re-load the state saved before the simulation
        _restore_state()

        # instructions to invoker
        U.vprint("\n", verbose=verbose)
        U.vprint("done.", verbose=verbose)
        if show_plot:
            U.vprint(
                "Visually inspect loss plot and select learning rate associated with falling loss",
                verbose=verbose,
            )
            self.lr_plot(suggest=suggest)
        else:
            U.vprint(
                "Please invoke the Learner.lr_plot() method to visually inspect "
                "the loss plot to help identify the maximal learning rate "
                "associated with falling loss.",
                verbose=verbose,
            )
        return

    def lr_estimate(self):
        """
        ```
        Return numerical estimates of lr using the following methods:
            1. lr associated with minimum numerical gradient (None if gradient computation fails)
            2. lr associated with minimum loss divided by 10
            3. lr associated with longest valley
        Since none of these methods are fool-proof and can
        potentially return bad estimates, it is recommended that you
        examine the plot generated by lr_plot to estimate the learning rate.
        Returns:
          the estimates as returned by LRFinder.estimate_lr
        Raises:
          ValueError: if lr_find has not been called yet
        ```
        """
        if self.lr_finder is None or not self.lr_finder.find_called():
            raise ValueError("Please call lr_find first.")
        return self.lr_finder.estimate_lr()

    def lr_plot(
        self, n_skip_beginning=10, n_skip_end=5, suggest=False, return_fig=False
    ):
        """
        ```
        Plots the loss vs. learning rate to help identify
        the maximal learning rate associated with a falling loss.
        The n_skip_beginning and n_skip_end arguments can be used
        to "zoom in" on the plot.
        Args:
            n_skip_beginning(int): number of batches to skip on the left.
            n_skip_end(int):  number of batches to skip on the right.
            suggest(bool): will highlight numerical estimate
                           of best lr if True - methods adapted from fastai
            return_fig(bool): If True, return matplotlib.figure.Figure
        Returns:
          matplotlib.figure.Figure if return_fig else None
        ```
        """
        # dep_fix: bug in TF 2.2 and 2.3
        if version.parse(tf.__version__) > version.parse("2.1") and version.parse(
            tf.__version__
        ) < version.parse("2.4"):
            if n_skip_end == 5:
                n_skip_end = 10

        if self.lr_finder is None or not self.lr_finder.find_called():
            raise ValueError("Please call lr_find first.")
        return self.lr_finder.plot_loss(
            n_skip_beginning=n_skip_beginning,
            n_skip_end=n_skip_end,
            suggest=suggest,
            return_fig=return_fig,
        )

    def plot(self, plot_type="loss", return_fig=False):
        """
        ```
        plots training history
        Args:
          plot_type (str):  A valid value in tf.keras History. Either a built-in value {'loss', 'lr', 'momentum'} or
                            other values previously specified by user. For instance, if 'mae' and/or 'mse' is previously
                            specified as metrics when creating model, then these values can also be specified.
          return_fig(bool):  If True, return matplotlib.figure.Figure
        Return:
          matplotlib.figure.Figure if return_fig else None
        ```
        """
        if self.history is None:
            raise Exception("No training history - did you train the model yet?")

        if not isinstance(plot_type, str):
            raise ValueError("plot_type must be str/string")

        history = self.history.history
        if plot_type == "loss":
            plt.plot(history["loss"])
            if "val_loss" in history:
                plt.plot(history["val_loss"])
                legend_items = ["train", "validation"]
            else:
                legend_items = ["train"]
            plt.title("Model Loss")
            plt.ylabel("loss")
            plt.xlabel("epoch")
            plt.legend(legend_items, loc="upper left")
        elif plot_type == "lr":
            if "lr" not in history:
                raise ValueError(
                    "no lr in history: are you sure you used autofit or fit_onecycle to train?"
                )
            plt.plot(history["lr"])
            plt.title("LR Schedule")
            plt.ylabel("lr")
            plt.xlabel("iterations")
        elif plot_type == "momentum":
            if "momentum" not in history:
                raise ValueError(
                    "no momentum history: are you sure you used autofit or fit_onecycle to train?"
                )
            plt.plot(history["momentum"])
            plt.title("Momentum Schedule")
            plt.ylabel("momentum")
            plt.xlabel("iterations")
        else:
            if plot_type not in history:
                raise ValueError(
                    f"no {plot_type} in history: are you sure {plot_type} exists in history?"
                )
            plt.plot(history[plot_type])
            val_key = f"val_{plot_type}"
            if val_key in history:
                plt.plot(history[val_key])
                legend_items = ["train", "validation"]
            else:
                warnings.warn(
                    f"Validation value for {plot_type} wasn't found in history"
                )
                legend_items = ["train"]
            plt.title(f"History of {plot_type}")
            plt.ylabel(plot_type)
            plt.xlabel("epoch")
            plt.legend(legend_items, loc="upper left")

        fig = plt.gcf()
        if return_fig:
            return fig
        return

    def print_layers(self, show_wd=False):
        """
        ```
        prints the layers of the model along with indices
        Args:
          show_wd(bool): If True, also print the l2 (or l1) value of each
                         layer's kernel_regularizer, when present.
        ```
        """
        if show_wd:
            warnings.warn(
                "set_weight_decay now uses AdamWeightDecay instead of kernel_regularizers."
            )
        for i, layer in enumerate(self.model.layers):
            if show_wd and hasattr(layer, "kernel_regularizer"):
                reg = layer.kernel_regularizer
                if hasattr(reg, "l2"):
                    wd = reg.l2
                elif hasattr(reg, "l1"):
                    wd = reg.l1
                else:
                    wd = None
                print("%s (trainable=%s, wd=%s) : %s" % (i, layer.trainable, wd, layer))
            else:
                print("%s (trainable=%s) : %s" % (i, layer.trainable, layer))
        return

    def layer_output(self, layer_id, example_id=0, use_val=False):
        # should be implemented in subclass
        raise NotImplementedError

    def set_lr(self, lr):
        """
        ```
        Sets the learning rate of the model's optimizer to lr.
        ```
        """
        K.set_value(self.model.optimizer.lr, lr)
        return

    def _check_cycles(self, n_cycles, cycle_len, cycle_mult):
        """
        ```
        Validates the cycle arguments and returns the total number of epochs.
        If cycle_len is None the result is n_cycles; otherwise it is the sum
        of the cycle lengths, where each cycle is cycle_mult times as long
        as the previous one.
        ```
        """
        if not isinstance(n_cycles, int) or n_cycles < 1:
            raise ValueError("n_cycles must be >= 1")
        if not isinstance(cycle_mult, int) or cycle_mult < 1:
            raise ValueError("cycle_mult must by >= 1")
        if cycle_len is not None:
            if not isinstance(cycle_len, int) or cycle_len < 1:
                raise ValueError("cycle_len must either be None or >= 1")

        # calculate number of epochs
        if cycle_len is None:
            return n_cycles
        epochs = 0
        tmp_cycle_len = cycle_len
        for _ in range(n_cycles):
            epochs += tmp_cycle_len
            tmp_cycle_len *= cycle_mult
        return epochs

    def _cb_sgdr(
        self,
        max_lr,
        steps_per_epoch,
        cycle_len,
        cycle_mult,
        lr_decay=1.0,
        callbacks=None,
    ):
        """
        ```
        Appends an SGDRScheduler to callbacks when cycle_len is not None and
        callbacks does not already contain one.
        Returns the callbacks list, or None if it is empty.
        ```
        """
        if callbacks and "SGDRScheduler" in [type(cb).__name__ for cb in callbacks]:
            return callbacks

        # configuration
        min_lr = 1e-9
        if max_lr <= min_lr:
            min_lr = max_lr / 10

        # use learning_rate schedule
        if cycle_len is not None:
            # a fresh list per call: a default list would be shared across calls
            if not isinstance(callbacks, list):
                callbacks = []
            from .lroptimize.sgdr import SGDRScheduler

            schedule = SGDRScheduler(
                min_lr=min_lr,
                max_lr=max_lr,
                steps_per_epoch=steps_per_epoch,
                lr_decay=lr_decay,
                cycle_length=cycle_len,
                mult_factor=cycle_mult,
            )
            callbacks.append(schedule)
        if not callbacks:
            callbacks = None
        return callbacks

    def _cb_checkpoint(self, folder, callbacks=None):
        """
        ```
        Appends a weights-only ModelCheckpoint writing into folder when
        folder is not None and callbacks does not already contain one.
        Returns the callbacks list, or None if it is empty.
        ```
        """
        if callbacks and "ModelCheckpoint" in [type(cb).__name__ for cb in callbacks]:
            return callbacks
        if folder is not None:
            os.makedirs(folder, exist_ok=True)
            # a fresh list per call: a default list would be shared across calls
            if not isinstance(callbacks, list):
                callbacks = []
            if self.val_data is not None:
                filepath = os.path.join(
                    folder, "weights-{epoch:02d}-{val_loss:.2f}.hdf5"
                )
            else:
                filepath = os.path.join(folder, "weights-{epoch:02d}.hdf5")
            callbacks.append(
                keras.callbacks.ModelCheckpoint(
                    filepath, save_best_only=False, save_weights_only=True
                )
            )
        if not callbacks:
            callbacks = None
        return callbacks

    def _cb_earlystopping(self, early_stopping, callbacks=None):
        """
        ```
        Appends an EarlyStopping callback on val_loss with patience
        early_stopping when early_stopping is truthy and callbacks does not
        already contain one.
        Returns the callbacks list, or None if it is empty.
        ```
        """
        if callbacks and "EarlyStopping" in [type(cb).__name__ for cb in callbacks]:
            return callbacks
        if early_stopping:
            # a fresh list per call: a default list would be shared across calls
            if not isinstance(callbacks, list):
                callbacks = []
            try:
                callbacks.append(
                    keras.callbacks.EarlyStopping(
                        monitor="val_loss",
                        min_delta=0,
                        patience=early_stopping,
                        restore_best_weights=True,
                        verbose=0,
                        mode="auto",
                    )
                )
            except TypeError:
                # restore_best_weights is not accepted by this Keras version
                warnings.warn(
                    "The early_stopping=True argument relies on EarlyStopping.restore_best_weights, "
                    "which is only supported on Keras 2.2.3 or greater. "
                    "For now, we are falling back to EarlyStopping.restore_best_weights=False. "
                    "Please use checkpoint_folder option in fit() to restore best weights."
                )
                callbacks.append(
                    keras.callbacks.EarlyStopping(
                        monitor="val_loss",
                        min_delta=0,
                        patience=early_stopping,
                        verbose=0,
                        mode="auto",
                    )
                )

        if not callbacks:
            callbacks = None
        return callbacks

    def _prepare(self, data, train=True):
        """
        ```
        Subclasses can override this method if data
        needs to be specially-prepared prior to invoking fit methods
        Args:
          data:  dataset
          train(bool):  If True, prepare for training. Otherwise, prepare for evaluation.
        Returns:
          None if data is None, data.to_tfdataset(train=train) if data has
          a to_tfdataset method, and data itself otherwise
        ```
        """
        if data is None:
            return None

        if hasattr(data, "to_tfdataset"):
            return data.to_tfdataset(train=train)
        return data

    @abstractmethod
    def fit(self, lr, n_cycles, cycle_len=None, cycle_mult=1, batch_size=U.DEFAULT_BS):
        pass

    def fit_onecycle(
        self,
        lr,
        epochs,
        checkpoint_folder=None,
        cycle_momentum=True,
        max_momentum=0.95,
        min_momentum=0.85,
        class_weight=None,
        callbacks=[],
        steps_per_epoch=None,
        verbose=1,
    ):
        """
        ```
        Train model using a version of Leslie Smith's 1cycle policy.
        This method can be used with any optimizer; cyclical momentum is
        only applied when the optimizer has a beta_1 parameter.
        Args:
            lr (float): (maximum) learning rate.
                       It is recommended that you estimate lr yourself by
                       running lr_finder (and lr_plot) and visually inspect plot
                       for dramatic loss drop.
            epochs (int): Number of epochs.
            checkpoint_folder (string): Folder path in which to save the model weights
                                        for each epoch.  File name will be of the form:
                                        weights-{epoch:02d}-{val_loss:.2f}.hdf5
            cycle_momentum (bool):    If True and optimizer is Adam, Nadam, or Adamax, momentum of
                                      optimizer will be cycled between 0.95 and 0.85 as described in
                                      https://arxiv.org/abs/1803.09820.
                                      Only takes effect if Adam, Nadam, or Adamax optimizer is used.
            max_momentum(float): Maximum momentum to use if cycle_momentum=True
            min_momentum(float): minimum momentum to use if cycle_momentum=True
            class_weight (dict):       Optional dictionary mapping class indices (integers) to a weight (float)
            callbacks (list): list of Callback instances to employ during training
            steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used.
                                  Ignored unless training dataset is generator.
            verbose (bool):  verbose mode
        Returns:
            the history object returned by self.fit, augmented with the
            'lr', 'iterations' and (if cycled) 'momentum' series
        ```
        """
        if not self._is_adamlike() and cycle_momentum:
            warnings.warn(
                "cyclical momentum has been disabled because "
                + 'optimizer is not "Adam-like" with beta_1 param'
            )
            cycle_momentum = False

        num_samples = U.nsamples_from_data(self.train_data)
        if steps_per_epoch is None:
            steps_per_epoch = math.ceil(num_samples / self.batch_size)

        # work on a copy so the caller's list is never modified
        kcallbacks = callbacks[:] if callbacks else []
        if not cycle_momentum:
            max_momentum = None
            min_momentum = None

        from .lroptimize.triangular import CyclicLR

        clr = CyclicLR(
            base_lr=lr / 10,
            max_lr=lr,
            step_size=math.ceil((steps_per_epoch * epochs) / 2),
            reduce_on_plateau=0,
            max_momentum=max_momentum,
            min_momentum=min_momentum,
            verbose=verbose,
        )
        kcallbacks.append(clr)

        # start training
        policy = "onecycle"
        U.vprint("\n", verbose=verbose)
        U.vprint(
            "begin training using %s policy with max lr of %s..." % (policy, lr),
            verbose=verbose,
        )
        hist = self.fit(
            lr,
            epochs,
            early_stopping=None,
            checkpoint_folder=checkpoint_folder,
            verbose=verbose,
            class_weight=class_weight,
            callbacks=kcallbacks,
            steps_per_epoch=steps_per_epoch,
        )
        hist.history["lr"] = clr.history["lr"]
        hist.history["iterations"] = clr.history["iterations"]
        if cycle_momentum:
            hist.history["momentum"] = clr.history["momentum"]
        self.history = hist
        return hist

    def autofit(
        self,
        lr,
        epochs=None,
        early_stopping=None,
        reduce_on_plateau=None,
        reduce_factor=2,
        cycle_momentum=True,
        max_momentum=0.95,
        min_momentum=0.85,
        monitor="val_loss",
        checkpoint_folder=None,
        class_weight=None,
        callbacks=[],
        steps_per_epoch=None,
        verbose=1,
    ):
        """
        ```
        Automatically train model using a default learning rate schedule shown to work well
        in practice.  By default, this method currently employs a triangular learning
        rate policy (https://arxiv.org/abs/1506.01186).
        During each epoch, this learning rate policy varies the learning rate from lr/10 to lr
        and then back to a low learning rate that is near-zero.
        If epochs is None, then early_stopping and reduce_on_plateau are automatically
        set to U.DEFAULT_ES and U.DEFAULT_ROP, respectively, unless already given.
        Args:
            lr (float): (maximum) learning rate.
                       It is recommended that you estimate lr yourself by
                       running lr_finder (and lr_plot) and visually inspect plot
                       for dramatic loss drop.
            epochs (int): Number of epochs.  If None, training will continue until
                          early stopping is triggered (at most 1024 epochs).
            early_stopping (int):     If not None, training will automatically stop after this many
                                      epochs of no improvement in the monitored metric.
                                      Upon completion, model will be loaded with weights from epoch
                                      with the best monitored value.
                                      NOTE: If reduce_on_plateau is also enabled, then
                                      early_stopping must be greater than reduce_on_plateau.
                                      Example: early_stopping=6, reduce_on_plateau=3.
            reduce_on_plateau (int):  If not None, will lower learning rate
                                      when the monitored metric fails to improve
                                      after the specified number of epochs.
                                      NOTE: If early_stopping is enabled, then
                                      reduce_on_plateau must be less than early_stopping.
                                      Example: early_stopping=6, reduce_on_plateau=3.
            reduce_factor (int):      Learning rate is reduced by this factor on plateau.
                                      Only takes effect if reduce_on_plateau > 0.
            cycle_momentum (bool):    If True and optimizer is Adam, Nadam, or Adamax, momentum of
                                      optimizer will be cycled between 0.95 and 0.85 as described in
                                      https://arxiv.org/abs/1803.09820.
                                      Only takes effect if Adam, Nadam, or Adamax optimizer is used.
            max_momentum(float):  maximum momentum to use when cycle_momentum=True
            min_momentum(float): minimum momentum to use when cycle_momentum=True
            checkpoint_folder (string): Folder path in which to save the model weights
                                        for each epoch.  File name will be of the form:
                                        weights-{epoch:02d}-{val_loss:.2f}.hdf5
            monitor (str):              what metric to monitor for early_stopping
                                        and reduce_on_plateau. Defaults to 'val_loss'.
                                        Only used if early_stopping or reduce_on_plateau is enabled.
            class_weight (dict):       Optional dictionary mapping class indices (integers) to a weight (float)
            callbacks (list): list of Callback instances to employ during training
            steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used.
                                  Ignored unless training dataset is generator.
            verbose (bool):  verbose mode
        Returns:
            the history object returned by self.fit, augmented with the
            'lr', 'iterations' and (if cycled) 'momentum' series
        ```
        """
        # check optimizer
        if not self._is_adamlike() and cycle_momentum:
            warnings.warn(
                "cyclical momentum has been disabled because "
                + 'optimizer is not "Adam-like" with beta_1 param'
            )
            cycle_momentum = False

        # setup learning rate policy
        num_samples = U.nsamples_from_data(self.train_data)
        if steps_per_epoch is None:
            steps_per_epoch = math.ceil(num_samples / self.batch_size)
        step_size = math.ceil(steps_per_epoch / 2)

        # handle missing epochs
        if epochs is None:
            epochs = 1024
            if not early_stopping:
                early_stopping = U.DEFAULT_ES
                U.vprint(
                    "early_stopping automatically enabled at patience=%s"
                    % (U.DEFAULT_ES),
                    verbose=verbose,
                )
            if not reduce_on_plateau:
                reduce_on_plateau = U.DEFAULT_ROP
                U.vprint(
                    "reduce_on_plateau automatically enabled at patience=%s"
                    % (U.DEFAULT_ROP),
                    verbose=verbose,
                )
        if (
            reduce_on_plateau
            and early_stopping
            and (reduce_on_plateau > early_stopping)
        ):
            warnings.warn(
                "reduce_on_plateau=%s and is greater than " % (reduce_on_plateau)
                + "early_stopping=%s.  " % (early_stopping)
                + "Either reduce reduce_on_plateau or set early_stopping "
                + "to be higher."
            )

        # check monitor
        if reduce_on_plateau is not None or early_stopping is not None:
            if monitor.startswith("val_") and self.val_data is None:
                raise ValueError(
                    "monitor is %s but no val_data was supplied.\nChange monitor or supply val_data to get_learner function."
                    % monitor
                )
            if monitor != "val_loss" and monitor not in self._monitor_metrics:
                raise ValueError(
                    "monitor must be one of {%s}" % (self._monitor_metrics)
                )

        # work on a copy so the caller's list is never modified
        kcallbacks = callbacks[:] if callbacks else []
        if not cycle_momentum:
            max_momentum = None
            min_momentum = None

        from .lroptimize.triangular import CyclicLR

        clr = CyclicLR(
            base_lr=lr / 10,
            max_lr=lr,
            step_size=step_size,
            verbose=verbose,
            monitor=monitor,
            reduce_on_plateau=reduce_on_plateau,
            reduce_factor=reduce_factor,
            max_momentum=max_momentum,
            min_momentum=min_momentum,
        )
        kcallbacks.append(clr)
        if early_stopping:
            kcallbacks.append(
                keras.callbacks.EarlyStopping(
                    monitor=monitor,
                    min_delta=0,
                    patience=early_stopping,
                    restore_best_weights=True,
                    verbose=1,
                    mode="auto",
                )
            )

        # start training
        U.vprint("\n", verbose=verbose)
        policy = "triangular learning rate"
        U.vprint(
            "begin training using %s policy with max lr of %s..." % (policy, lr),
            verbose=verbose,
        )
        hist = self.fit(
            lr,
            epochs,
            early_stopping=early_stopping,
            checkpoint_folder=checkpoint_folder,
            verbose=verbose,
            class_weight=class_weight,
            callbacks=kcallbacks,
            steps_per_epoch=steps_per_epoch,
        )
        hist.history["lr"] = clr.history["lr"]
        hist.history["iterations"] = clr.history["iterations"]
        if cycle_momentum:
            hist.history["momentum"] = clr.history["momentum"]
        self.history = hist
        return hist

    def ground_truth(self, val_data=None):
        """
        ```
        Returns the targets of val_data (or of self.val_data if val_data
        is None) as extracted by U.y_from_data.
        ```
        """
        val = val_data if val_data is not None else self.val_data
        # explicit None check, matching predict and top_losses
        if val is None:
            raise Exception("val_data must be supplied to get_learner or ground_truth")
        return U.y_from_data(val)

    def predict(self, val_data=None):
        """
        ```
        Makes predictions on validation set
        Args:
          val_data: optional data to use instead of self.val_data
        Returns:
          the result of self.model.predict on the data
        ```
        """
        val = val_data if val_data is not None else self.val_data
        if val is None:
            raise Exception("val_data must be supplied to get_learner or predict")
        if U.is_iter(val):
            if hasattr(val, "reset"):
                val.reset()
            steps = np.ceil(U.nsamples_from_data(val) / val.batch_size)
            # *_generator methods are deprecated from TF 2.1.0
            return self.model.predict(self._prepare(val, train=False), steps=steps)
        return self.model.predict(val[0], batch_size=self.eval_batch_size)
- abc.ABC
def autofit(self, lr, epochs=None, early_stopping=None, reduce_on_plateau=None, reduce_factor=2, cycle_momentum=True, max_momentum=0.95, min_momentum=0.85, monitor='val_loss', checkpoint_folder=None, class_weight=None, callbacks=[], steps_per_epoch=None, verbose=1)
Automatically train model using a default learning rate schedule shown to work well in practice. By default, this method currently employs a triangular learning rate policy (see Leslie Smith, "Cyclical Learning Rates for Training Neural Networks"). During each epoch, this learning rate policy varies the learning rate from lr/10 to lr and then back to a low learning rate that is near-zero. If epochs is None, then early_stopping and reduce_on_plateau are automatically set to 5 and 2, respectively. Args: lr (float): optional initial learning rate. If missing, lr will be estimated automatically. It is recommended that you estimate lr yourself by running lr_finder (and lr_plot) and visually inspect plot for dramatic loss drop. epochs (int): Number of epochs. If None, training will continue until validation loss no longer improves after 5 epochs. early_stopping (int): If not None, training will automatically stop after this many epochs of no improvement in validation loss. Upon completion, model will be loaded with weights from epoch with lowest validation loss. NOTE: If reduce_on_plateau is also enabled, then early_stopping must be greater than reduce_on_plateau. Example: early_stopping=6, reduce_on_plateau=3. reduce_on_plateau (int): If not None, will lower learning rate when validation loss fails to improve after the specified number of epochs. NOTE: If early_stopping is enabled, then reduce_on_plateau must be less than early_stopping. Example: early_stopping=6, reduce_on_plateau=3. reduce_factor (int): Learning rate is reduced by this factor on plateau. Only takes effect if reduce_on_plateau > 0. cycle_momentum (bool): If True and optimizer is Adam, Nadam, or Adamax, momentum of optimizer will be cycled between 0.95 and 0.85 as described in Leslie Smith, "A disciplined approach to neural network hyper-parameters". Only takes effect if Adam, Nadam, or Adamax optimizer is used. max_momentum(float): maximum momentum to use when cycle_momentum=True min_momentum(float): minimum momentum to use when cycle_momentum=True checkpoint_folder (string): Folder path in which to save the model weights for each epoch. 
File name will be of the form: weights-{epoch:02d}-{val_loss:.2f}.hdf5 monitor (str): what metric to monitor for early_stopping and reduce_on_plateau. Defaults to 'val_loss'. Only used if early_stopping or reduce_on_plateau is enabled. class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float) callbacks (list): list of Callback instances to employ during training steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used. Ignored unless training dataset is generator. verbose (bool): verbose mode
Expand source code
def autofit(
    self,
    lr,
    epochs=None,
    early_stopping=None,
    reduce_on_plateau=None,
    reduce_factor=2,
    cycle_momentum=True,
    max_momentum=0.95,
    min_momentum=0.85,
    monitor="val_loss",
    checkpoint_folder=None,
    class_weight=None,
    callbacks=[],
    steps_per_epoch=None,
    verbose=1,
):
    """
    ```
    Automatically train model using a default learning rate schedule shown to
    work well in practice. This method employs a triangular (cyclical) learning
    rate policy: a CyclicLR callback is configured with base_lr=lr/10, max_lr=lr
    and a half-cycle (step_size) of ceil(steps_per_epoch/2) iterations, so one
    full cycle spans roughly one epoch.

    If epochs is None, epochs is set to 1024 and early_stopping and
    reduce_on_plateau are enabled with U.DEFAULT_ES and U.DEFAULT_ROP,
    respectively, unless the caller already supplied them.

    Args:
        lr (float): maximum learning rate of the triangular schedule (required).
                    It is recommended that you estimate lr by running lr_find
                    (and lr_plot) and visually inspecting the plot for a
                    dramatic loss drop.
        epochs (int): Number of epochs. If None, training runs until early
                      stopping triggers (capped at 1024 epochs).
        early_stopping (int): If not None, training will automatically stop after
                              this many epochs of no improvement in the monitored
                              metric. The EarlyStopping callback is created with
                              restore_best_weights=True.
                              NOTE: If reduce_on_plateau is also enabled, then
                              early_stopping should be greater than
                              reduce_on_plateau (a warning is issued otherwise).
                              Example: early_stopping=6, reduce_on_plateau=3.
        reduce_on_plateau (int): If not None, will lower the learning rate when
                                 the monitored metric fails to improve after the
                                 specified number of epochs.
        reduce_factor (int): Learning rate is reduced by this factor on plateau.
                             Only takes effect if reduce_on_plateau > 0.
        cycle_momentum (bool): If True and the optimizer is "Adam-like" (see
                               self._is_adamlike), momentum of the optimizer is
                               cycled between max_momentum and min_momentum.
                               Disabled with a warning for other optimizers.
        max_momentum(float): maximum momentum to use when cycle_momentum=True
        min_momentum(float): minimum momentum to use when cycle_momentum=True
        monitor (str): what metric to monitor for early_stopping
                       and reduce_on_plateau. Defaults to 'val_loss'.
        checkpoint_folder (string): Folder path in which to save the model weights
                                    for each epoch. File name will be of the form:
                                    weights-{epoch:02d}-{val_loss:.2f}.hdf5
        class_weight (dict): Optional dictionary mapping class indices (integers)
                             to a weight (float)
        callbacks (list): list of Callback instances to employ during training.
                          The list is copied, never mutated.
        steps_per_epoch(int): Steps per epoch. If None, then
                              math.ceil(num_samples/batch_size) is used.
        verbose (bool): verbose mode
    Returns:
        the history object returned by self.fit, augmented with 'lr',
        'iterations' and (when momentum is cycled) 'momentum' entries.
    Raises:
        ValueError: if monitor is a validation metric but no val_data exists,
                    or monitor is not among self._monitor_metrics.
    ```
    """
    # check optimizer
    if not self._is_adamlike() and cycle_momentum:
        warnings.warn(
            "cyclical momentum has been disabled because "
            + 'optimizer is not "Adam-like" with beta_1 param'
        )
        cycle_momentum = False

    # setup learning rate policy
    num_samples = U.nsamples_from_data(self.train_data)
    if steps_per_epoch is None:
        steps_per_epoch = math.ceil(num_samples / self.batch_size)
    step_size = math.ceil(steps_per_epoch / 2)

    # handle missing epochs: train "until early stopping" with default patience
    if epochs is None:
        epochs = 1024
        if not early_stopping:
            early_stopping = U.DEFAULT_ES
            U.vprint(
                "early_stopping automatically enabled at patience=%s"
                % (U.DEFAULT_ES),
                verbose=verbose,
            )
        if not reduce_on_plateau:
            reduce_on_plateau = U.DEFAULT_ROP
            U.vprint(
                "reduce_on_plateau automatically enabled at patience=%s"
                % (U.DEFAULT_ROP),
                verbose=verbose,
            )
    if reduce_on_plateau and early_stopping and (reduce_on_plateau > early_stopping):
        warnings.warn(
            "reduce_on_plateau=%s and is greater than " % (reduce_on_plateau)
            + "early_stopping=%s. " % (early_stopping)
            + "Either reduce reduce_on_plateau or set early_stopping "
            + "to be higher."
        )

    # check monitor
    if reduce_on_plateau is not None or early_stopping is not None:
        if monitor.startswith("val_") and self.val_data is None:
            raise ValueError(
                "monitor is %s but no val_data was supplied.\nChange monitor or supply val_data to get_learner function."
                % monitor
            )
        if monitor != "val_loss" and monitor not in self._monitor_metrics:
            raise ValueError(
                "monitor must be one of {%s}" % (self._monitor_metrics)
            )

    # setup callbacks for learning rates and early stopping
    # (copy so the caller's list / the shared default is never mutated)
    kcallbacks = callbacks[:] if callbacks else []
    if not cycle_momentum:
        # passing None disables momentum cycling in CyclicLR
        max_momentum = None
        min_momentum = None

    from .lroptimize.triangular import CyclicLR

    clr = CyclicLR(
        base_lr=lr / 10,
        max_lr=lr,
        step_size=step_size,
        verbose=verbose,
        monitor=monitor,
        reduce_on_plateau=reduce_on_plateau,
        reduce_factor=reduce_factor,
        max_momentum=max_momentum,
        min_momentum=min_momentum,
    )
    kcallbacks.append(clr)
    if early_stopping:
        kcallbacks.append(
            keras.callbacks.EarlyStopping(
                monitor=monitor,
                min_delta=0,
                patience=early_stopping,
                restore_best_weights=True,
                verbose=1,
                mode="auto",
            )
        )

    # start training
    U.vprint("\n", verbose=verbose)
    policy = "triangular learning rate"
    U.vprint(
        "begin training using %s policy with max lr of %s..." % (policy, lr),
        verbose=verbose,
    )
    hist = self.fit(
        lr,
        epochs,
        early_stopping=early_stopping,
        checkpoint_folder=checkpoint_folder,
        verbose=verbose,
        class_weight=class_weight,
        callbacks=kcallbacks,
        steps_per_epoch=steps_per_epoch,
    )
    # expose the per-iteration schedule recorded by the CyclicLR callback
    hist.history["lr"] = clr.history["lr"]
    hist.history["iterations"] = clr.history["iterations"]
    if cycle_momentum:
        hist.history["momentum"] = clr.history["momentum"]
    self.history = hist
    return hist
def evaluate(self, test_data=None, print_report=True, save_path='ktrain_classification_report.csv', class_names=[])
alias for self.validate(). Returns confusion matrix and optionally prints a classification report. This is currently only supported for binary and multiclass classification, not multilabel classification. By default, this uses val_data, as supplied to ktrain.get_learner(). Other validation or test data can optionally be supplied via the <test_data> argument. Supply class_names to include labels instead of integer class values in classification report. Args: test_data(Dataset|np.ndarray): test or validation data. If None, self.val_data is used. print_report(bool): If True, classification report will be printed. If False, report will be saved to CSV at save_path. Not applicable to regression models. save_path(str): Classification report will be saved to this file path/name if print_report=False Not applicable to regression models. class_names(list): list of class names to be used in classification report instead of class integer IDs.
Expand source code
def evaluate(
    self,
    test_data=None,
    print_report=True,
    save_path="ktrain_classification_report.csv",
    class_names=[],
):
    """
    ```
    Alias for self.validate(): every argument is forwarded unchanged
    (test_data is passed as validate's val_data) and validate's result
    is returned as-is.

    Args:
        test_data: test or validation data handed to validate as val_data.
        print_report(bool): forwarded to validate.
        save_path(str): forwarded to validate.
        class_names(list): forwarded to validate.
    Returns:
        whatever self.validate returns.
    ```
    """
    forwarded = {
        "val_data": test_data,
        "print_report": print_report,
        "save_path": save_path,
        "class_names": class_names,
    }
    return self.validate(**forwarded)
def fit(self, lr, n_cycles, cycle_len=None, cycle_mult=1, batch_size=32)
Expand source code
@abstractmethod
def fit(self, lr, n_cycles, cycle_len=None, cycle_mult=1, batch_size=U.DEFAULT_BS):
    """
    ```
    Abstract training entry point; concrete Learner subclasses must override it.
    The base implementation does nothing and returns None.
    ```
    """
def fit_onecycle(self, lr, epochs, checkpoint_folder=None, cycle_momentum=True, max_momentum=0.95, min_momentum=0.85, class_weight=None, callbacks=[], steps_per_epoch=None, verbose=1)
Train model using a version of Leslie Smith's 1cycle policy. This method can be used with any optimizer. Thus, cyclical momentum is not currently implemented. Args: lr (float): (maximum) learning rate. It is recommended that you estimate lr yourself by running lr_finder (and lr_plot) and visually inspect plot for dramatic loss drop. epochs (int): Number of epochs. checkpoint_folder (string): Folder path in which to save the model weights for each epoch. File name will be of the form: weights-{epoch:02d}-{val_loss:.2f}.hdf5 cycle_momentum (bool): If True and optimizer is Adam, Nadam, or Adamax, momentum of optimizer will be cycled between 0.95 and 0.85 as described in Leslie Smith, "A disciplined approach to neural network hyper-parameters". Only takes effect if Adam, Nadam, or Adamax optimizer is used. max_momentum(float): Maximum momentum to use if cycle_momentum=True min_momentum(float): minimum momentum to use if cycle_momentum=True class_weight (dict): Optional dictionary mapping class indices (integers) to a weight (float) callbacks (list): list of Callback instances to employ during training steps_per_epoch(int): Steps per epoch. If None, then, math.ceil(num_samples/batch_size) is used. Ignored unless training dataset is generator. verbose (bool): verbose mode
Expand source code
def fit_onecycle(
    self,
    lr,
    epochs,
    checkpoint_folder=None,
    cycle_momentum=True,
    max_momentum=0.95,
    min_momentum=0.85,
    class_weight=None,
    callbacks=[],
    steps_per_epoch=None,
    verbose=1,
):
    """
    ```
    Train model using a version of Leslie Smith's 1cycle policy: a single
    CyclicLR cycle stretched over the whole run (half-cycle of
    ceil(steps_per_epoch * epochs / 2) iterations), rising from lr/10 to lr
    and back. The learning-rate schedule works with any optimizer; momentum
    cycling is only applied when the optimizer is "Adam-like".

    Args:
        lr (float): (maximum) learning rate.
                    It is recommended that you estimate lr by running lr_find
                    (and lr_plot) and visually inspecting the plot for a
                    dramatic loss drop.
        epochs (int): Number of epochs.
        checkpoint_folder (string): Folder path in which to save the model weights
                                    for each epoch. File name will be of the form:
                                    weights-{epoch:02d}-{val_loss:.2f}.hdf5
        cycle_momentum (bool): If True and the optimizer is "Adam-like" (see
                               self._is_adamlike), momentum is cycled between
                               max_momentum and min_momentum. Disabled with a
                               warning for other optimizers.
        max_momentum(float): Maximum momentum to use if cycle_momentum=True
        min_momentum(float): minimum momentum to use if cycle_momentum=True
        class_weight (dict): Optional dictionary mapping class indices (integers)
                             to a weight (float)
        callbacks (list): list of Callback instances to employ during training.
                          The list is copied, never mutated.
        steps_per_epoch(int): Steps per epoch. If None, then
                              math.ceil(num_samples/batch_size) is used.
        verbose (bool): verbose mode
    Returns:
        the history object returned by self.fit, augmented with 'lr',
        'iterations' and (when momentum is cycled) 'momentum' entries.
    ```
    """
    if not self._is_adamlike() and cycle_momentum:
        warnings.warn(
            "cyclical momentum has been disabled because "
            + 'optimizer is not "Adam-like" with beta_1 param'
        )
        cycle_momentum = False

    num_samples = U.nsamples_from_data(self.train_data)
    if steps_per_epoch is None:
        steps_per_epoch = math.ceil(num_samples / self.batch_size)

    # setup callbacks for learning rates
    # (copy so the caller's list / the shared default is never mutated)
    kcallbacks = callbacks[:] if callbacks else []
    if not cycle_momentum:
        # passing None disables momentum cycling in CyclicLR
        max_momentum = None
        min_momentum = None

    from .lroptimize.triangular import CyclicLR

    clr = CyclicLR(
        base_lr=lr / 10,
        max_lr=lr,
        # one half-cycle covers half of all training iterations
        step_size=math.ceil((steps_per_epoch * epochs) / 2),
        reduce_on_plateau=0,
        max_momentum=max_momentum,
        min_momentum=min_momentum,
        verbose=verbose,
    )
    kcallbacks.append(clr)

    # start training
    policy = "onecycle"
    U.vprint("\n", verbose=verbose)
    U.vprint(
        "begin training using %s policy with max lr of %s..." % (policy, lr),
        verbose=verbose,
    )
    hist = self.fit(
        lr,
        epochs,
        early_stopping=None,
        checkpoint_folder=checkpoint_folder,
        verbose=verbose,
        class_weight=class_weight,
        callbacks=kcallbacks,
        steps_per_epoch=steps_per_epoch,
    )
    # expose the per-iteration schedule recorded by the CyclicLR callback
    hist.history["lr"] = clr.history["lr"]
    hist.history["iterations"] = clr.history["iterations"]
    if cycle_momentum:
        hist.history["momentum"] = clr.history["momentum"]
    self.history = hist
    return hist
def freeze(self, freeze_range=None)
If freeze_range is None, makes all layers trainable=False except last Dense layer. If freeze_range is given, freezes the first <freeze_range> layers and unfreezes all remaining layers. NOTE: Freeze method does not currently work with multi-GPU models. If you are using the load_imagemodel method, please use the freeze_layers argument of load_imagemodel to freeze layers. Args: freeze_range(int): number of layers to freeze Returns: None
Expand source code
def freeze(self, freeze_range=None):
    """
    ```
    Mark a leading run of layers as non-trainable and the rest as trainable,
    then recompile the model via self._recompile().

    With freeze_range=None the boundary is the index of the last Dense layer:
    every layer before it is frozen, the Dense layer and anything after it
    stay trainable. With an integer freeze_range, the first <freeze_range>
    layers are frozen and all remaining layers are unfrozen.

    Args:
        freeze_range(int): number of layers to freeze
    Returns:
        None
    Raises:
        Exception: if freeze_range is None and the model has no Dense layer
        ValueError: if freeze_range is not a plain int >= 1
    ```
    """
    layers = self.model.layers
    if freeze_range is None:
        # walk backwards to find the index of the last Dense layer
        boundary = None
        for idx in range(len(layers) - 1, -1, -1):
            if isinstance(layers[idx], keras.layers.Dense):
                boundary = idx
                break
        if boundary is None:
            raise Exception("cannot find Dense layer in this model")
    else:
        # exact type check on purpose: bools and other int-likes are rejected
        if type(freeze_range) is not int or freeze_range < 1:
            raise ValueError("freeze_range must be integer > 0")
        boundary = freeze_range

    # layers before the boundary are frozen; the rest are trainable
    for idx, layer in enumerate(layers):
        layer.trainable = not (idx < boundary)
    self._recompile()
    return
def get_weight_decay(self)
Get current weight decay rate
Expand source code
def get_weight_decay(self):
    """
    ```
    Return the optimizer's weight_decay_rate when the model's optimizer class
    is named "AdamWeightDecay"; otherwise return None.
    ```
    """
    optimizer = self.model.optimizer
    # matched by class name, so no import of the optimizer class is needed
    if type(optimizer).__name__ != "AdamWeightDecay":
        return None
    return optimizer.weight_decay_rate
def ground_truth(self, val_data=None)
Expand source code
def ground_truth(self, val_data=None):
    """
    ```
    Return the ground-truth targets of a validation set via U.y_from_data.

    Args:
        val_data: optional data to use instead of self.val_data
    Returns:
        whatever U.y_from_data returns for the selected data
    Raises:
        Exception: if neither val_data nor self.val_data is available
    ```
    """
    val = self.val_data if val_data is None else val_data
    # Test for "missing" explicitly instead of relying on truthiness: bool()
    # of an array-like raises, and predict/top_losses also check `is None`.
    # An empty list/tuple is still rejected, as before.
    if val is None or (isinstance(val, (list, tuple)) and len(val) == 0):
        raise Exception("val_data must be supplied to get_learner or ground_truth")
    return U.y_from_data(val)
def layer_output(self, layer_id, example_id=0, use_val=False)
Expand source code
def layer_output(self, layer_id, example_id=0, use_val=False):
    """
    ```
    Placeholder that subclasses are expected to override; the base
    version always raises NotImplementedError.
    ```
    """
    raise NotImplementedError
def load_model(self, fpath, custom_objects=None, **kwargs)
loads model from folder. Note: **kwargs included for backwards compatibility only, as TransformerTextClassLearner.load_model was removed in v0.18.0. Args: fpath(str): path to folder containing model custom_objects(dict): custom objects required to load model. For models included with ktrain, this is populated automatically and can be disregarded.
Expand source code
def load_model(self, fpath, custom_objects=None, **kwargs):
    """
    ```
    Load a model from a folder through the module-level _load_model helper
    and install it as self.model.

    Args:
        fpath(str): path to folder containing model
        custom_objects(dict): custom objects required to load model,
                              forwarded to _load_model.
        **kwargs: accepted but ignored (kept for backwards compatibility).
    Returns:
        None
    ```
    """
    restored = _load_model(
        fpath,
        train_data=self.train_data,
        custom_objects=custom_objects,
    )
    self.model = restored
    return
def lr_estimate(self)
Return numerical estimates of lr using three different methods: 1. lr associated with minimum numerical gradient (None if gradient computation fails) 2. lr associated with minimum loss divided by 10 3. lr associated with longest valley Since none of these methods are fool-proof and can potentially return bad estimates, it is recommended that you examine the plot generated by lr_plot to estimate the learning rate. Returns: tuple: tuple of the form (float, float)
Expand source code
def lr_estimate(self):
    """
    ```
    Return the numerical learning-rate estimates computed by the LR finder
    (the result of self.lr_finder.estimate_lr()).

    These estimates are heuristics and can be poor, so inspecting the plot
    produced by lr_plot is still recommended.

    Returns:
        the value returned by self.lr_finder.estimate_lr()
    Raises:
        ValueError: if lr_find has not been run yet
    ```
    """
    finder = self.lr_finder
    ready = finder is not None and finder.find_called()
    if not ready:
        raise ValueError("Please call lr_find first.")
    return finder.estimate_lr()
def lr_find(self, start_lr=1e-07, lr_mult=1.01, max_epochs=None, class_weight=None, stop_factor=4, show_plot=False, suggest=False, restore_weights_only=False, verbose=1)
Plots loss as learning rate is increased. Highest learning rate corresponding to a still falling loss should be chosen. If you find the LR finder is running for more epochs than you'd prefer, you can set max_epochs (e.g., max_epochs=5) to estimate LR with a smaller sample size. If lr_mult is supplied and max_epochs is None, LR will increase until loss diverges. Reasonable values of lr_mult are between 1.01 and 1.05. If max_epochs is supplied, lr_mult argument is ignored and computed automatically. Reference: Leslie Smith, "Cyclical Learning Rates for Training Neural Networks". Args: start_lr (float): smallest lr to start simulation lr_mult (float): multiplication factor to increase LR. Ignored if max_epochs is supplied. max_epochs (int): maximum number of epochs to simulate. lr_mult is ignored if max_epochs is supplied. Default is None. Set max_epochs to an integer (e.g., 5) if lr_find is taking too long and running for more epochs than desired. class_weight(dict): class_weight parameter passed to model.fit for imbalanced datasets. stop_factor(int): factor used to determine threshold that loss must exceed to stop training simulation. Increase this if loss is erratic and lr_find exits too early. show_plot (bool): If True, automatically invoke lr_plot restore_weights_only(bool): If True, when training simulation is complete, the model weights only are restored, but not the original optimizer weights. In at least a few cases, this seems to improve performance when actual training begins. Further investigation is needed, so it is False by default. verbose (bool): specifies how much output to print Returns: None
Expand source code
def lr_find(
    self,
    start_lr=1e-7,
    lr_mult=1.01,
    max_epochs=None,
    class_weight=None,
    stop_factor=4,
    show_plot=False,
    suggest=False,
    restore_weights_only=False,
    verbose=1,
):
    """
    ```
    Simulate training while the learning rate is increased, recording the loss,
    so that a learning rate associated with a still-falling loss can be chosen
    (see lr_plot). The model state saved before the simulation is restored
    afterwards, also when the simulation is interrupted with Ctrl-C.

    If you find the LR finder is running for more epochs than you'd prefer,
    set max_epochs (e.g., max_epochs=5) to estimate LR with a smaller sample
    size. When steps per epoch is <= 64 and max_epochs is None, max_epochs is
    set to 5 automatically (with a warning).

    Args:
        start_lr (float): smallest lr to start simulation
        lr_mult (float): multiplication factor to increase LR.
                         Ignored if max_epochs is supplied.
        max_epochs (int): maximum number of epochs to simulate.
                          Required when running TensorFlow 2.2 or 2.3.
        class_weight(dict): class_weight parameter forwarded to the LR finder
                            for imbalanced datasets.
        stop_factor(int): factor used to determine the threshold that loss
                          must exceed to stop the training simulation.
                          Increase this if loss is erratic and lr_find
                          exits too early.
        show_plot (bool): If True, automatically invoke lr_plot
        suggest (bool): forwarded to lr_plot when show_plot is True
        restore_weights_only(bool): If True, only the model weights are saved
                                    and restored (via save_weights/load_weights);
                                    otherwise the whole model is saved and
                                    re-loaded (via save_model/load_model).
        verbose (bool): specifies how much output to print
    Returns:
        None
    Raises:
        ValueError: on TensorFlow 2.2/2.3 when max_epochs is None
    ```
    """
    # dep_fix: bug in TF 2.2 and 2.3
    if version.parse(tf.__version__) > version.parse("2.1") and version.parse(
        tf.__version__
    ) < version.parse("2.4"):
        if max_epochs is None:
            raise ValueError(
                "Due to a bug in TensorFlow 2.2 and 2.3, the max_epochs argument is temporarily required. "
                + "Please re-run with max_epochs (e.g., max_epochs=5). \n"
                + "More info:"
            )

    U.vprint(
        "simulating training for different learning rates... this may take a few moments...",
        verbose=verbose,
    )

    # save current state so it can be restored after the simulation
    # dep_fix: save_model (not save_weights) is the default snapshot mechanism
    weightfile = None
    temp_folder = None
    if restore_weights_only:
        fd, weightfile = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor; only the path is needed
        os.close(fd)
        self.model.save_weights(weightfile)
    else:
        temp_folder = tempfile.mkdtemp()
        self.save_model(temp_folder)

    def _restore_saved_state():
        # Restore with the same mechanism that was used to take the snapshot.
        if restore_weights_only:
            self.model.load_weights(weightfile)
        else:
            self.load_model(temp_folder)

    # compute steps_per_epoch
    num_samples = U.nsamples_from_data(self.train_data)
    bs = (
        self.train_data.batch_size
        if hasattr(self.train_data, "batch_size")
        else self.batch_size
    )
    if U.is_iter(self.train_data):
        use_gen = True
        steps_per_epoch = num_samples // bs
    else:
        use_gen = False
        steps_per_epoch = np.ceil(num_samples / bs)

    # check steps_per_epoch
    if steps_per_epoch <= 64 and max_epochs is None:
        warnings.warn(
            "max_epochs is being set to 5 since steps per epoch is small. "
            + "If you wish to estimate LR using more epochs, set max_epochs manually."
        )
        max_epochs = 5

    try:
        # track and plot learning rates
        self.lr_finder = LRFinder(self.model, stop_factor=stop_factor)
        self.lr_finder.find(
            self._prepare(self.train_data),
            steps_per_epoch,
            use_gen=use_gen,
            start_lr=start_lr,
            lr_mult=lr_mult,
            max_epochs=max_epochs,
            class_weight=class_weight,
            workers=self.workers,
            use_multiprocessing=self.use_multiprocessing,
            batch_size=self.batch_size,
            verbose=verbose,
        )
    except KeyboardInterrupt:
        # user aborted the simulation: put the model back and stop quietly
        _restore_saved_state()
        return

    # re-load the pre-simulation state
    _restore_saved_state()

    # instructions to invoker
    U.vprint("\n", verbose=verbose)
    U.vprint("done.", verbose=verbose)
    if show_plot:
        U.vprint(
            "Visually inspect loss plot and select learning rate associated with falling loss",
            verbose=verbose,
        )
        self.lr_plot(suggest=suggest)
    else:
        U.vprint(
            "Please invoke the Learner.lr_plot() method to visually inspect "
            "the loss plot to help identify the maximal learning rate "
            "associated with falling loss.",
            verbose=verbose,
        )
    return
def lr_plot(self, n_skip_beginning=10, n_skip_end=5, suggest=False, return_fig=False)
Plots the loss vs. learning rate to help identify the maximal learning rate associated with a falling loss. The n_skip_beginning and n_skip_end arguments can be used to "zoom in" on the plot. Args: n_skip_beginning(int): number of batches to skip on the left. n_skip_end(int): number of batches to skip on the right. suggest(bool): will highlight numerical estimate of best lr if True - methods adapted from fastai return_fig(bool): If True, return matplotlib.figure.Figure Returns: matplotlib.figure.Figure if return_fig else None
Expand source code
def lr_plot(
    self, n_skip_beginning=10, n_skip_end=5, suggest=False, return_fig=False
):
    """
    ```
    Plot loss against learning rate from the most recent lr_find run by
    delegating to self.lr_finder.plot_loss. n_skip_beginning and n_skip_end
    trim batches from either side to "zoom in" on the plot.

    Args:
        n_skip_beginning(int): number of batches to skip on the left.
        n_skip_end(int): number of batches to skip on the right. On
                         TensorFlow versions above 2.1 and below 2.4 the
                         default of 5 is replaced by 10.
        suggest(bool): forwarded to plot_loss
        return_fig(bool): forwarded to plot_loss
    Returns:
        the value returned by self.lr_finder.plot_loss
    Raises:
        ValueError: if lr_find has not been run yet
    ```
    """
    # dep_fix: bug in TF 2.2 and 2.3
    tf_is_affected = version.parse(tf.__version__) > version.parse(
        "2.1"
    ) and version.parse(tf.__version__) < version.parse("2.4")
    if tf_is_affected and n_skip_end == 5:
        n_skip_end = 10

    finder = self.lr_finder
    if finder is None or not finder.find_called():
        raise ValueError("Please call lr_find first.")
    plot_kwargs = dict(
        n_skip_beginning=n_skip_beginning,
        n_skip_end=n_skip_end,
        suggest=suggest,
        return_fig=return_fig,
    )
    return self.lr_finder.plot_loss(**plot_kwargs)
def plot(self, plot_type='loss', return_fig=False)
plots training history Args: plot_type (str): A valid value in tf.keras History. Either a built-in value {'loss', 'lr', 'momentum'} or other values previously specified by user. For instance, if 'mae' and/or 'mse' is previously specified as metrics when creating model, then these values can also be specified. return_fig(bool): If True, return matplotlib.figure.Figure Return: matplotlib.figure.Figure if return_fig else None
Expand source code
def plot(self, plot_type="loss", return_fig=False):
    """
    ```
    Plots training history stored in self.history.

    Args:
        plot_type (str): key of the history dict to plot. 'loss' plots training
                         (and, when present, validation) loss per epoch; 'lr' and
                         'momentum' plot the per-iteration schedules recorded by
                         autofit / fit_onecycle; any other value must be a key
                         present in the history (e.g., a metric such as 'mae'),
                         and its 'val_<plot_type>' counterpart is plotted too
                         when available.
        return_fig(bool): If True, return matplotlib.figure.Figure
    Return:
        matplotlib.figure.Figure if return_fig else None
    Raises:
        Exception: if there is no training history yet
        ValueError: if plot_type is not a str or is not found in the history
    ```
    """
    if self.history is None:
        raise Exception("No training history - did you train the model yet?")

    if not isinstance(plot_type, str):
        raise ValueError("plot_type must be str/string")

    fig = None
    if plot_type == "loss":
        # training loss, plus validation loss when it was recorded
        plt.plot(self.history.history["loss"])
        if "val_loss" in self.history.history:
            plt.plot(self.history.history["val_loss"])
            legend_items = ["train", "validation"]
        else:
            legend_items = ["train"]
        plt.title("Model Loss")
        plt.ylabel("loss")
        plt.xlabel("epoch")
        plt.legend(legend_items, loc="upper left")
    elif plot_type == "lr":
        # 'lr' is only added to the history by autofit / fit_onecycle
        if "lr" not in self.history.history:
            raise ValueError(
                "no lr in history: are you sure you used autofit or fit_onecycle to train?"
            )
        plt.plot(self.history.history["lr"])
        plt.title("LR Schedule")
        plt.ylabel("lr")
        plt.xlabel("iterations")
    elif plot_type == "momentum":
        # 'momentum' is only recorded when momentum cycling was active
        if "momentum" not in self.history.history:
            raise ValueError(
                "no momentum history: are you sure you used autofit or fit_onecycle to train?"
            )
        plt.plot(self.history.history["momentum"])
        plt.title("Momentum Schedule")
        plt.ylabel("momentum")
        plt.xlabel("iterations")
    else:
        # arbitrary history key (e.g., a user-specified metric)
        if plot_type not in self.history.history:
            raise ValueError(
                f"no {plot_type} in history: are you sure {plot_type} exists in history?"
            )
        plt.plot(self.history.history[plot_type])
        val_key = f"val_{plot_type}"
        if val_key in self.history.history:
            plt.plot(self.history.history[val_key])
            legend_items = ["train", "validation"]
        else:
            warnings.warn(
                f"Validation value for {plot_type} wasn't found in history"
            )
            legend_items = ["train"]
        plt.title(f"History of {plot_type}")
        plt.ylabel(plot_type)
        plt.xlabel("epoch")
        plt.legend(legend_items, loc="upper left")
    # grab the current figure so it can be handed back to the caller
    fig = plt.gcf()
    if return_fig:
        return fig
    return
def predict(self, val_data=None)
Makes predictions on validation set
Expand source code
def predict(self, val_data=None):
    """
    ```
    Run the model on a validation set and return its predictions.

    Args:
        val_data: optional data to use instead of self.val_data
    Returns:
        the result of self.model.predict on the selected data
    Raises:
        Exception: if neither val_data nor self.val_data is available
    ```
    """
    data = self.val_data if val_data is None else val_data
    if data is None:
        raise Exception("val_data must be supplied to get_learner or predict")

    if not U.is_iter(data):
        # non-iterator data: the first element holds the model inputs
        return self.model.predict(data[0], batch_size=self.eval_batch_size)

    # iterator/generator data: rewind if possible and predict over all batches
    if hasattr(data, "reset"):
        data.reset()
    n_steps = np.ceil(U.nsamples_from_data(data) / data.batch_size)
    # *_generator methods are deprecated from TF 2.1.0, so plain predict is used
    return self.model.predict(self._prepare(data, train=False), steps=n_steps)
def print_layers(self, show_wd=False)
prints the layers of the model along with indices
Expand source code
def print_layers(self, show_wd=False):
    """
    ```
    Print one line per model layer: its index, its trainable flag and the
    layer itself. With show_wd=True a warning is emitted and, for layers that
    have a kernel_regularizer attribute, the regularizer's l2 (or else l1)
    value is included as wd.
    ```
    """
    if show_wd:
        warnings.warn(
            "set_weight_decay now uses AdamWeightDecay instead of kernel_regularizers."
        )
    for idx, layer in enumerate(self.model.layers):
        if not (show_wd and hasattr(layer, "kernel_regularizer")):
            print("%s (trainable=%s) : %s" % (idx, layer.trainable, layer))
            continue
        regularizer = layer.kernel_regularizer
        # prefer the l2 coefficient, fall back to l1, else report None
        if hasattr(regularizer, "l2"):
            decay = regularizer.l2
        elif hasattr(regularizer, "l1"):
            decay = regularizer.l1
        else:
            decay = None
        print(
            "%s (trainable=%s, wd=%s) : %s" % (idx, layer.trainable, decay, layer)
        )
    return
def reset_weights(self, verbose=1)
Re-initializes network with original weights
Expand source code
def reset_weights(self, verbose=1):
    """
    ```
    Re-initializes network with the original weights saved when this Learner
    was created, and clears the training history.

    If the original weights were never saved (self._original_weights is None)
    or the weights file no longer exists, a warning is issued and the model
    is left untouched.

    Args:
        verbose (bool): verbose mode
    Returns:
        None
    ```
    """
    weights_path = self._original_weights
    if weights_path is None:
        # The constructor falls back to None when saving the weights failed;
        # os.path.isfile(None) would raise TypeError, so handle it explicitly.
        warnings.warn(
            "Weights have not been reset because the original weights "
            + "were never saved."
        )
        return
    if os.path.isfile(weights_path):
        self.model.load_weights(weights_path)
        self.history = None
        U.vprint("Model weights have been reset.", verbose=verbose)
    else:
        warnings.warn(
            "Weights have not been reset because the original weights file "
            + "(%s) no longer exists." % (weights_path)
        )
    return
def save_model(self, fpath)
a wrapper to model.save Args: fpath(str): path to folder in which to save model Returns: None
Expand source code
def save_model(self, fpath):
    """
    ```
    a wrapper to model.save
    Args:
      fpath(str): path to folder in which to save model
    Returns:
      None
    ```
    """
    self._make_model_folder(fpath)
    # NOTE(review): this call was reconstructed from a garbled listing that
    # only retained `U.MODEL_NAME), save_format="h5")` - confirm that the
    # model file is meant to be saved as <fpath>/<U.MODEL_NAME>.
    self.model.save(os.path.join(fpath, U.MODEL_NAME), save_format="h5")
    return
def set_lr(self, lr)
Expand source code
def set_lr(self, lr):
    """
    ```
    Sets the learning rate of the model's optimizer
    Args:
      lr: new learning rate value
    Returns:
      None
    ```
    """
    # NOTE(review): the listing had lost the first argument of K.set_value;
    # it is restored here as the optimizer's lr variable - confirm.
    K.set_value(self.model.optimizer.lr, lr)
    return
def set_model(self, model)
replace model in this Learner instance
Expand source code
def set_model(self, model):
    """
    ```
    Swaps in a new model for this Learner instance and clears
    the stored training history.
    Args:
      model(keras.Model): the replacement model
    Returns:
      None
    Raises:
      ValueError: if model is not a keras.Model instance
    ```
    """
    is_keras_model = isinstance(model, keras.Model)
    if not is_keras_model:
        raise ValueError("model must be of instance keras.Model")
    # history is reset together with the model
    self.model, self.history = model, None
    return
def set_weight_decay(self, wd=0.01)
Sets global weight decay via AdamWeightDecay optimizer Args: wd(float): weight decay Returns: None
Expand source code
def set_weight_decay(self, wd=U.DEFAULT_WD):
    """
    ```
    Sets global weight decay via AdamWeightDecay optimizer.
    This is done by recompiling the model with the given rate.
    Args:
      wd(float): weight decay (defaults to U.DEFAULT_WD)
    Returns:
      None
    ```
    """
    # all of the work is delegated to the recompile step
    self._recompile(wd=wd)
    return
def top_losses(self, n=4, val_data=None, preproc=None)
Computes losses on validation set sorted by examples with top losses Args: n(int or tuple): a range to select in form of int or tuple e.g., n=8 is treated as n=(0,8) val_data: optional val_data to use instead of self.val_data preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor. For some data like text data, a preprocessor is required to undo the pre-processing to correctly view raw data. Returns: list of n tuples where first element is either filepath or id of validation example and second element is loss.
Expand source code
def top_losses(self, n=4, val_data=None, preproc=None):
    """
    ```
    Computes losses on validation set sorted by examples with top losses
    Args:
      n(int or tuple): a range to select in form of int or tuple
                      e.g., n=8 is treated as n=(0,8).
                      If None, all entries are returned.
      val_data:  optional val_data to use instead of self.val_data
      preproc (Preprocessor): A TextPreprocessor or ImagePreprocessor.
                              If supplied, preproc.get_classes() is used to
                              show class names instead of class indices.
    Returns:
        list of tuples of the form (index, loss, truth, prediction),
        sorted by decreasing loss and pruned to the range n.
        For single-label classification, only misclassified examples
        are included.
    Raises:
        Exception: if neither val_data nor self.val_data is available
    ```
    """
    # check validation data and arguments
    val = val_data if val_data is not None else self.val_data
    if val is None:
        raise Exception("val_data must be supplied to get_learner or top_losses")
    # isinstance (instead of an exact type comparison) also accepts
    # NumPy integer scalars, e.g. values taken from an array
    if isinstance(n, (int, np.integer)):
        n = (0, n)
    classification, multilabel = U.is_classifier(self.model)

    # get predictions and ground truth
    y_pred = self.predict(val_data=val)
    y_true = self.ground_truth(val_data=val)
    y_true = y_true.astype("float32")

    # adjust y_true for regression problems so that it has the same
    # (samples, 1) layout as y_pred before the loss is computed
    if (
        not classification
        and len(y_true.shape) == 1
        and len(y_pred.shape) == 2
        and y_pred.shape[1] == 1
    ):
        y_true = np.expand_dims(y_true, -1)

    # compute loss
    loss_fn = U.loss_fn_from_model(self.model)
    losses = loss_fn(tf.convert_to_tensor(y_true), tf.convert_to_tensor(y_pred))
    if DISABLE_V2_BEHAVIOR:
        # use the session as a context manager so that it is closed again
        with tf.Session() as sess:
            losses = sess.run(losses)
    else:
        losses = losses.numpy()

    # maps a class index to the label shown in the result
    if preproc is None:
        class_fcn = lambda x: "%s" % (x)
    else:
        class_names = preproc.get_classes()
        class_fcn = lambda x: class_names[x]

    # regression output modifications
    # NOTE(review): rounding is assumed to apply only when the squeeze
    # applies; the flattened listing does not show the nesting - confirm.
    if not classification:
        if len(y_pred.shape) == 2 and y_pred.shape[1] == 1:
            y_pred = np.squeeze(y_pred)
            y_pred = np.around(y_pred, 2)
        if len(y_true.shape) == 2 and y_true.shape[1] == 1:
            y_true = np.squeeze(y_true)
            y_true = np.around(y_true, 2)

    # sort by loss and prune correct classifications, if necessary
    if classification and not multilabel:
        y_pred = np.squeeze(y_pred)
        y_true = np.squeeze(y_true)
        if len(y_pred.shape) == 1:
            # a single score per example: threshold at 0.5
            y_p = np.where(y_pred > 0.5, 1, 0)
            y_t = np.where(y_true > 0.5, 1, 0)
        else:
            # one score per class: take the highest-scoring class
            y_p = np.argmax(y_pred, axis=1)
            y_t = np.argmax(y_true, axis=1)
        tups = [
            (i, x, class_fcn(y_t[i]), class_fcn(y_p[i]))
            for i, x in enumerate(losses)
            if y_p[i] != y_t[i]
        ]
    else:
        tups = [
            (i, x, y_true[i], np.around(y_pred[i], 2))
            for i, x in enumerate(losses)
        ]
    tups.sort(key=operator.itemgetter(1), reverse=True)

    # prune by given range
    tups = tups[n[0] : n[1]] if n is not None else tups
    return tups
def unfreeze(self, exclude_range=None)
Make every layer trainable except those in exclude_range. unfreeze is simply a proxy method to freeze. NOTE: Unfreeze method does not currently work with multi-GPU models. If you are using the load_imagemodel method, please use the freeze_layers argument of load_imagemodel to freeze layers.
Expand source code
def unfreeze(self, exclude_range=None):
    """
    ```
    Marks every layer of the model as trainable, then (if exclude_range
    is given) marks the layers in model.layers[:exclude_range] as
    non-trainable again, and finally recompiles the model.
    NOTE: Unfreeze method does not currently work with
          multi-GPU models.  If you are using the load_imagemodel method,
          please use the freeze_layers argument of load_imagemodel
          to freeze layers.
    Args:
      exclude_range: used as the end of a slice over model.layers;
                     the layers before it stay frozen.
                     If None (or otherwise falsy), nothing stays frozen.
    Returns:
      None
    ```
    """
    # first pass: everything becomes trainable
    for lyr in self.model.layers:
        lyr.trainable = True
    # second pass: re-freeze the leading layers, if requested
    if exclude_range:
        for lyr in self.model.layers[:exclude_range]:
            lyr.trainable = False
    self._recompile()
    return
def validate(self, val_data=None, print_report=True, save_path='ktrain_classification_report.csv', class_names=[])
Returns confusion matrix and optionally prints a classification report. For multilabel classification problems, confusion matrices are not supported, but classification reports are. By default, this uses val_data, as supplied to ktrain.get_learner(). Other validation or test data can optionally be supplied as an argument. Supply class_names to include labels instead of integer class IDs in the classification report. Args: val_data(Dataset|np.ndarray): validation data. If None, self.val_data is used. print_report(bool): If True, classification report will be printed. If False, report will be saved to CSV at save path. Not applicable to regression models. save_path(str): Classification report will be saved to this file path/name if print_report=False class_names(list): list of class names to be used in classification report instead of class integer IDs.
Expand source code
def validate(
    self,
    val_data=None,
    print_report=True,
    save_path="ktrain_classification_report.csv",
    class_names=[],
):
    """
    ```
    Returns confusion matrix and optionally prints
    a classification report.
    For multilabel classification problems, confusion matrices
    are not supported, but classification reports are.
    For regression models, a list of (metric name, value) tuples
    is returned instead.

    By default, this uses val_data, as supplied to ktrain.get_learner().
    Other validation or test data can optionally be supplied as an argument.
    Supply class_names to include labels instead of integer class IDs
    in the classification report.
    Args:
      val_data(Dataset|np.ndarray): validation data.  If None, self.val_data is used.
      print_report(bool): If True, classification report will be printed. If False, report will be saved to CSV
                          at save path. Not applicable to regression models.
      save_path(str): Classification report will be saved to this file path/name if print_report=False
      class_names(list): list of class names to be used in classification report instead of
                         class integer IDs.
    Returns:
      confusion matrix for binary/multiclass classification,
      None for multilabel classification,
      list of (metric, value) tuples for regression
    ```
    """
    val = val_data if val_data is not None else self.val_data
    classification, multilabel = U.is_classifier(self.model)
    is_multilabel = U.is_multilabel(val) or multilabel
    y_pred = np.squeeze(self.predict(val_data=val))
    y_true = np.squeeze(self.ground_truth(val_data=val))

    # regression evaluation
    if not classification:
        from sklearn.metrics import mean_absolute_error, mean_squared_error

        regout = []
        metrics = U.metrics_from_model(self.model)
        for m in metrics:
            if m in ["mae", "mean_absolute_error"]:
                regout.append((m, mean_absolute_error(y_true, y_pred)))
            elif m in ["mse", "mean_squared_error"]:
                regout.append((m, mean_squared_error(y_true, y_pred)))
        if not regout:
            # the message previously had a %s placeholder but no argument,
            # so the unsupported metrics were never shown
            warnings.warn(
                "%s is not supported by validate/evaluate - falling back to MAE"
                % (metrics,)
            )
            regout.append(("mae", mean_absolute_error(y_true, y_pred)))
        return regout

    # convert scores to discrete labels
    if len(y_pred.shape) == 1:
        # a single score per example: threshold at 0.5
        y_pred = np.where(y_pred > 0.5, 1, 0)
        y_true = np.where(y_true > 0.5, 1, 0)
    elif is_multilabel:
        from sklearn.preprocessing import binarize

        y_pred = binarize(y_pred, threshold=0.5)
    else:
        y_pred = np.argmax(y_pred, axis=1)
        y_true = np.argmax(y_true, axis=1)

    if print_report or save_path is not None:
        # zero_division=0 is used in both cases so that the report is
        # built the same way with and without class names
        report_kwargs = {"output_dict": not print_report, "zero_division": 0}
        if class_names:
            try:
                # a new list is built; the class_names argument
                # (a shared default) is never modified in place
                target_names = [str(s) for s in class_names]
            except Exception:
                # best-effort conversion: fall back to the names as supplied
                target_names = class_names
            report_kwargs["target_names"] = target_names
        report = classification_report(y_true, y_pred, **report_kwargs)
        if print_report:
            print(report)
        else:
            df = pd.DataFrame(report).transpose()
            df.to_csv(save_path)
            print("classification report saved to: %s" % (save_path))

    if is_multilabel:
        warnings.warn(
            "Confusion matrices do not currently support multilabel classification, so returning None"
        )
        return None

    return confusion_matrix(y_true, y_pred)
def view_top_losses(self, n=4, preproc=None, val_data=None)
View observations with top losses in validation set. Must be overridden by Learner subclasses.
Expand source code
def view_top_losses(self, n=4, preproc=None, val_data=None):
    """
    ```
    View observations with top losses in validation set.
    Must be overridden by Learner subclasses; this base
    implementation always raises.
    Raises:
      NotImplementedError: always
    ```
    """
    message = "view_top_losses must be overriden by Learner subclass"
    raise NotImplementedError(message)