Package ktrain
Expand source code
from . import imports as I
from . import utils as U
from .core import (
ArrayLearner,
GenLearner,
get_predictor,
load_predictor,
release_gpu_memory,
)
from .graph.learner import LinkPredLearner, NodeClassLearner
from .text.learner import BERTTextClassLearner, TransformerTextClassLearner
from .text.ner.learner import NERLearner
from .version import __version__
from .vision.learner import ImageClassLearner
# Names exported by `from ktrain import *`: the learner/predictor factory
# functions plus the GPU-memory helper.
__all__ = ["get_learner", "get_predictor", "load_predictor", "release_gpu_memory"]
def get_learner(
    model,
    train_data=None,
    val_data=None,
    batch_size=U.DEFAULT_BS,
    eval_batch_size=U.DEFAULT_BS,
    workers=1,
    use_multiprocessing=False,
):
    """
    ```
    Returns a Learner instance that can be used to tune and train Keras models.

    model (Model):        A compiled instance of keras.engine.training.Model
    train_data (tuple or generator): Either a:
                                   1) tuple of (x_train, y_train), where x_train and
                                      y_train are numpy.ndarrays or
                                   2) Iterator
    val_data (tuple or generator): Either a:
                                   1) tuple of (x_test, y_test), where x_test and
                                      y_test are numpy.ndarrays or
                                   2) Iterator
                                   Note: Should be same type as train_data.
    batch_size (int):              Batch size to use in training. default:32
    eval_batch_size(int):  batch size used by learner.predict
                           only applies to validation data during training if
                           val_data is instance of utils.Sequence.
                           default:32
    workers (int): number of cpu processes used to load data.
                   This is ignored unless train_data/val_data is an instance of
                   tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator.
                   Values that are not a plain int >= 1 are replaced by 1.
    use_multiprocessing(bool):  whether or not to use multiprocessing for workers
                               This is ignored unless train_data/val_data is an instance of
                               tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator.

    Returns: an instance of the Learner subclass selected from the type of train_data.
    Raises:  ValueError if model is not an instance of keras Model.
    ```
    """

    # check arguments
    if not isinstance(model, I.keras.Model):
        raise ValueError("model must be of instance Model")
    U.data_arg_check(train_data=train_data, val_data=val_data)
    # anything other than a positive plain int silently falls back to one worker
    if type(workers) is not int or workers < 1:
        workers = 1

    # check for NumpyArrayIterator
    if train_data and not U.ondisk(train_data):
        if workers > 1 and not use_multiprocessing:
            use_multiprocessing = True
            wrn_msg = "Changed use_multiprocessing to True because NumpyArrayIterator with workers>1"
            wrn_msg += " is slow when use_multiprocessing=False."
            wrn_msg += " If you experience issues with this, please set workers=1 and use_multiprocessing=False."
            I.warnings.warn(wrn_msg)

    # verify BERT
    is_bert = U.bert_data_tuple(train_data)
    if is_bert:
        maxlen = U.shape_from_data(train_data)[1]
        msg = """For a GPU with 12GB of RAM, the following maxima apply:
        sequence len=64, max_batch_size=64
        sequence len=128, max_batch_size=32
        sequence len=256, max_batch_size=16
        sequence len=320, max_batch_size=14
        sequence len=384, max_batch_size=12
        sequence len=512, max_batch_size=6

        You've exceeded these limits.
        If using a GPU with <=12GB of memory, you may run out of memory during training.
        If necessary, adjust sequence length or batch size based on above."""
        # (sequence length, max batch size) rows of the table in `msg`.
        # The comparison on maxlen is inclusive so that a sequence length that
        # sits exactly on a row (e.g. maxlen=512 with batch_size=8) is checked
        # against that row; a strict '>' would let it slip through unwarned.
        bert_limits = ((64, 64), (128, 32), (256, 16), (320, 14), (384, 12), (512, 6))
        if any(maxlen >= seqlen and batch_size > max_bs for seqlen, max_bs in bert_limits):
            I.warnings.warn(msg)

    # return the appropriate trainer
    if U.is_iter(train_data):
        if U.is_ner(model=model, data=train_data):
            learner = NERLearner
        elif U.is_imageclass_from_data(train_data):
            learner = ImageClassLearner
        elif U.is_nodeclass(data=train_data):
            learner = NodeClassLearner
        # Previously this branch repeated the is_nodeclass test and could never
        # be reached, so link-prediction data fell through to later branches.
        # NOTE(review): assumes U.is_linkpred exists alongside U.is_nodeclass - confirm.
        elif U.is_linkpred(data=train_data):
            learner = LinkPredLearner
        elif U.is_huggingface(data=train_data):
            learner = TransformerTextClassLearner
        else:
            learner = GenLearner
    else:
        if is_bert:
            learner = BERTTextClassLearner
        else:  # vanilla text classifiers use standard ArrayLearners
            learner = ArrayLearner
    learner_obj = learner(
        model,
        train_data=train_data,
        val_data=val_data,
        batch_size=batch_size,
        eval_batch_size=eval_batch_size,
        workers=workers,
        use_multiprocessing=use_multiprocessing,
    )

    # imported locally rather than at module level
    import tensorflow as tf
    from packaging import version
    from tensorflow.keras.optimizers import Optimizer

    # From TF 2.11 on, a model holding a (non-legacy) Optimizer instance is
    # recompiled through the learner so that a legacy optimizer is used.
    if (version.parse(tf.__version__) >= version.parse("2.11")) and (
        isinstance(learner_obj.model.optimizer, Optimizer)
    ):
        I.warnings.warn(
            "ktrain currently only supports legacy optimizers in tensorflow>=2.11 - recompiling your model to use legacy Adam"
        )
        learner_obj._recompile(wd=0)
    return learner_obj
# keys
# currently_unsupported: unsupported or disabled features (e.g., xai graph neural networks have not been implemented)
# dep_fix: a fix to address a problem in a dependency
# TODO: things to change
# NOTES: As of 0.30.x, TensorFlow is optional (no longer a forced dependency), so that pretrained PyTorch or sklearn models can be used without it.
# In core, lroptimize imports were localized to allow for optional TF
# References to ktrain.dataset (keras.utils) and anago (keras.Callback) were also localized (from module-level) for optional TF
Sub-modules
ktrain.core
ktrain.dataset
ktrain.graph
ktrain.imports
ktrain.lroptimize
ktrain.models
ktrain.predictor
ktrain.preprocessor
ktrain.tabular
ktrain.text
ktrain.torch_base
ktrain.utils
ktrain.version
ktrain.vision
Functions
def get_learner(model, train_data=None, val_data=None, batch_size=32, eval_batch_size=32, workers=1, use_multiprocessing=False)
-
Returns a Learner instance that can be used to tune and train Keras models. model (Model): A compiled instance of keras.engine.training.Model train_data (tuple or generator): Either a: 1) tuple of (x_train, y_train), where x_train and y_train are numpy.ndarrays or 2) Iterator val_data (tuple or generator): Either a: 1) tuple of (x_test, y_test), where x_test and y_test are numpy.ndarrays or 2) Iterator Note: Should be same type as train_data. batch_size (int): Batch size to use in training. default:32 eval_batch_size (int): batch size used by learner.predict; only applies to validation data during training if val_data is instance of utils.Sequence. default:32 workers (int): number of cpu processes used to load data. This is ignored unless train_data/val_data is an instance of tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator. use_multiprocessing (bool): whether or not to use multiprocessing for workers. This is ignored unless train_data/val_data is an instance of tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator.
Expand source code
def get_learner(
    model,
    train_data=None,
    val_data=None,
    batch_size=U.DEFAULT_BS,
    eval_batch_size=U.DEFAULT_BS,
    workers=1,
    use_multiprocessing=False,
):
    """
    ```
    Returns a Learner instance that can be used to tune and train Keras models.

    model (Model):        A compiled instance of keras.engine.training.Model
    train_data (tuple or generator): Either a:
                                   1) tuple of (x_train, y_train), where x_train and
                                      y_train are numpy.ndarrays or
                                   2) Iterator
    val_data (tuple or generator): Either a:
                                   1) tuple of (x_test, y_test), where x_test and
                                      y_test are numpy.ndarrays or
                                   2) Iterator
                                   Note: Should be same type as train_data.
    batch_size (int):              Batch size to use in training. default:32
    eval_batch_size(int):  batch size used by learner.predict
                           only applies to validation data during training if
                           val_data is instance of utils.Sequence.
                           default:32
    workers (int): number of cpu processes used to load data.
                   This is ignored unless train_data/val_data is an instance of
                   tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator.
                   Values that are not a plain int >= 1 are replaced by 1.
    use_multiprocessing(bool):  whether or not to use multiprocessing for workers
                               This is ignored unless train_data/val_data is an instance of
                               tf.keras.preprocessing.image.DirectoryIterator or tf.keras.preprocessing.image.DataFrameIterator.

    Returns: an instance of the Learner subclass selected from the type of train_data.
    Raises:  ValueError if model is not an instance of keras Model.
    ```
    """

    # check arguments
    if not isinstance(model, I.keras.Model):
        raise ValueError("model must be of instance Model")
    U.data_arg_check(train_data=train_data, val_data=val_data)
    # anything other than a positive plain int silently falls back to one worker
    if type(workers) is not int or workers < 1:
        workers = 1

    # check for NumpyArrayIterator
    if train_data and not U.ondisk(train_data):
        if workers > 1 and not use_multiprocessing:
            use_multiprocessing = True
            wrn_msg = "Changed use_multiprocessing to True because NumpyArrayIterator with workers>1"
            wrn_msg += " is slow when use_multiprocessing=False."
            wrn_msg += " If you experience issues with this, please set workers=1 and use_multiprocessing=False."
            I.warnings.warn(wrn_msg)

    # verify BERT
    is_bert = U.bert_data_tuple(train_data)
    if is_bert:
        maxlen = U.shape_from_data(train_data)[1]
        msg = """For a GPU with 12GB of RAM, the following maxima apply:
        sequence len=64, max_batch_size=64
        sequence len=128, max_batch_size=32
        sequence len=256, max_batch_size=16
        sequence len=320, max_batch_size=14
        sequence len=384, max_batch_size=12
        sequence len=512, max_batch_size=6

        You've exceeded these limits.
        If using a GPU with <=12GB of memory, you may run out of memory during training.
        If necessary, adjust sequence length or batch size based on above."""
        # (sequence length, max batch size) rows of the table in `msg`.
        # The comparison on maxlen is inclusive so that a sequence length that
        # sits exactly on a row (e.g. maxlen=512 with batch_size=8) is checked
        # against that row; a strict '>' would let it slip through unwarned.
        bert_limits = ((64, 64), (128, 32), (256, 16), (320, 14), (384, 12), (512, 6))
        if any(maxlen >= seqlen and batch_size > max_bs for seqlen, max_bs in bert_limits):
            I.warnings.warn(msg)

    # return the appropriate trainer
    if U.is_iter(train_data):
        if U.is_ner(model=model, data=train_data):
            learner = NERLearner
        elif U.is_imageclass_from_data(train_data):
            learner = ImageClassLearner
        elif U.is_nodeclass(data=train_data):
            learner = NodeClassLearner
        # Previously this branch repeated the is_nodeclass test and could never
        # be reached, so link-prediction data fell through to later branches.
        # NOTE(review): assumes U.is_linkpred exists alongside U.is_nodeclass - confirm.
        elif U.is_linkpred(data=train_data):
            learner = LinkPredLearner
        elif U.is_huggingface(data=train_data):
            learner = TransformerTextClassLearner
        else:
            learner = GenLearner
    else:
        if is_bert:
            learner = BERTTextClassLearner
        else:  # vanilla text classifiers use standard ArrayLearners
            learner = ArrayLearner
    learner_obj = learner(
        model,
        train_data=train_data,
        val_data=val_data,
        batch_size=batch_size,
        eval_batch_size=eval_batch_size,
        workers=workers,
        use_multiprocessing=use_multiprocessing,
    )

    # imported locally rather than at module level
    import tensorflow as tf
    from packaging import version
    from tensorflow.keras.optimizers import Optimizer

    # From TF 2.11 on, a model holding a (non-legacy) Optimizer instance is
    # recompiled through the learner so that a legacy optimizer is used.
    if (version.parse(tf.__version__) >= version.parse("2.11")) and (
        isinstance(learner_obj.model.optimizer, Optimizer)
    ):
        I.warnings.warn(
            "ktrain currently only supports legacy optimizers in tensorflow>=2.11 - recompiling your model to use legacy Adam"
        )
        learner_obj._recompile(wd=0)
    return learner_obj
def get_predictor(model, preproc, batch_size=32)
-
Returns a Predictor instance that can be used to make predictions on unlabeled examples. Can be saved to disk and reloaded as part of a larger application. Args: model (Model): A compiled instance of keras.engine.training.Model preproc (Preprocessor): An instance of TextPreprocessor, ImagePreprocessor, or NERPreprocessor. These instances are returned from the data loading functions in the ktrain vision and text modules: ktrain.vision.images_from_folder ktrain.vision.images_from_csv ktrain.vision.images_from_array ktrain.text.texts_from_folder ktrain.text.texts_from_csv ktrain.text.ner.entities_from_csv batch_size (int): batch size to use. default:32
Expand source code
def get_predictor(model, preproc, batch_size=U.DEFAULT_BS):
    """
    ```
    Build the Predictor that corresponds to the given preprocessor.
    The result can be used to make predictions on unlabeled examples and
    can be saved to disk and reloaded as part of a larger application.

    Args
        model (Model):        A compiled instance of keras.engine.training.Model
        preproc(Preprocessor): An instance of ImagePreprocessor, TextPreprocessor,
                               NERPreprocessor, NodePreprocessor, LinkPreprocessor
                               or TabularPreprocessor, as returned by the ktrain
                               data loading functions, e.g.:
                                 ktrain.vision.images_from_folder
                                 ktrain.vision.images_from_csv
                                 ktrain.vision.images_from_array
                                 ktrain.text.texts_from_folder
                                 ktrain.text.texts_from_csv
                                 ktrain.text.ner.entities_from_csv
        batch_size(int):    batch size to use.  default:32

    Raises
        ValueError: if model is not a keras.Model or preproc is not one of
                    the preprocessor types listed above.
    ```
    """

    # validate both arguments up front
    if not isinstance(model, keras.Model):
        raise ValueError("model must be of instance keras.Model")
    accepted_types = (
        ImagePreprocessor,
        TextPreprocessor,
        NERPreprocessor,
        NodePreprocessor,
        LinkPreprocessor,
        TabularPreprocessor,
    )
    if not isinstance(preproc, accepted_types):
        raise ValueError("preproc must be instance of ktrain.preprocessor.Preprocessor")

    # the first matching preprocessor type decides the predictor class
    if isinstance(preproc, ImagePreprocessor):
        return ImagePredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, TextPreprocessor):
        return TextPredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, NERPreprocessor):
        return NERPredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, NodePreprocessor):
        return NodePredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, LinkPreprocessor):
        return LinkPredictor(model, preproc, batch_size=batch_size)
    if isinstance(preproc, TabularPreprocessor):
        return TabularPredictor(model, preproc, batch_size=batch_size)

    # defensive fallback, kept from the original control flow
    raise Exception("preproc of type %s not currently supported" % (type(preproc)))
def load_predictor(fpath, batch_size=32, custom_objects=None)
-
Loads a previously saved Predictor instance Args fpath(str): predictor path name (value supplied to predictor.save) From v0.16.x, this is always the path to a folder. Pre-v0.16.x, this is the base name used to save model and .preproc instance. batch_size(int): batch size to use for predictions. default:32 custom_objects(dict): custom objects required to load model. This is useful if you compiled the model with a custom loss function, for example. For models included with ktrain as is, this is populated automatically and can be disregarded.
Expand source code
def load_predictor(fpath, batch_size=U.DEFAULT_BS, custom_objects=None):
    """
    ```
    Loads a previously saved Predictor instance

    Args
      fpath(str): predictor path name (value supplied to predictor.save)
                  From v0.16.x, this is always the path to a folder.
                  Pre-v0.16.x, this is the base name used to save model and .preproc instance.
      batch_size(int): batch size to use for predictions. default:32
      custom_objects(dict): custom objects required to load model.
                            This is useful if you compiled the model with a custom loss function, for example.
                            For models included with ktrain as is, this is populated automatically
                            and can be disregarded.

    Returns
      the Predictor built by get_predictor for the loaded model and preprocessor

    Raises
      Exception: if the .preproc file cannot be loaded from either location, or
                 if the stored image preprocessing-function name is not recognized
      ValueError: (from get_predictor) if the loaded model or preprocessor is of
                  an unsupported type
    ```
    """

    # load the preprocessor
    # NOTE(security): pickle.load can execute arbitrary code; fpath must only
    # ever point at predictor files from a trusted source.
    preproc = None
    try:
        preproc_name = os.path.join(fpath, U.PREPROC_NAME)
        with open(preproc_name, "rb") as f:
            preproc = pickle.load(f)
    except Exception:
        # fall back to the pre-v0.16.x layout; 'except Exception' (rather than a
        # bare except) lets KeyboardInterrupt/SystemExit propagate
        try:
            preproc_name = fpath + ".preproc"
            with open(preproc_name, "rb") as f:
                preproc = pickle.load(f)
        except Exception as err:
            raise Exception(
                "Failed to load .preproc file in either the post v0.16.x loction (%s) or pre v0.16.x location (%s)"
                % (os.path.join(fpath, U.PREPROC_NAME), fpath + ".preproc")
            ) from err

    # load the model
    model = _load_model(fpath, preproc=preproc, custom_objects=custom_objects)

    # preprocessing functions in ImageDataGenerators are not pickable
    # so, we must reconstruct
    if hasattr(preproc, "datagen") and hasattr(preproc.datagen, "ktrain_preproc"):
        preproc_name = preproc.datagen.ktrain_preproc
        # stored name -> keras.applications submodule providing preprocess_input;
        # resolved with getattr so only the requested submodule is touched
        application_modules = {
            "resnet50": "resnet50",
            "mobilenet": "mobilenet",
            "mobilenetv3": "mobilenet_v3",
            "inception": "inception_v3",
            "efficientnet": "efficientnet",
        }
        if preproc_name not in application_modules:
            raise Exception("Uknown preprocessing_function name: %s" % (preproc_name))
        application = getattr(keras.applications, application_modules[preproc_name])
        preproc.datagen.preprocessing_function = application.preprocess_input

    # return the appropriate predictor: get_predictor performs the same model /
    # preprocessor validation and type dispatch that used to be duplicated here
    return get_predictor(model, preproc, batch_size=batch_size)
def release_gpu_memory(device=0)
-
Release GPU memory allocated by TensorFlow. Source: https://stackoverflow.com/questions/51005147/keras-release-memory-after-finish-training-process
Expand source code
def release_gpu_memory(device=0):
    """
    ```
    Release GPU memory allocated by TensorFlow.

    Calls K.clear_session(), then selects `device` through numba.cuda
    and closes it.

    device: value handed to numba's cuda.select_device. default:0

    Source: https://stackoverflow.com/questions/51005147/keras-release-memory-after-finish-training-process
    ```
    """

    # numba is imported only when this helper is actually called
    from numba import cuda

    K.clear_session()
    cuda.select_device(device)
    cuda.close()