Module ktrain.vision.predictor

Expand source code
from .. import utils as U
from ..imports import *
from ..predictor import Predictor
from .preprocessor import ImagePreprocessor


class ImagePredictor(Predictor):
    """
    ```
    Predicts image classes using a Keras model together with an
    ImagePreprocessor that prepares inputs for it.
    ```
    """

    def __init__(self, model, preproc, batch_size=U.DEFAULT_BS):
        """
        ```
        Args:
          model: keras.Model instance used to make predictions
          preproc: ImagePreprocessor instance used to prepare inputs
          batch_size: batch size handed to the preprocessor when predicting
        Raises:
          ValueError: if model or preproc is not of the expected type
        ```
        """
        if not isinstance(model, keras.Model):
            raise ValueError("model must be of instance keras.Model")
        if not isinstance(preproc, ImagePreprocessor):
            raise ValueError("preproc must be instance of ImagePreprocessor")
        self.model = model
        self.preproc = preproc
        self.datagen = self.preproc.get_preprocessor()
        self.c = self.preproc.get_classes()
        self.batch_size = batch_size

    def get_classes(self):
        """
        ```
        Returns the class labels obtained from the preprocessor.
        ```
        """
        return self.c

    def explain(self, img_fpath):
        """
        ```
        Highlights image to explain prediction.
        Returns the result of eli5.show_prediction, or None (after emitting
        a warning) when eli5 cannot be imported or when V2 behavior has not
        been disabled.
        ```
        """
        try:
            import eli5
        except ImportError:
            # Only a failed import is treated as "eli5 not available";
            # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
            msg = (
                "ktrain requires a forked version of eli5 to support tf.keras. "
                + "Install with: pip install https://github.com/amaiya/eli5-tf/archive/refs/heads/master.zip"
            )
            warnings.warn(msg)
            return

        if not DISABLE_V2_BEHAVIOR:
            warnings.warn(
                "Please add os.environ['DISABLE_V2_BEHAVIOR'] = '1' at top of your script or notebook."
            )
            msg = (
                "\nFor image classification, the explain method currently requires disabling V2 behavior in TensorFlow 2.\n"
                + "Please add the following to the top of your script or notebook BEFORE you import ktrain and restart Colab runtime or Jupyter kernel:\n\n"
                + "import os\n"
                + "os.environ['DISABLE_V2_BEHAVIOR'] = '1'\n"
            )
            print(msg)
            return

        # Load the image with the size and color mode configured on the preprocessor.
        img = keras.preprocessing.image.load_img(
            img_fpath,
            target_size=self.preproc.target_size,
            color_mode=self.preproc.color_mode,
        )
        x = keras.preprocessing.image.img_to_array(img)
        # Add a leading axis so the single image forms a batch of one.
        x = np.expand_dims(x, axis=0)
        return eli5.show_prediction(self.model, x)

    def predict(self, data, return_proba=False, verbose=0):
        """
        ```
        Predicts class from image in array format.
        If return_proba is True, returns probabilities of each class.
        Raises ValueError if data is not a numpy.ndarray.
        ```
        """
        if not isinstance(data, np.ndarray):
            raise ValueError("data must be numpy.ndarray")
        (generator, steps) = self.preproc.preprocess(data, batch_size=self.batch_size)
        return self.predict_generator(
            generator, steps=steps, return_proba=return_proba, verbose=verbose
        )

    def predict_filename(self, img_path, return_proba=False, verbose=0):
        """
        ```
        Predicts class from filepath to single image file.
        If return_proba is True, returns probabilities of each class.
        Raises ValueError if img_path is not an existing file.
        ```
        """
        if not os.path.isfile(img_path):
            raise ValueError("img_path must be valid file")
        (generator, steps) = self.preproc.preprocess(
            img_path, batch_size=self.batch_size
        )
        return self.predict_generator(
            generator, steps=steps, return_proba=return_proba, verbose=verbose
        )

    def predict_folder(self, folder, return_proba=False, verbose=0):
        """
        ```
        Predicts the classes of all images in a folder.
        If return_proba is True, returns probabilities of each class.
        Returns a list of (filename, prediction) pairs.
        Raises ValueError if folder is not an existing directory.
        ```
        """
        if not os.path.isdir(folder):
            raise ValueError("folder must be valid directory")
        (generator, steps) = self.preproc.preprocess(folder, batch_size=self.batch_size)
        result = self.predict_generator(
            generator, steps=steps, return_proba=return_proba, verbose=verbose
        )
        # Guard against zip() silently truncating when the counts disagree.
        if len(result) != len(generator.filenames):
            raise Exception("number of results does not equal number of filenames")
        return list(zip(generator.filenames, result))

    def predict_generator(self, generator, steps=None, return_proba=False, verbose=0):
        """
        ```
        Makes predictions for the data supplied by generator.

        If the model is not a classifier, returns the raw model output
        with axis 1 squeezed out.
        If return_proba is True, returns the raw model output.
        For multilabel classifiers, returns for each example a list of
        (class, score) pairs.
        Otherwise returns, for each example, the class with the top score.
        ```
        """
        classification, multilabel = U.is_classifier(self.model)
        # model.predict is used because the *_generator methods are
        # deprecated from TF 2.1.0.
        preds = self.model.predict(generator, steps=steps, verbose=verbose)
        if not classification:
            return np.squeeze(preds, axis=1)
        if return_proba:
            return preds
        if multilabel:
            return [list(zip(self.c, scores)) for scores in preds]
        return [self.c[np.argmax(scores)] for scores in preds]

    def predict_proba(self, data, verbose=0):
        """
        ```
        Same as predict with return_proba=True.
        ```
        """
        return self.predict(data, return_proba=True, verbose=verbose)

    def predict_proba_folder(self, folder, verbose=0):
        """
        ```
        Same as predict_folder with return_proba=True.
        ```
        """
        return self.predict_folder(folder, return_proba=True, verbose=verbose)

    def predict_proba_filename(self, img_path, verbose=0):
        """
        ```
        Same as predict_filename with return_proba=True.
        ```
        """
        return self.predict_filename(img_path, return_proba=True, verbose=verbose)

    def predict_proba_generator(self, generator, steps=None, verbose=0):
        """
        ```
        Same as predict_generator with return_proba=True.
        ```
        """
        # Delegate to predict_generator; calling this method itself with
        # return_proba would raise TypeError (no such parameter here).
        return self.predict_generator(
            generator, steps=steps, return_proba=True, verbose=verbose
        )

    def analyze_valid(self, generator, print_report=True, multilabel=None):
        """
        ```
        Makes predictions on validation set and returns the confusion matrix.
        Accepts as input a generator (e.g., DirectoryIterator, DataframeIterator)
        representing the validation set.


        Optionally prints a classification report.
        Currently, this method is only supported for binary and multiclass
        problems, not multilabel classification problems.
        For multilabel problems a warning is issued and None is returned.
        ```
        """
        if multilabel is None:
            multilabel = U.is_multilabel(generator)
        if multilabel:
            warnings.warn("multilabel_confusion_matrix not yet supported - skipping")
            return

        y_true = generator.classes
        # model.predict is used because the *_generator methods are
        # deprecated from TF 2.1.0.
        y_pred = self.model.predict(generator)
        y_pred = np.argmax(y_pred, axis=1)
        if print_report:
            print(classification_report(y_true, y_pred, target_names=self.c))
        # multilabel is always falsy here because of the early return above.
        return confusion_matrix(y_true, y_pred)

    def _save_preproc(self, fpath):
        """
        ```
        Pickles the preprocessor to <fpath>/tf_model.preproc.
        ```
        """
        preproc_name = "tf_model.preproc"
        with open(os.path.join(fpath, preproc_name), "wb") as f:
            datagen = self.preproc.get_preprocessor()
            pfunc = datagen.preprocessing_function
            # The preprocessing function is detached while pickling
            # (presumably because it may not be picklable - TODO confirm).
            datagen.preprocessing_function = None
            try:
                pickle.dump(self.preproc, f)
            finally:
                # Always restore it, even if pickling fails.
                datagen.preprocessing_function = pfunc
        return

Classes

class ImagePredictor (model, preproc, batch_size=32)
predicts image classes
Expand source code
class ImagePredictor(Predictor):
    """
    ```
    Predicts image classes using a Keras model together with an
    ImagePreprocessor that prepares inputs for it.
    ```
    """

    def __init__(self, model, preproc, batch_size=U.DEFAULT_BS):
        """
        ```
        Args:
          model: keras.Model instance used to make predictions
          preproc: ImagePreprocessor instance used to prepare inputs
          batch_size: batch size handed to the preprocessor when predicting
        Raises:
          ValueError: if model or preproc is not of the expected type
        ```
        """
        if not isinstance(model, keras.Model):
            raise ValueError("model must be of instance keras.Model")
        if not isinstance(preproc, ImagePreprocessor):
            raise ValueError("preproc must be instance of ImagePreprocessor")
        self.model = model
        self.preproc = preproc
        self.datagen = self.preproc.get_preprocessor()
        self.c = self.preproc.get_classes()
        self.batch_size = batch_size

    def get_classes(self):
        """
        ```
        Returns the class labels obtained from the preprocessor.
        ```
        """
        return self.c

    def explain(self, img_fpath):
        """
        ```
        Highlights image to explain prediction.
        Returns the result of eli5.show_prediction, or None (after emitting
        a warning) when eli5 cannot be imported or when V2 behavior has not
        been disabled.
        ```
        """
        try:
            import eli5
        except ImportError:
            # Only a failed import is treated as "eli5 not available";
            # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
            msg = (
                "ktrain requires a forked version of eli5 to support tf.keras. "
                + "Install with: pip install https://github.com/amaiya/eli5-tf/archive/refs/heads/master.zip"
            )
            warnings.warn(msg)
            return

        if not DISABLE_V2_BEHAVIOR:
            warnings.warn(
                "Please add os.environ['DISABLE_V2_BEHAVIOR'] = '1' at top of your script or notebook."
            )
            msg = (
                "\nFor image classification, the explain method currently requires disabling V2 behavior in TensorFlow 2.\n"
                + "Please add the following to the top of your script or notebook BEFORE you import ktrain and restart Colab runtime or Jupyter kernel:\n\n"
                + "import os\n"
                + "os.environ['DISABLE_V2_BEHAVIOR'] = '1'\n"
            )
            print(msg)
            return

        # Load the image with the size and color mode configured on the preprocessor.
        img = keras.preprocessing.image.load_img(
            img_fpath,
            target_size=self.preproc.target_size,
            color_mode=self.preproc.color_mode,
        )
        x = keras.preprocessing.image.img_to_array(img)
        # Add a leading axis so the single image forms a batch of one.
        x = np.expand_dims(x, axis=0)
        return eli5.show_prediction(self.model, x)

    def predict(self, data, return_proba=False, verbose=0):
        """
        ```
        Predicts class from image in array format.
        If return_proba is True, returns probabilities of each class.
        Raises ValueError if data is not a numpy.ndarray.
        ```
        """
        if not isinstance(data, np.ndarray):
            raise ValueError("data must be numpy.ndarray")
        (generator, steps) = self.preproc.preprocess(data, batch_size=self.batch_size)
        return self.predict_generator(
            generator, steps=steps, return_proba=return_proba, verbose=verbose
        )

    def predict_filename(self, img_path, return_proba=False, verbose=0):
        """
        ```
        Predicts class from filepath to single image file.
        If return_proba is True, returns probabilities of each class.
        Raises ValueError if img_path is not an existing file.
        ```
        """
        if not os.path.isfile(img_path):
            raise ValueError("img_path must be valid file")
        (generator, steps) = self.preproc.preprocess(
            img_path, batch_size=self.batch_size
        )
        return self.predict_generator(
            generator, steps=steps, return_proba=return_proba, verbose=verbose
        )

    def predict_folder(self, folder, return_proba=False, verbose=0):
        """
        ```
        Predicts the classes of all images in a folder.
        If return_proba is True, returns probabilities of each class.
        Returns a list of (filename, prediction) pairs.
        Raises ValueError if folder is not an existing directory.
        ```
        """
        if not os.path.isdir(folder):
            raise ValueError("folder must be valid directory")
        (generator, steps) = self.preproc.preprocess(folder, batch_size=self.batch_size)
        result = self.predict_generator(
            generator, steps=steps, return_proba=return_proba, verbose=verbose
        )
        # Guard against zip() silently truncating when the counts disagree.
        if len(result) != len(generator.filenames):
            raise Exception("number of results does not equal number of filenames")
        return list(zip(generator.filenames, result))

    def predict_generator(self, generator, steps=None, return_proba=False, verbose=0):
        """
        ```
        Makes predictions for the data supplied by generator.

        If the model is not a classifier, returns the raw model output
        with axis 1 squeezed out.
        If return_proba is True, returns the raw model output.
        For multilabel classifiers, returns for each example a list of
        (class, score) pairs.
        Otherwise returns, for each example, the class with the top score.
        ```
        """
        classification, multilabel = U.is_classifier(self.model)
        # model.predict is used because the *_generator methods are
        # deprecated from TF 2.1.0.
        preds = self.model.predict(generator, steps=steps, verbose=verbose)
        if not classification:
            return np.squeeze(preds, axis=1)
        if return_proba:
            return preds
        if multilabel:
            return [list(zip(self.c, scores)) for scores in preds]
        return [self.c[np.argmax(scores)] for scores in preds]

    def predict_proba(self, data, verbose=0):
        """
        ```
        Same as predict with return_proba=True.
        ```
        """
        return self.predict(data, return_proba=True, verbose=verbose)

    def predict_proba_folder(self, folder, verbose=0):
        """
        ```
        Same as predict_folder with return_proba=True.
        ```
        """
        return self.predict_folder(folder, return_proba=True, verbose=verbose)

    def predict_proba_filename(self, img_path, verbose=0):
        """
        ```
        Same as predict_filename with return_proba=True.
        ```
        """
        return self.predict_filename(img_path, return_proba=True, verbose=verbose)

    def predict_proba_generator(self, generator, steps=None, verbose=0):
        """
        ```
        Same as predict_generator with return_proba=True.
        ```
        """
        # Delegate to predict_generator; calling this method itself with
        # return_proba would raise TypeError (no such parameter here).
        return self.predict_generator(
            generator, steps=steps, return_proba=True, verbose=verbose
        )

    def analyze_valid(self, generator, print_report=True, multilabel=None):
        """
        ```
        Makes predictions on validation set and returns the confusion matrix.
        Accepts as input a generator (e.g., DirectoryIterator, DataframeIterator)
        representing the validation set.


        Optionally prints a classification report.
        Currently, this method is only supported for binary and multiclass
        problems, not multilabel classification problems.
        For multilabel problems a warning is issued and None is returned.
        ```
        """
        if multilabel is None:
            multilabel = U.is_multilabel(generator)
        if multilabel:
            warnings.warn("multilabel_confusion_matrix not yet supported - skipping")
            return

        y_true = generator.classes
        # model.predict is used because the *_generator methods are
        # deprecated from TF 2.1.0.
        y_pred = self.model.predict(generator)
        y_pred = np.argmax(y_pred, axis=1)
        if print_report:
            print(classification_report(y_true, y_pred, target_names=self.c))
        # multilabel is always falsy here because of the early return above.
        return confusion_matrix(y_true, y_pred)

    def _save_preproc(self, fpath):
        """
        ```
        Pickles the preprocessor to <fpath>/tf_model.preproc.
        ```
        """
        preproc_name = "tf_model.preproc"
        with open(os.path.join(fpath, preproc_name), "wb") as f:
            datagen = self.preproc.get_preprocessor()
            pfunc = datagen.preprocessing_function
            # The preprocessing function is detached while pickling
            # (presumably because it may not be picklable - TODO confirm).
            datagen.preprocessing_function = None
            try:
                pickle.dump(self.preproc, f)
            finally:
                # Always restore it, even if pickling fails.
                datagen.preprocessing_function = pfunc
        return

Ancestors

Methods

def analyze_valid(self, generator, print_report=True, multilabel=None)
Makes predictions on validation set and returns the confusion matrix.
Accepts as input a generator (e.g., DirectoryIterator, DataframeIterator)
representing the validation set.


Optionally prints a classification report.
Currently, this method is only supported for binary and multiclass
problems, not multilabel classification problems.
Expand source code
def analyze_valid(self, generator, print_report=True, multilabel=None):
    """
    ```
    Makes predictions on validation set and returns the confusion matrix.
    Accepts as input a generator (e.g., DirectoryIterator, DataframeIterator)
    representing the validation set.


    Optionally prints a classification report.
    Currently, this method is only supported for binary and multiclass
    problems, not multilabel classification problems.
    For multilabel problems a warning is issued and None is returned.
    ```
    """
    if multilabel is None:
        multilabel = U.is_multilabel(generator)
    if multilabel:
        warnings.warn("multilabel_confusion_matrix not yet supported - skipping")
        return

    y_true = generator.classes
    # model.predict is used because the *_generator methods are
    # deprecated from TF 2.1.0.
    y_pred = self.model.predict(generator)
    y_pred = np.argmax(y_pred, axis=1)
    if print_report:
        print(classification_report(y_true, y_pred, target_names=self.c))
    # multilabel is always falsy here because of the early return above,
    # so the confusion matrix is computed unconditionally.
    return confusion_matrix(y_true, y_pred)
def explain(self, img_fpath)
Highlights image to explain prediction
Expand source code
def explain(self, img_fpath):
    """
    ```
    Highlights image to explain prediction.
    Returns the result of eli5.show_prediction, or None (after emitting
    a warning) when eli5 cannot be imported or when V2 behavior has not
    been disabled.
    ```
    """
    try:
        import eli5
    except ImportError:
        # Only a failed import is treated as "eli5 not available";
        # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
        msg = (
            "ktrain requires a forked version of eli5 to support tf.keras. "
            + "Install with: pip install https://github.com/amaiya/eli5-tf/archive/refs/heads/master.zip"
        )
        warnings.warn(msg)
        return

    if not DISABLE_V2_BEHAVIOR:
        warnings.warn(
            "Please add os.environ['DISABLE_V2_BEHAVIOR'] = '1' at top of your script or notebook."
        )
        msg = (
            "\nFor image classification, the explain method currently requires disabling V2 behavior in TensorFlow 2.\n"
            + "Please add the following to the top of your script or notebook BEFORE you import ktrain and restart Colab runtime or Jupyter kernel:\n\n"
            + "import os\n"
            + "os.environ['DISABLE_V2_BEHAVIOR'] = '1'\n"
        )
        print(msg)
        return

    # Load the image with the size and color mode configured on the preprocessor.
    img = keras.preprocessing.image.load_img(
        img_fpath,
        target_size=self.preproc.target_size,
        color_mode=self.preproc.color_mode,
    )
    x = keras.preprocessing.image.img_to_array(img)
    # Add a leading axis so the single image forms a batch of one.
    x = np.expand_dims(x, axis=0)
    return eli5.show_prediction(self.model, x)
def get_classes(self)
Expand source code
def get_classes(self):
    """
    ```
    Returns the class labels stored on this predictor.
    ```
    """
    class_labels = self.c
    return class_labels
def predict(self, data, return_proba=False, verbose=0)
Predicts class from image in array format.
If return_proba is True, returns probabilities of each class.
Expand source code
def predict(self, data, return_proba=False, verbose=0):
    """
    ```
    Predicts class from image(s) supplied as a numpy array.
    With return_proba=True, the probabilities of each class are returned.
    Raises ValueError when data is not a numpy.ndarray.
    ```
    """
    if isinstance(data, np.ndarray):
        # Turn the raw array into a generator plus step count, then
        # let predict_generator do the actual prediction.
        batches, n_steps = self.preproc.preprocess(data, batch_size=self.batch_size)
        options = {"steps": n_steps, "return_proba": return_proba, "verbose": verbose}
        return self.predict_generator(batches, **options)
    raise ValueError("data must be numpy.ndarray")
def predict_filename(self, img_path, return_proba=False, verbose=0)
Predicts class from filepath to single image file.
If return_proba is True, returns probabilities of each class.
Expand source code
def predict_filename(self, img_path, return_proba=False, verbose=0):
    """
    ```
    Predicts class for a single image file given its filepath.
    With return_proba=True, the probabilities of each class are returned.
    Raises ValueError when img_path is not an existing file.
    ```
    """
    if os.path.isfile(img_path):
        # Turn the file path into a generator plus step count, then
        # let predict_generator do the actual prediction.
        batches, n_steps = self.preproc.preprocess(img_path, batch_size=self.batch_size)
        options = {"steps": n_steps, "return_proba": return_proba, "verbose": verbose}
        return self.predict_generator(batches, **options)
    raise ValueError("img_path must be valid file")
def predict_folder(self, folder, return_proba=False, verbose=0)
Predicts the classes of all images in a folder.
If return_proba is True, returns probabilities of each class.
Expand source code
def predict_folder(self, folder, return_proba=False, verbose=0):
    """
    ```
    Predicts the classes of every image found in a folder.
    With return_proba=True, the probabilities of each class are returned.
    The result is a list of (filename, prediction) pairs.
    Raises ValueError when folder is not an existing directory.
    ```
    """
    if not os.path.isdir(folder):
        raise ValueError("folder must be valid directory")
    batches, n_steps = self.preproc.preprocess(folder, batch_size=self.batch_size)
    predictions = self.predict_generator(
        batches, steps=n_steps, return_proba=return_proba, verbose=verbose
    )
    fnames = batches.filenames
    # A count mismatch would make zip() silently drop entries, so fail loudly.
    if len(predictions) != len(fnames):
        raise Exception("number of results does not equal number of filenames")
    return list(zip(fnames, predictions))
def predict_generator(self, generator, steps=None, return_proba=False, verbose=0)
Expand source code
def predict_generator(self, generator, steps=None, return_proba=False, verbose=0):
    """
    ```
    Makes predictions for the data supplied by generator.

    If the model is not a classifier, returns the raw model output
    with axis 1 squeezed out.
    If return_proba is True, returns the raw model output.
    For multilabel classifiers, returns for each example a list of
    (class, score) pairs.
    Otherwise returns, for each example, the class with the top score.
    ```
    """
    is_clf, is_multilabel = U.is_classifier(self.model)
    # model.predict is used because the *_generator methods are
    # deprecated from TF 2.1.0.
    raw = self.model.predict(generator, steps=steps, verbose=verbose)
    if not is_clf:
        # Non-classifiers always yield raw outputs, regardless of return_proba.
        return np.squeeze(raw, axis=1)
    if return_proba:
        return raw
    if is_multilabel:
        return [list(zip(self.c, scores)) for scores in raw]
    return [self.c[np.argmax(scores)] for scores in raw]
def predict_proba(self, data, verbose=0)
Expand source code
def predict_proba(self, data, verbose=0):
    """
    ```
    Shorthand for predict(data, return_proba=True).
    ```
    """
    options = {"return_proba": True, "verbose": verbose}
    return self.predict(data, **options)
def predict_proba_filename(self, img_path, verbose=0)
Expand source code
def predict_proba_filename(self, img_path, verbose=0):
    """
    ```
    Shorthand for predict_filename(img_path, return_proba=True).
    ```
    """
    options = {"return_proba": True, "verbose": verbose}
    return self.predict_filename(img_path, **options)
def predict_proba_folder(self, folder, verbose=0)
Expand source code
def predict_proba_folder(self, folder, verbose=0):
    """
    ```
    Shorthand for predict_folder(folder, return_proba=True).
    ```
    """
    options = {"return_proba": True, "verbose": verbose}
    return self.predict_folder(folder, **options)
def predict_proba_generator(self, generator, steps=None, verbose=0)
Expand source code
def predict_proba_generator(self, generator, steps=None, verbose=0):
    """
    ```
    Same as predict_generator with return_proba=True.
    ```
    """
    # Delegate to predict_generator; calling this method itself with
    # return_proba would raise TypeError (no such parameter here).
    return self.predict_generator(
        generator, steps=steps, return_proba=True, verbose=verbose
    )

Inherited members