Source code for nolearn.metrics

from __future__ import print_function

import numpy as np
from sklearn.base import clone
from sklearn.metrics import f1_score

def multiclass_logloss(actual, predicted, eps=1e-15):
    """Multi class version of Logarithmic Loss metric.

    :param actual: Array containing the actual target classes
    :param predicted: Matrix with class predictions, one probability per
                      class
    """
    # Convert 'actual' to a binary array if it's not already:
    if len(actual.shape) == 1:
        actual2 = np.zeros((actual.shape[0], predicted.shape[1]))
        for i, val in enumerate(actual):
            actual2[i, val] = 1
        actual = actual2

    clip = np.clip(predicted, eps, 1 - eps)
    rows = actual.shape[0]
    vsota = np.sum(actual * np.log(clip))
    return -1.0 / rows * vsota
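
# --- Usage sketch (illustrative; not part of the original nolearn source) ---
# multiclass_logloss accepts integer class labels (or an already one-hot
# encoded matrix) and a matrix of predicted probabilities, one column per
# class.  The helper name and the values below are made up for illustration.
def _example_multiclass_logloss():
    actual = np.array([0, 1, 2])
    predicted = np.array([[0.8, 0.1, 0.1],
                          [0.2, 0.7, 0.1],
                          [0.1, 0.2, 0.7]])
    # Mean negative log-probability assigned to the true classes; ~0.312 here.
    return multiclass_logloss(actual, predicted)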
class LearningCurve(object):
    score_func = staticmethod(f1_score)

    def __init__(self, score_func=None):
        if score_func is None:
            score_func = self.score_func
        self.score_func = score_func

    def predict(self, clf, X):
        return clf.predict(X)

    def __call__(self, dataset, classifier, steps=10,
                 verbose=0, random_state=42):
        """Create a learning curve that uses more training cases with
        each step.

        :param dataset: Dataset to work with
        :type dataset: :class:`~nolearn.dataset.Dataset`
        :param classifier: Classifier for fitting and making predictions.
        :type classifier: :class:`~sklearn.base.BaseEstimator`
        :param steps: Number of steps in the learning curve.
        :type steps: int

        :result: 3-tuple with lists `scores_train`, `scores_test`, `sizes`

        Drawing the resulting learning curve can be done like this:

        .. code-block:: python

          dataset = Dataset()
          clf = LogisticRegression()
          scores_train, scores_test, sizes = learning_curve(dataset, clf)
          pl.plot(sizes, scores_train, 'b', label='training set')
          pl.plot(sizes, scores_test, 'r', label='test set')
          pl.legend(loc='lower right')
        """
        X_train, X_test, y_train, y_test = dataset.train_test_split()

        scores_train = []
        scores_test = []
        sizes = []

        if verbose:
            print("      n  train  test")

        for frac in np.linspace(0.1, 1.0, num=steps):
            frac_size = int(X_train.shape[0] * frac)
            sizes.append(frac_size)
            X_train1 = X_train[:frac_size]
            y_train1 = y_train[:frac_size]

            clf = clone(classifier)
            clf.fit(X_train1, y_train1)

            predict_train = self.predict(clf, X_train1)
            predict_test = self.predict(clf, X_test)

            score_train = self.score_func(y_train1, predict_train)
            score_test = self.score_func(y_test, predict_test)

            scores_train.append(score_train)
            scores_test.append(score_test)

            if verbose:
                print(" %8d  %0.4f  %0.4f" % (
                    frac_size, score_train, score_test))

        return scores_train, scores_test, sizes


class LearningCurveProbas(LearningCurve):
    score_func = staticmethod(multiclass_logloss)

    def predict(self, clf, X):
        return clf.predict_proba(X)


learning_curve = LearningCurve().__call__

#: Same as :func:`learning_curve` but uses :func:`multiclass_logloss`
#: as the loss function.
learning_curve_logloss = LearningCurveProbas().__call__
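
# --- Usage sketch (illustrative; not part of the original nolearn source) ---
# `learning_curve` only needs an object exposing `train_test_split()`, as
# nolearn.dataset.Dataset provides.  To keep this sketch self-contained it
# uses a hypothetical stand-in (`ToyDataset`) built on scikit-learn
# utilities; the names `ToyDataset` and `_example_learning_curve` are
# assumptions, not part of the nolearn API.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


class ToyDataset(object):
    def __init__(self, data, target):
        self.data = data
        self.target = target

    def train_test_split(self):
        # Returns the same (X_train, X_test, y_train, y_test) 4-tuple that
        # LearningCurve.__call__ unpacks.
        return train_test_split(self.data, self.target, random_state=42)


def _example_learning_curve():
    # A binary problem keeps the default f1_score usable without extra
    # arguments; scaling helps LogisticRegression converge.
    X, y = load_breast_cancer(return_X_y=True)
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    # Returns train scores, test scores, and the training-set sizes used.
    return learning_curve(ToyDataset(X, y), clf, steps=5, verbose=1)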