Source code for nolearn.metrics

from __future__ import print_function

import numpy as np
from sklearn.base import clone
from sklearn.metrics import f1_score

def multiclass_logloss(actual, predicted, eps=1e-15):
    """Multi class version of Logarithmic Loss metric.

    :param actual: Array containing the actual target classes
    :param predicted: Matrix with class predictions, one probability per
                      class
    """
    # Convert 'actual' to a binary array if it's not already:
    if len(actual.shape) == 1:
        actual2 = np.zeros((actual.shape[0], predicted.shape[1]))
        for i, val in enumerate(actual):
            actual2[i, val] = 1
        actual = actual2

    clip = np.clip(predicted, eps, 1 - eps)
    rows = actual.shape[0]
    vsota = np.sum(actual * np.log(clip))
    return -1.0 / rows * vsota
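
# --- Usage sketch (illustrative; not part of the original nolearn source) ---
# multiclass_logloss accepts integer class labels (or an already one-hot
# encoded matrix) and a matrix of predicted probabilities, one column per
# class.  The helper name and the values below are made up for illustration.
def _example_multiclass_logloss():
    actual = np.array([0, 1, 2])
    predicted = np.array([[0.8, 0.1, 0.1],
                          [0.2, 0.7, 0.1],
                          [0.1, 0.2, 0.7]])
    # Mean negative log-probability assigned to the true classes; ~0.312 here.
    return multiclass_logloss(actual, predicted)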
class LearningCurve(object):
    score_func = staticmethod(f1_score)

    def __init__(self, score_func=None):
        if score_func is None:
            score_func = self.score_func
        self.score_func = score_func

    def predict(self, clf, X):
        return clf.predict(X)

    def __call__(self, dataset, classifier, steps=10,
                 verbose=0, random_state=42):
        """Create a learning curve that uses more training cases with
        each step.

        :param dataset: Dataset to work with
        :type dataset: :class:`~nolearn.dataset.Dataset`
        :param classifier: Classifier for fitting and making predictions.
        :type classifier: :class:`~sklearn.base.BaseEstimator`
        :param steps: Number of steps in the learning curve.
        :type steps: int

        :result: 3-tuple with lists `scores_train`, `scores_test`, `sizes`

        Drawing the resulting learning curve can be done like this:

        .. code-block:: python

          dataset = Dataset()
          clf = LogisticRegression()
          scores_train, scores_test, sizes = learning_curve(dataset, clf)
          pl.plot(sizes, scores_train, 'b', label='training set')
          pl.plot(sizes, scores_test, 'r', label='test set')
          pl.legend(loc='lower right')
        """
        X_train, X_test, y_train, y_test = dataset.train_test_split()

        scores_train = []
        scores_test = []
        sizes = []

        if verbose:
            print("      n  train  test")

        for frac in np.linspace(0.1, 1.0, num=steps):
            frac_size = int(X_train.shape[0] * frac)
            sizes.append(frac_size)
            X_train1 = X_train[:frac_size]
            y_train1 = y_train[:frac_size]

            clf = clone(classifier)
            clf.fit(X_train1, y_train1)

            predict_train = self.predict(clf, X_train1)
            predict_test = self.predict(clf, X_test)

            score_train = self.score_func(y_train1, predict_train)
            score_test = self.score_func(y_test, predict_test)

            scores_train.append(score_train)
            scores_test.append(score_test)

            if verbose:
                print(" %8d  %0.4f  %0.4f" % (
                    frac_size, score_train, score_test))

        return scores_train, scores_test, sizes


class LearningCurveProbas(LearningCurve):
    score_func = staticmethod(multiclass_logloss)

    def predict(self, clf, X):
        return clf.predict_proba(X)


learning_curve = LearningCurve().__call__

#: Same as :func:`learning_curve` but uses :func:`multiclass_logloss`
#: as the loss function.
learning_curve_logloss = LearningCurveProbas().__call__
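
# --- Usage sketch (illustrative; not part of the original nolearn source) ---
# `learning_curve` only needs an object exposing `train_test_split()`, as
# nolearn.dataset.Dataset provides.  To keep this sketch self-contained it
# uses a hypothetical stand-in (`ToyDataset`) built on scikit-learn
# utilities; the names `ToyDataset` and `_example_learning_curve` are
# assumptions, not part of the nolearn API.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


class ToyDataset(object):
    def __init__(self, data, target):
        self.data = data
        self.target = target

    def train_test_split(self):
        # Returns the same (X_train, X_test, y_train, y_test) 4-tuple that
        # LearningCurve.__call__ unpacks.
        return train_test_split(self.data, self.target, random_state=42)


def _example_learning_curve():
    # A binary problem keeps the default f1_score usable without extra
    # arguments; scaling helps LogisticRegression converge.
    X, y = load_breast_cancer(return_X_y=True)
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    # Returns train scores, test scores, and the training-set sizes used.
    return learning_curve(ToyDataset(X, y), clf, steps=5, verbose=1)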