Source code for yellowbrick.text.base

# yellowbrick.text.base
# Base classes for text feature visualizers and feature selection tools.
#
# Author:   Rebecca Bilbro <rbilbro@districtdatalabs.com>
# Created:  2017-01-20 14:44
#
# Copyright (C) 2017 District Data Labs
# For license information, see LICENSE.txt
#
# ID: base.py [] rbilbro@districtdatalabs.com $

"""
Base classes for text feature visualizers and text feature selection tools.
"""

##########################################################################
## Imports
##########################################################################

from yellowbrick.base import Visualizer
from yellowbrick.utils import is_dataframe
from sklearn.base import TransformerMixin

##########################################################################
## Text Visualizers
##########################################################################

class TextVisualizer(Visualizer, TransformerMixin):
    """
    Base class for text feature visualization to investigate documents
    individually or as a full corpus.

    TextVisualizers are used after a text corpus has been transformed in some
    way (e.g. normalized through stemming or lemmatization, via stopwords
    removal, or through vectorization). Thus a TextVisualizer is itself a
    transformer and can be used in a Scikit-Learn Pipeline to perform
    automatic visual analysis during build.

    Accepts as input a DataFrame or Numpy array.
    """

    def __init__(self, ax=None, **kwargs):
        """
        These parameters can be influenced later on in the visualization
        process, but can and should be set as early as possible.

        Parameters
        ----------
        ax : axes
            the axis to plot the figure on

        kwargs : dict
            Pass generic arguments to the drawing method
        """
        super(TextVisualizer, self).__init__(ax=ax, **kwargs)

    def fit(self, X, y=None, **fit_params):
        """
        This method performs preliminary computations in order to set up the
        figure, compute statistics, or perform other analyses. It can also
        call drawing methods in order to set up various non-instance-related
        figure elements.

        The base implementation performs no computation and simply returns
        the instance.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        fit_params : dict
            keyword arguments for parameter fitting.

        Returns
        -------
        self : instance
            Returns the instance of the transformer/visualizer
        """
        return self

    def transform(self, X):
        """
        Primarily a pass-through to ensure that the text visualizer will
        work in a pipeline setting. This method can also call drawing methods
        in order to ensure that the visualization is constructed.

        The base implementation returns its input unchanged.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        Returns
        -------
        X : numpy array
            This method must return a numpy array with the same shape as X.
        """
        return X

    def fit_transform_poof(self, X, y=None, **kwargs):
        """
        Fit to data, transform it, then visualize it.

        Fits the text visualizer to X and y with optional parameters by
        passing in all of kwargs, then calls poof with the same kwargs. This
        method must return the result of the transform method.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        kwargs : dict
            Pass generic arguments to the drawing method

        Returns
        -------
        X : numpy array
            This method must return a numpy array with the same shape as X.
        """
        # The same kwargs are deliberately forwarded to both the inherited
        # fit_transform and to poof, as described in the docstring above.
        Xp = self.fit_transform(X, y, **kwargs)
        self.poof(**kwargs)
        return Xp