Source code for yellowbrick.features.pcoords

# yellowbrick.features.pcoords
# Implementations of parallel coordinates for feature analysis.
#
# Author:   Benjamin Bengfort <bbengfort@districtdatalabs.com>
# Created:  Mon Oct 03 21:46:06 2016 -0400
#
# Copyright (C) 2016 District Data Labs
# For license information, see LICENSE.txt
#
# ID: pcoords.py [] benjamin@bengfort.com $

"""
Implementations of parallel coordinates for multi-dimensional feature
analysis. There are a variety of parallel coordinates from Andrews Curves to
coordinates that optimize column order.
"""

##########################################################################
## Imports
##########################################################################

import numpy as np
import matplotlib.pyplot as plt

from yellowbrick.features.base import DataVisualizer
from yellowbrick.exceptions import YellowbrickTypeError
from yellowbrick.style.colors import resolve_colors, get_color_cycle

##########################################################################
## Quick Methods
##########################################################################

def parallel_coordinates(X, y=None, ax=None, features=None, classes=None,
                         color=None, colormap=None, vlines=True,
                         vlines_kwds=None, **kwargs):
    """Displays each feature as a vertical axis and each instance as a line.

    Quick method: a thin wrapper that builds a ParallelCoordinates
    visualizer, fits it, transforms the data, and hands back the axes, for
    one-off analysis.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    ax : matplotlib axes
        The axes to plot the figure on.

    features : list of strings
        The names of the features or columns

    classes : list of strings
        The names of the classes in the target

    color : list or tuple of colors
        Specify the colors for each individual class

    colormap : string or matplotlib cmap
        Sequential colormap for continuous target

    vlines : bool
        Display the vertical axis lines

    vlines_kwds : dict
        Keyword arguments to draw the vlines

    kwargs : dict
        Additional keyword arguments; the same mapping is forwarded to both
        the ParallelCoordinates constructor and its ``fit`` call.

    Returns
    -------
    ax : matplotlib axes
        Returns the axes that the parallel coordinates were drawn on.
    """
    # Build the visualizer, naming each option explicitly so the mapping
    # onto the constructor signature is obvious.
    oz = ParallelCoordinates(
        ax=ax,
        features=features,
        classes=classes,
        color=color,
        colormap=colormap,
        vlines=vlines,
        vlines_kwds=vlines_kwds,
        **kwargs
    )

    # Fit (documented on the visualizer as the step that triggers draw),
    # then transform; the transform result is intentionally discarded.
    oz.fit(X, y, **kwargs)
    oz.transform(X)

    # The caller gets the axes back so the figure can be customized further.
    return oz.ax

##########################################################################
## Static Parallel Coordinates Visualizer
##########################################################################

class ParallelCoordinates(DataVisualizer):
    """
    Parallel coordinates displays each feature as a vertical axis spaced
    evenly along the horizontal, and each instance as a line drawn between
    each individual axis.
    """

    def __init__(self, ax=None, features=None, classes=None, color=None,
                 colormap=None, vlines=True, vlines_kwds=None, **kwargs):
        """
        Initialize the base parallel coordinates with many of the options
        required in order to make the visualization work.

        Parameters
        ----------
        ax : matplotlib axes, default: None
            The axis to plot the figure on.

        features : list of strings, default: None
            A list of feature names to use. If a DataFrame is passed to fit
            and features is None, feature names are selected as the columns
            of the DataFrame.

        classes : list of strings, default: None
            A list of class names for the legend. If classes is None and a y
            value is passed to fit then the classes are selected from the
            target vector.

        color : list or tuple of colors, default: None
            Optional colors to colorize lines. Use either color to colorize
            the lines on a per class basis or colormap to color them on a
            continuous scale.

        colormap : string or matplotlib cmap, default: None
            Optional colormap to colorize lines. Use either color to colorize
            the lines on a per class basis or colormap to color them on a
            continuous scale.

        vlines : bool, default: True
            Flag to determine vertical line display.

        vlines_kwds : dict, default: None
            Options to style or display the vertical lines. Any falsy value
            (None or an empty dict) selects a thin black line.

        kwargs : dict
            Keyword arguments passed to the super class.

        Notes
        -----
        These parameters can be influenced later on in the visualization
        process, but can and should be set as early as possible.
        """
        super(ParallelCoordinates, self).__init__(
            ax, features, classes, color, colormap, **kwargs
        )

        # Visual Parameters
        self.show_vlines = vlines
        self.vlines_kwds = vlines_kwds or {
            'linewidth': 1, 'color': 'black'
        }

    def draw(self, X, y, **kwargs):
        """
        Called from the fit method, this method creates the parallel
        coordinates canvas and draws each instance and vertical lines on it.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features. Converted with
            ``np.asarray`` so that iteration is always row-wise.

        y : ndarray or Series of length n
            The target for each instance. Each entry is either a string
            class name (used directly as the label) or an index into
            ``self.classes_``.

        kwargs : dict
            Keyword arguments forwarded to every ``ax.plot`` call.

        Notes
        -----
        Reads ``self.classes_`` and ``self.features_``, which are not set in
        this class -- presumably the base class fit populates them before
        calling draw; confirm against DataVisualizer.
        """
        # Local import: the module-level imports do not include itertools.
        from itertools import cycle

        # Iterating a DataFrame yields column labels rather than rows, and
        # integer lookups on a Series are label based; plain arrays give
        # positional, row-wise access for both inputs.
        X = np.asarray(X)
        if y is not None:
            y = np.asarray(y)

        # Get the shape of the data
        nrows, ncols = X.shape

        # Create the xticks for each column
        # TODO: Allow the user to specify this feature
        x = list(range(ncols))

        # Create the axis if it doesn't exist
        if self.ax is None:
            self.ax = plt.gca()

        # Create the colors
        # TODO: Allow both colormap, listed colors, and palette definition
        #       (e.g. via resolve_colors with self.colormap / self.color).
        # TODO: Make this an independent function or property for override!
        # The color cycle is repeated so that a target with more classes
        # than available colors wraps around instead of leaving the extra
        # classes without an entry (which raised KeyError when plotting).
        colors = dict(zip(self.classes_, cycle(get_color_cycle())))

        # Track which labels are already in the legend
        used_legends = set()

        # TODO: Make an independent function to allow addition of instances!
        for idx, row in enumerate(X):
            target = y[idx]

            # A string target is already the class name; anything else is
            # treated as an index into the class list, as before.
            if isinstance(target, str):
                label = target
            else:
                label = self.classes_[target]

            # Only the first line of each class carries a legend label so
            # the legend shows one entry per class.
            if label not in used_legends:
                used_legends.add(label)
                self.ax.plot(x, row, color=colors[label], label=label, **kwargs)
            else:
                self.ax.plot(x, row, color=colors[label], **kwargs)

        # Add the vertical lines
        # TODO: Make an independent function for override!
        if self.show_vlines:
            for idx in x:
                self.ax.axvline(idx, **self.vlines_kwds)

        # Set the limits
        self.ax.set_xticks(x)
        self.ax.set_xticklabels(self.features_)
        self.ax.set_xlim(x[0], x[-1])

    def finalize(self, **kwargs):
        """
        Finalize executes any subclass-specific axes finalization steps.
        The user calls poof and poof calls finalize.

        Parameters
        ----------
        kwargs : dict
            Generic keyword arguments (unused here).
        """
        # Set the title
        self.set_title(
            'Parallel Coordinates for {} Features'.format(len(self.features_))
        )

        # Set the legend and the grid
        self.ax.legend(loc='best')
        self.ax.grid()