# yellowbrick.features.radviz
# Implements radviz for feature analysis.
#
# Author: Benjamin Bengfort <bbengfort@districtdatalabs.com>
# Created: Fri Oct 07 13:18:00 2016 -0400
#
# Copyright (C) 2016 District Data Labs
# For license information, see LICENSE.txt
#
# ID: radviz.py [] benjamin@bengfort.com $
"""
Implements radviz for feature analysis.
"""
##########################################################################
## Imports
##########################################################################
import itertools

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from yellowbrick.features.base import DataVisualizer
from yellowbrick.exceptions import YellowbrickTypeError
from yellowbrick.style.colors import resolve_colors, get_color_cycle
##########################################################################
## Quick Methods
##########################################################################
def radviz(X, y=None, ax=None, features=None, classes=None,
           color=None, colormap=None, **kwargs):
    """Draw a RadViz plot for ``X`` in a single call.

    Each feature is displayed as an axis anchored on the circumference of a
    circle, and each instance is drawn as a point of a scatter plot inside
    that circle. This is a convenience wrapper that builds a
    RadialVisualizer, fits it, transforms the data and hands back the axes,
    which is handy for one-off analysis.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    ax : matplotlib axes
        The axes to plot the figure on.

    features : list of strings
        The names of the features or columns

    classes : list of strings
        The names of the classes in the target

    color : list or tuple of colors
        Specify the colors for each individual class

    colormap : string or matplotlib cmap
        Sequential colormap for continuous target

    kwargs : dict
        Extra keyword arguments; they are forwarded both to the
        RadialVisualizer constructor and to its ``fit`` method.

    Returns
    -------
    ax : matplotlib axes
        Returns the axes that the RadViz plot was drawn on.
    """
    # Build the visualizer from the supplied options
    viz = RadialVisualizer(
        ax=ax, features=features, classes=classes, color=color,
        colormap=colormap, **kwargs
    )

    # Fitting is documented as the step that calls draw; transform follows
    viz.fit(X, y, **kwargs)
    viz.transform(X)

    # Hand back the axes held by the visualizer
    return viz.ax
##########################################################################
## Static RadViz Visualizer
##########################################################################
class RadialVisualizer(DataVisualizer):
    """
    RadViz is a multivariate data visualization algorithm that plots each
    feature axis uniformly around the circumference of a circle, then plots
    each instance in the interior of the circle at the weighted average of
    the axis anchors, using the instance's min-max normalized feature values
    as the weights.
    """

    def __init__(self, ax=None, features=None, classes=None, color=None,
                 colormap=None, **kwargs):
        """
        Initialize the base radviz with many of the options required in order
        to make the visualization work.

        Parameters
        ----------
        ax : matplotlib axes, default: None
            The axis to plot the figure on.

        features : list of strings, default: None
            A list of feature names to use. If a DataFrame is passed to fit
            and features is None, feature names are selected as the columns
            of the DataFrame.

        classes : list of strings, default: None
            A list of class names for the legend. If classes is None and a y
            value is passed to fit then the classes are selected from the
            target vector.

        color : list or tuple of colors, default: None
            Use either color to colorize the points on a per class basis or
            colormap to color them on a continuous scale.

        colormap : string or matplotlib cmap, default: None
            Use either color to colorize the points on a per class basis or
            colormap to color them on a continuous scale.

        kwargs : dict
            Keyword arguments passed to the super class.

        Notes
        -----
        These parameters can be influenced later on in the visualization
        process, but can and should be set as early as possible.
        """
        super(RadialVisualizer, self).__init__(
            ax, features, classes, color, colormap, **kwargs
        )

    @staticmethod
    def normalize(X):
        """
        MinMax normalization to fit a matrix in the space [0,1] by column.

        Parameters
        ----------
        X : array-like of shape n x m
            The data to rescale; it is converted to a float ndarray.

        Returns
        -------
        ndarray of shape n x m
            Each column rescaled so its minimum is 0 and its maximum is 1.
            A column whose values are all equal is mapped to zeros instead
            of producing NaN from a division by zero.
        """
        X = np.asarray(X, dtype=float)
        lower = X.min(axis=0)
        span = X.max(axis=0) - lower

        # A constant column has zero range; divide by 1 so it becomes 0
        span = np.where(span == 0, 1.0, span)
        return (X - lower) / span

    def draw(self, X, y, **kwargs):
        """
        Called from the fit method, this method creates the radviz canvas and
        draws each instance as a class or target colored point, whose location
        is determined by the feature data set.

        Parameters
        ----------
        X : array-like of shape n x m
            A matrix of n instances with m features.

        y : array-like of length n
            For every row of X, the value used to index ``self.classes_``
            (i.e. ``self.classes_[y[i]]`` must resolve to that row's class).

        kwargs : dict
            Keyword arguments passed through to ``ax.scatter``.

        Notes
        -----
        Reads ``self.classes_`` and ``self.features_``, which are not set in
        this class (presumably by ``fit`` in the base class -- confirm).
        The ``color`` and ``colormap`` options are not consulted here yet;
        colors are taken from the current color cycle.
        """
        # Get the shape of the data
        nrows, ncols = X.shape

        # Create the axes if they don't exist. Keyword arguments to gca()
        # are no longer accepted by recent matplotlib releases, so the
        # limits are applied to the axes afterwards.
        if self.ax is None:
            self.ax = plt.gca()
            self.ax.set_xlim([-1, 1])
            self.ax.set_ylim([-1, 1])

        # Create the colors
        # TODO: Allow both colormap, listed colors, and palette definition
        #       (e.g. via resolve_colors)
        # TODO: Make this an independent function or property for override!
        # Cycle so that every class gets a color even when there are more
        # classes than colors in the cycle.
        color_values = get_color_cycle()
        colors = dict(zip(self.classes_, itertools.cycle(color_values)))

        # Create a data structure to hold scatter plot representations
        to_plot = {kls: [[], []] for kls in self.classes_}

        # Compute the anchors around the circumference for each feature axis
        # TODO: make this an independent function for override
        angles = 2.0 * np.pi * (np.arange(ncols) / float(ncols))
        s = np.column_stack((np.cos(angles), np.sin(angles)))

        # Normalize the data to [0, 1] and use the values as anchor weights
        weights = self.normalize(X)
        totals = weights.sum(axis=1)

        # A row that is at the minimum of every column has zero total weight;
        # it is pulled toward no anchor, so draw it at the center rather than
        # dividing by zero.
        totals = np.where(totals == 0, 1.0, totals)
        points = weights.dot(s) / totals[:, np.newaxis]

        # Index the target by position, regardless of any pandas index
        if y is not None:
            y = np.asarray(y)

        # Group the point locations by class
        for i, (px, py) in enumerate(points):
            kls = self.classes_[y[i]]
            to_plot[kls][0].append(px)
            to_plot[kls][1].append(py)

        # Add the scatter plots from the to_plot structure
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls in self.classes_:
            self.ax.scatter(
                to_plot[kls][0], to_plot[kls][1],
                color=colors[kls], label=str(kls), **kwargs
            )

        # Add the circular axis path
        # TODO: Make this a separate function (along with labeling)
        self.ax.add_patch(
            patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')
        )

        # Add the feature names
        for xy, name in zip(s, self.features_):
            # Add the patch indicating the location of the axis
            self.ax.add_patch(
                patches.Circle(xy, radius=0.025, facecolor='#777777')
            )

            # Offset the label away from the center: left of the marker on
            # the left half, right of it on the right half; likewise below
            # or above it for the lower and upper halves.
            if xy[0] < 0.0:
                dx, ha = -0.025, 'right'
            else:
                dx, ha = 0.025, 'left'

            if xy[1] < 0.0:
                dy, va = -0.025, 'top'
            else:
                dy, va = 0.025, 'bottom'

            self.ax.text(
                xy[0] + dx, xy[1] + dy, name, ha=ha, va=va, size='small'
            )

        self.ax.axis('equal')

    def finalize(self, **kwargs):
        """
        Finalize executes any subclass-specific axes finalization steps.
        The user calls poof and poof calls finalize.

        Parameters
        ----------
        kwargs: generic keyword arguments.
        """
        # Set the title
        self.set_title(
            'RadViz for {} Features'.format(len(self.features_))
        )

        # Add the legend
        self.ax.legend(loc='best')
# Alias: RadViz is a second, shorter name bound to the RadialVisualizer class
RadViz = RadialVisualizer