"""
Calendar heatmaps from Pandas time series data.
Plot Pandas time series data sampled by day in a heatmap per calendar year,
similar to GitHub's contributions calendar.
"""
from __future__ import unicode_literals
import calendar
import datetime
from matplotlib.colors import ColorConverter, ListedColormap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Package metadata.
# The version is kept as a tuple of strings so `__version__` below can be
# derived from it with a plain '.'.join().
# NOTE(review): metadata assignments like these are often parsed textually by
# packaging scripts -- confirm before reformatting them.
__version_info__ = ('0', '0', '6')
# Release date of the version above.
__date__ = '14 Feb 2016'
# Dotted version string built from `__version_info__` ('0.0.6').
__version__ = '.'.join(__version_info__)
__author__ = 'Martijn Vermaat'
__contact__ = 'martijn@vermaat.name'
__homepage__ = 'https://github.com/martijnvermaat/calmap'
def yearplot(data, year=None, how='sum', vmin=None, vmax=None, cmap='Reds',
             fillcolor='whitesmoke', linewidth=1, linecolor=None,
             daylabels=calendar.day_abbr[:], dayticks=True,
             monthlabels=calendar.month_abbr[1:], monthticks=True, ax=None,
             **kwargs):
    """
    Plot one year from a timeseries as a calendar heatmap.

    Parameters
    ----------
    data : Series
        Data for the plot. Must be indexed by a DatetimeIndex.
    year : integer
        Only data indexed by this year will be plotted. If `None`, the first
        year for which there is data will be plotted.
    how : string
        Method for aggregating data by day. If `None`, assume data is already
        sampled by day and don't aggregate. Otherwise, this is used as the
        Pandas aggregation (e.g. ``'sum'``, ``'mean'`` or a callable) applied
        to all values falling on the same calendar day.
    vmin, vmax : floats
        Values to anchor the colormap. If `None`, min and max are used after
        aggregating data by day (computed over all years in `data`, not just
        the plotted one).
    cmap : matplotlib colormap name or object
        The mapping from data values to color space.
    fillcolor : matplotlib color
        Color to use for days without data.
    linewidth : float
        Width of the lines that will divide each day.
    linecolor : color
        Color of the lines that will divide each day. If `None`, the axes
        background color is used, or 'white' if it is transparent.
    daylabels : list
        Strings to use as labels for days, must be of length 7.
    dayticks : list or int or bool
        If `True`, label all days. If `False`, don't label days. If a list,
        only label days with these indices. If an integer, label every n day.
    monthlabels : list
        Strings to use as labels for months, must be of length 12.
    monthticks : list or int or bool
        If `True`, label all months. If `False`, don't label months. If a
        list, only label months with these indices. If an integer, label every
        n month.
    ax : matplotlib Axes
        Axes in which to draw the plot, otherwise use the currently-active
        Axes.
    kwargs : other keyword arguments
        All other keyword arguments are passed to matplotlib `ax.pcolormesh`.

    Returns
    -------
    ax : matplotlib Axes
        Axes object with the calendar heatmap.

    Examples
    --------
    By default, `yearplot` plots the first year and sums the values per day:

    .. plot::
        :context: close-figs

        calmap.yearplot(events)

    We can choose which year is plotted with the `year` keyword argument:

    .. plot::
        :context: close-figs

        calmap.yearplot(events, year=2015)

    The appearance can be changed by using another colormap. Here we also use
    a darker fill color for days without data and remove the lines:

    .. plot::
        :context: close-figs

        calmap.yearplot(events, cmap='YlGn', fillcolor='grey',
                        linewidth=0)

    The axis tick labels can look a bit crowded. We can ask to draw only every
    nth label, or explicitly supply the label indices. The labels themselves
    can also be customized:

    .. plot::
        :context: close-figs

        calmap.yearplot(events, monthticks=3, daylabels='MTWTFSS',
                        dayticks=[0, 2, 4, 6])
    """
    if year is None:
        year = data.index.sort_values()[0].year
    # Callers such as `calendarplot` may hand us a NumPy integer; normalise it
    # so it can be used for string formatting and `datetime.date` alike.
    year = int(year)

    if how is None:
        # Assume already sampled by day.
        by_day = data
    else:
        # Aggregate per calendar day. Grouping on the normalized (midnight)
        # timestamps only yields days that actually occur in the data, so days
        # without data stay missing and get the fill color below. A plain
        # `resample('D').sum()` would report 0 for such days on current
        # pandas, and the old `resample('D', how=...)` API no longer exists.
        by_day = data.groupby(data.index.normalize()).agg(how)

    # Min and max per day.
    if vmin is None:
        vmin = by_day.min()
    if vmax is None:
        vmax = by_day.max()

    if ax is None:
        ax = plt.gca()

    if linecolor is None:
        # Unfortunately, linecolor cannot be transparent, as it is drawn on
        # top of the heatmap cells. Therefore it is only possible to mimic
        # transparent lines by setting them to the axes background color. This
        # of course won't work when the axes itself has a transparent
        # background so in that case we default to white which will usually be
        # the figure or canvas background color.
        linecolor = ax.get_facecolor()
        if ColorConverter().to_rgba(linecolor)[-1] == 0:
            linecolor = 'white'

    # Filter on year. A boolean mask (rather than partial string indexing)
    # also copes with a year for which there is no data at all.
    by_day = by_day[by_day.index.year == year]

    # Add missing days.
    by_day = by_day.reindex(
        pd.date_range(start=str(year), end=str(year + 1), freq='D')[:-1])

    # ISO week number for every day of the year. `DatetimeIndex.week` was
    # removed from pandas; fall back to it only where `isocalendar` is absent.
    days = by_day.index
    if hasattr(days, 'isocalendar'):
        weeks = np.asarray(days.isocalendar().week, dtype=int)
    else:
        weeks = np.asarray(days.week, dtype=int)

    # Create data frame we can pivot later.
    by_day = pd.DataFrame({'data': by_day,
                           'fill': 1,
                           'day': days.dayofweek,
                           'week': weeks})

    # There may be some days assigned to previous year's last week or
    # next year's first week. We create new week numbers for them so
    # the ordering stays intact and week/day pairs unique.
    in_previous_year = (by_day.index.month == 1) & (by_day['week'] > 50)
    by_day.loc[in_previous_year, 'week'] = 0
    in_next_year = (by_day.index.month == 12) & (by_day['week'] < 10)
    by_day.loc[in_next_year, 'week'] = by_day['week'].max() + 1

    # Pivot data on day and week and mask NaN days. Rows are reversed so that
    # the first day of the week ends up at the top of the plot.
    plot_data = by_day.pivot(
        index='day', columns='week', values='data').values[::-1]
    plot_data = np.ma.masked_where(np.isnan(plot_data), plot_data)

    # Do the same for all days of the year, not just those we have data for.
    fill_data = by_day.pivot(
        index='day', columns='week', values='fill').values[::-1]
    fill_data = np.ma.masked_where(np.isnan(fill_data), fill_data)

    # Draw heatmap for all days of the year with fill color.
    ax.pcolormesh(fill_data, vmin=0, vmax=1, cmap=ListedColormap([fillcolor]))

    # Draw heatmap.
    kwargs['linewidth'] = linewidth
    kwargs['edgecolors'] = linecolor
    ax.pcolormesh(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs)

    # Limit heatmap to our data.
    ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0]))

    # Square cells.
    ax.set_aspect('equal')

    # Remove spines and ticks.
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.xaxis.set_tick_params(which='both', length=0)
    ax.yaxis.set_tick_params(which='both', length=0)

    # Get indices for monthlabels.
    if monthticks is True:
        monthticks = range(len(monthlabels))
    elif monthticks is False:
        monthticks = []
    elif isinstance(monthticks, int):
        monthticks = range(len(monthlabels))[monthticks // 2::monthticks]

    # Get indices for daylabels.
    if dayticks is True:
        dayticks = range(len(daylabels))
    elif dayticks is False:
        dayticks = []
    elif isinstance(dayticks, int):
        dayticks = range(len(daylabels))[dayticks // 2::dayticks]

    # Month labels are placed at the week number of the 15th of each month.
    # NOTE(review): this uses the week *number* as x position, while heatmap
    # columns are counted from the first week present (0 or 1), so ticks may
    # be shifted by one column in some years -- kept as is, confirm intent.
    ax.set_xlabel('')
    ax.set_xticks([
        by_day.loc[pd.Timestamp(datetime.date(year, i + 1, 15)), 'week']
        for i in monthticks])
    ax.set_xticklabels([monthlabels[i] for i in monthticks], ha='center')

    # Day labels go on the right; rows were reversed above, hence `6 - i`.
    ax.set_ylabel('')
    ax.yaxis.set_ticks_position('right')
    ax.set_yticks([6 - i + 0.5 for i in dayticks])
    ax.set_yticklabels([daylabels[i] for i in dayticks], rotation='horizontal',
                       va='center')

    return ax
def calendarplot(data, how='sum', yearlabels=True, yearascending=True,
                 yearlabel_kws=None, subplot_kws=None, gridspec_kws=None,
                 fig_kws=None, **kwargs):
    """
    Plot a timeseries as a calendar heatmap.

    Parameters
    ----------
    data : Series
        Data for the plot. Must be indexed by a DatetimeIndex.
    how : string
        Method for aggregating data by day. If `None`, assume data is already
        sampled by day and don't aggregate. Otherwise, this is used as the
        Pandas aggregation (e.g. ``'sum'``, ``'mean'`` or a callable) applied
        to all values falling on the same calendar day.
    yearlabels : bool
        Whether or not to draw the year for each subplot.
    yearascending : bool
        Sort the calendar in ascending or descending order.
    yearlabel_kws : dict
        Keyword arguments passed to the matplotlib `set_ylabel` call which is
        used to draw the year for each subplot.
    subplot_kws : dict
        Keyword arguments passed to the matplotlib `add_subplot` call used to
        create each subplot.
    gridspec_kws : dict
        Keyword arguments passed to the matplotlib `GridSpec` constructor used
        to create the grid the subplots are placed on.
    fig_kws : dict
        Keyword arguments passed to the matplotlib `figure` call.
    kwargs : other keyword arguments
        All other keyword arguments are passed to `yearplot`.

    Returns
    -------
    fig, axes : matplotlib Figure and Axes
        Tuple where `fig` is the matplotlib Figure object and `axes` is an
        array of matplotlib Axes objects with the calendar heatmaps, one per
        year.

    Raises
    ------
    ValueError
        If `data` is empty, since there is no year to plot.

    Examples
    --------
    With `calendarplot` we can plot several years in one figure:

    .. plot::
        :context: close-figs

        calmap.calendarplot(events)
    """
    yearlabel_kws = yearlabel_kws or {}
    subplot_kws = subplot_kws or {}
    gridspec_kws = gridspec_kws or {}
    fig_kws = fig_kws or {}

    years = np.unique(data.index.year)
    if len(years) == 0:
        # Fail early with a clear message instead of an opaque error from
        # matplotlib about a grid with zero rows.
        raise ValueError('cannot plot a calendar for empty data')
    if not yearascending:
        years = years[::-1]

    # One row per year; `squeeze=False` keeps `axes` two-dimensional even for
    # a single year, so the first (only) column can be taken uniformly.
    fig, axes = plt.subplots(nrows=len(years), ncols=1, squeeze=False,
                             subplot_kw=subplot_kws,
                             gridspec_kw=gridspec_kws, **fig_kws)
    axes = axes.T[0]

    # We explicitly aggregate by day only once. This is an optimization, and
    # it also means every `yearplot` call sees the full series when deriving
    # default `vmin`/`vmax`.
    if how is None:
        by_day = data
    else:
        # Group on normalized (midnight) timestamps so that only days present
        # in the data are produced; the old `resample('D', how=...)` API no
        # longer exists in pandas.
        by_day = data.groupby(data.index.normalize()).agg(how)

    # Defaults for the year label; caller-supplied settings take precedence.
    ylabel_kws = dict(
        fontsize=32,
        color=kwargs.get('fillcolor', 'whitesmoke'),
        fontweight='bold',
        fontname='Arial',
        ha='center')
    ylabel_kws.update(yearlabel_kws)

    max_weeks = 0

    for year, ax in zip(years, axes):
        yearplot(by_day, year=year, how=None, ax=ax, **kwargs)
        max_weeks = max(max_weeks, ax.get_xlim()[1])

        if yearlabels:
            ax.set_ylabel(str(year), **ylabel_kws)

    # In a leap year it might happen that we have 54 weeks (e.g., 2012).
    # Here we make sure the width is consistent over all years.
    for ax in axes:
        ax.set_xlim(0, max_weeks)

    # Make the axes look good. Use the figure we created rather than whatever
    # figure happens to be current in pyplot.
    fig.tight_layout()

    return fig, axes