# Source code for calmap

"""
Calendar heatmaps from Pandas time series data.

Plot Pandas time series data sampled by day in a heatmap per calendar year,
similar to GitHub's contributions calendar.
"""


from __future__ import unicode_literals

import calendar
import datetime

from matplotlib.colors import ColorConverter, ListedColormap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Release information. The version is kept as a tuple of string components
# so the dotted version string below can be derived from it.
__version_info__ = ('0', '0', '6')
__date__ = '14 Feb 2016'


# Project metadata.
__author__ = 'Martijn Vermaat'
__contact__ = 'martijn@vermaat.name'
__homepage__ = 'https://github.com/martijnvermaat/calmap'
__version__ = '.'.join(component for component in __version_info__)


def yearplot(data, year=None, how='sum', vmin=None, vmax=None, cmap='Reds',
             fillcolor='whitesmoke', linewidth=1, linecolor=None,
             daylabels=calendar.day_abbr[:], dayticks=True,
             monthlabels=calendar.month_abbr[1:], monthticks=True, ax=None,
             **kwargs):
    """
    Plot one year from a timeseries as a calendar heatmap.

    Parameters
    ----------
    data : Series
        Data for the plot. Must be indexed by a DatetimeIndex.
    year : integer
        Only data indexed by this year will be plotted. If `None`, the first
        year for which there is data will be plotted.
    how : string
        Method for resampling data by day. If `None`, assume data is already
        sampled by day and don't resample. Otherwise, this is the aggregation
        applied to the daily bins produced by Pandas `Series.resample`.
    vmin, vmax : floats
        Values to anchor the colormap. If `None`, min and max are used after
        resampling data by day.
    cmap : matplotlib colormap name or object
        The mapping from data values to color space.
    fillcolor : matplotlib color
        Color to use for days without data.
    linewidth : float
        Width of the lines that will divide each day.
    linecolor : color
        Color of the lines that will divide each day. If `None`, the axes
        background color is used, or 'white' if it is transparent.
    daylabels : list
        Strings to use as labels for days, must be of length 7.
    dayticks : list or int or bool
        If `True`, label all days. If `False`, don't label days. If a list,
        only label days with these indices. If an integer, label every n day.
    monthlabels : list
        Strings to use as labels for months, must be of length 12.
    monthticks : list or int or bool
        If `True`, label all months. If `False`, don't label months. If a
        list, only label months with these indices. If an integer, label every
        n month.
    ax : matplotlib Axes
        Axes in which to draw the plot, otherwise use the currently-active
        Axes.
    kwargs : other keyword arguments
        All other keyword arguments are passed to matplotlib `ax.pcolormesh`.

    Returns
    -------
    ax : matplotlib Axes
        Axes object with the calendar heatmap.

    Examples
    --------

    By default, `yearplot` plots the first year and sums the values per day:

    .. plot::
        :context: close-figs

        calmap.yearplot(events)

    We can choose which year is plotted with the `year` keyword argument:

    .. plot::
        :context: close-figs

        calmap.yearplot(events, year=2015)

    The appearance can be changed by using another colormap. Here we also use
    a darker fill color for days without data and remove the lines:

    .. plot::
        :context: close-figs

        calmap.yearplot(events, cmap='YlGn', fillcolor='grey',
                        linewidth=0)

    The axis tick labels can look a bit crowded. We can ask to draw only every
    nth label, or explicitly supply the label indices. The labels themselves
    can also be customized:

    .. plot::
        :context: close-figs

        calmap.yearplot(events, monthticks=3, daylabels='MTWTFSS',
                        dayticks=[0, 2, 4, 6])

    """
    if year is None:
        year = data.index.sort_values()[0].year

    if how is None:
        # Assume already sampled by day.
        by_day = data
    else:
        # Sample by day. The aggregation is applied to the resampler object
        # because `resample` no longer accepts a `how` argument.
        by_day = data.resample('D').agg(how)

    # Min and max per day.
    if vmin is None:
        vmin = by_day.min()
    if vmax is None:
        vmax = by_day.max()

    if ax is None:
        ax = plt.gca()

    if linecolor is None:
        # Unfortunately, linecolor cannot be transparent, as it is drawn on
        # top of the heatmap cells. Therefore it is only possible to mimic
        # transparent lines by setting them to the axes background color. This
        # of course won't work when the axes itself has a transparent
        # background so in that case we default to white which will usually be
        # the figure or canvas background color.
        linecolor = ax.get_facecolor()
        if ColorConverter().to_rgba(linecolor)[-1] == 0:
            linecolor = 'white'

    # Filter on year. A boolean mask is used instead of partial string
    # indexing so that a year without any data yields an empty selection
    # (and thus an all-fill plot) rather than a lookup error.
    by_day = by_day[by_day.index.year == year]

    # Add missing days.
    by_day = by_day.reindex(
        pd.date_range(start=str(year), end=str(year + 1), freq='D')[:-1])
    days = by_day.index

    # ISO week number per day, computed from the timestamps themselves so it
    # does not depend on the `DatetimeIndex.week` attribute.
    weeks = np.array([day.isocalendar()[1] for day in days])
    months = np.asarray(days.month)

    # There may be some days assigned to previous year's last week or
    # next year's first week. We create new week numbers for them so
    # the ordering stays intact and week/day pairs unique.
    weeks[(months == 1) & (weeks > 50)] = 0
    weeks[(months == 12) & (weeks < 10)] = weeks.max() + 1

    # Create data frame we can pivot later.
    by_day = pd.DataFrame({'data': by_day,
                           'fill': 1,
                           'day': np.asarray(days.dayofweek),
                           'week': weeks},
                          index=days)

    # Pivot data on day and week and mask NaN days. Rows are reversed so
    # that the first weekday ends up at the top of the plot.
    plot_data = by_day.pivot(
        index='day', columns='week', values='data').values[::-1]
    plot_data = np.ma.masked_where(np.isnan(plot_data), plot_data)

    # Do the same for all days of the year, not just those we have data for.
    fill_data = by_day.pivot(
        index='day', columns='week', values='fill').values[::-1]
    fill_data = np.ma.masked_where(np.isnan(fill_data), fill_data)

    # Draw heatmap for all days of the year with fill color.
    ax.pcolormesh(fill_data, vmin=0, vmax=1, cmap=ListedColormap([fillcolor]))

    # Draw heatmap.
    kwargs['linewidth'] = linewidth
    kwargs['edgecolors'] = linecolor
    ax.pcolormesh(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs)

    # Limit heatmap to our data.
    ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0]))

    # Square cells.
    ax.set_aspect('equal')

    # Remove spines and ticks.
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.xaxis.set_tick_params(which='both', length=0)
    ax.yaxis.set_tick_params(which='both', length=0)

    # Get indices for monthlabels.
    if monthticks is True:
        monthticks = range(len(monthlabels))
    elif monthticks is False:
        monthticks = []
    elif isinstance(monthticks, int):
        monthticks = range(len(monthlabels))[monthticks // 2::monthticks]

    # Get indices for daylabels.
    if dayticks is True:
        dayticks = range(len(daylabels))
    elif dayticks is False:
        dayticks = []
    elif isinstance(dayticks, int):
        dayticks = range(len(daylabels))[dayticks // 2::dayticks]

    # Month labels are positioned at the week number of the 15th of each
    # labelled month.
    ax.set_xlabel('')
    ax.set_xticks([
        by_day.loc[pd.Timestamp(datetime.date(year, i + 1, 15)), 'week']
        for i in monthticks])
    ax.set_xticklabels([monthlabels[i] for i in monthticks], ha='center')

    # Day labels go on the right, centered on each row; rows were reversed
    # above, hence the `6 - i`.
    ax.set_ylabel('')
    ax.yaxis.set_ticks_position('right')
    ax.set_yticks([6 - i + 0.5 for i in dayticks])
    ax.set_yticklabels([daylabels[i] for i in dayticks],
                       rotation='horizontal', va='center')

    return ax
def calendarplot(data, how='sum', yearlabels=True, yearascending=True,
                 yearlabel_kws=None, subplot_kws=None, gridspec_kws=None,
                 fig_kws=None, **kwargs):
    """
    Plot a timeseries as a calendar heatmap.

    Parameters
    ----------
    data : Series
        Data for the plot. Must be indexed by a DatetimeIndex.
    how : string
        Method for resampling data by day. If `None`, assume data is already
        sampled by day and don't resample. Otherwise, this is the aggregation
        applied to the daily bins produced by Pandas `Series.resample`.
    yearlabels : bool
        Whether or not to draw the year for each subplot.
    yearascending : bool
        Sort the calendar in ascending or descending order.
    yearlabel_kws : dict
        Keyword arguments passed to the matplotlib `set_ylabel` call which is
        used to draw the year for each subplot.
    subplot_kws : dict
        Keyword arguments passed to the matplotlib `add_subplot` call used to
        create each subplot.
    gridspec_kws : dict
        Keyword arguments passed to the matplotlib `GridSpec` constructor used
        to create the grid the subplots are placed on.
    fig_kws : dict
        Keyword arguments passed to the matplotlib `figure` call.
    kwargs : other keyword arguments
        All other keyword arguments are passed to `yearplot`.

    Returns
    -------
    fig, axes : matplotlib Figure and Axes
        Tuple where `fig` is the matplotlib Figure object and `axes` is an
        array of matplotlib Axes objects with the calendar heatmaps, one per
        year.

    Examples
    --------

    With `calendarplot` we can plot several years in one figure:

    .. plot::
        :context: close-figs

        calmap.calendarplot(events)

    """
    yearlabel_kws = yearlabel_kws or {}
    subplot_kws = subplot_kws or {}
    gridspec_kws = gridspec_kws or {}
    fig_kws = fig_kws or {}

    years = np.unique(data.index.year)
    if not yearascending:
        years = years[::-1]

    # One row per year; `squeeze=False` keeps `axes` two-dimensional even for
    # a single year, so taking the first column always gives a 1-d array.
    fig, axes = plt.subplots(nrows=len(years), ncols=1, squeeze=False,
                             subplot_kw=subplot_kws,
                             gridspec_kw=gridspec_kws, **fig_kws)
    axes = axes.T[0]

    # We explicitly resample by day only once. This is an optimization.
    # The aggregation is applied to the resampler object because `resample`
    # no longer accepts a `how` argument.
    if how is None:
        by_day = data
    else:
        by_day = data.resample('D').agg(how)

    # Defaults for the year label; the color follows the fill color used for
    # days without data unless overridden through `yearlabel_kws`.
    ylabel_kws = dict(
        fontsize=32,
        color=kwargs.get('fillcolor', 'whitesmoke'),
        fontweight='bold',
        fontname='Arial',
        ha='center')
    ylabel_kws.update(yearlabel_kws)

    max_weeks = 0

    for year, ax in zip(years, axes):
        # Data is already sampled by day, so tell `yearplot` not to resample.
        yearplot(by_day, year=year, how=None, ax=ax, **kwargs)
        max_weeks = max(max_weeks, ax.get_xlim()[1])

        if yearlabels:
            ax.set_ylabel(str(year), **ylabel_kws)

    # In a leap year it might happen that we have 54 weeks (e.g., 2012).
    # Here we make sure the width is consistent over all years.
    for ax in axes:
        ax.set_xlim(0, max_weeks)

    # Make the axes look good. Called on the figure created above rather
    # than on whichever figure happens to be current.
    fig.tight_layout()

    return fig, axes