Source code for jmpy.bayes.oneway

import random

import numpy as np
import pymc as pm
import pandas as pd
import matplotlib.gridspec as gs
import matplotlib as mpl
import matplotlib.backends.backend_agg as mbb

import jmpy.plotting as jp


[docs]class JbResult(object): def __init__(self, model, groups): self.model = model self.df = None self.groups = groups self.names = [] for name in ('mean', 'delta', 'deltam', 'deltas'): self.names += ['{}_{}'.format(name, g) for g in groups]
[docs] def traceplot(self, name, **kwargs): return pm.Matplot.plot(self.model.trace(name), **kwargs)
[docs] def result(self, boxkwargs=None, figsize=(12, 10), maxpoints=1000): """ :return: """ if boxkwargs is None: boxkwargs = {} filtered = random.sample(list(self.df.index), maxpoints) sampled_df = self.df.ix[filtered] fig = mpl.figure.Figure(figsize=figsize, tight_layout=True) canvas = mbb.FigureCanvasAgg(fig) grid = gs.GridSpec(4, 3, width_ratios=[4, 4, 1], height_ratios=[2, 1, 2, 1]) axm = fig.add_subplot(grid[0], label='axm') axmd = fig.add_subplot(grid[1], label='axmd') axtm = fig.add_subplot(grid[3], label='axtm', frameon=False,) axtmd = fig.add_subplot(grid[4], label='axtmd', frameon=False,) axs = fig.add_subplot(grid[6], label='axs') axsd = fig.add_subplot(grid[7], label='axsd') axts = fig.add_subplot(grid[9], label='axts', frameon=False,) axtsd = fig.add_subplot(grid[10], label='axtsd', frameon=False,) jp.boxplot('Group', 'Mean', sampled_df, axes=axm, legend='Group', **boxkwargs) jp.boxplot('Group', 'Mean Delta', sampled_df, axes=axmd, legend='Group', **boxkwargs) jp.boxplot('Group', 'Sigma', sampled_df, axes=axs, legend='Group', **boxkwargs) jp.boxplot('Group', 'Sigma Delta', sampled_df, axes=axsd, legend='Group', **boxkwargs) jp.components.datatable('Mean', self.df, axtm, by='Group', probs='bayes') jp.components.datatable('Mean Delta', self.df, axtmd, by='Group', probs='bayes') jp.components.datatable('Sigma', self.df, axts, by='Group', probs='bayes') jp.components.datatable('Sigma Delta', self.df, axtsd, by='Group', probs='bayes') for ax in axm, axmd, axtm, axtmd, axs, axsd, axts, axtsd: if not ax: continue ax.tick_params(top="off") ax.tick_params(bottom="off") ax.tick_params(right="off") ax.tick_params(left="off") fig.suptitle('') return canvas.figure
[docs]def oneway(x, y, data=None, control=None, iterations=24000, burn=4000, verbose=False): """ Generate a oneway anova via bayesian markov-chain monte carlo :param x: str or ndarray, x axis (grouping axis) :param y: str or ndarray, y axis (values) :param data: pandas dataframe or Null :param control: If x has greater than two groups, you need to compare vs. the control :param iterations: :param burn: :param verbose: bool; default=False; turns on pymc progress bar :return: a results object. """ if data is None: x, y, _, data = jp.components.create_df(x, y, None) df = data.copy() df = df.reset_index() df[x] = df[x].astype('str') df[y] = df[y].astype('float').dropna() groups = sorted(set(df[x])) if len(groups) > 2 and control is None: raise ValueError('Need a control group') elif len(groups) <= 2 and control is None: control = groups[0] means = {} # prior distributions sigmas = {} # deltas_mean = {} # deltas vs control deltas_sig = {} obs = {} # observations nu = {} def delta(dist, control, exp): return dist[exp] - dist[control] for group in groups: means[group] = pm.Uniform('mean_{}'.format(group), np.percentile(df[y], 1), np.percentile(df[y], 99)) nu[group] = pm.Uniform('nu_{}'.format(group), 0, 1000) sigmas[group] = pm.Gamma('sigma_{}'.format(group), np.median(df[y].dropna()), nu[group]) obs[group] = pm.Normal('obs_{}'.format(group), means[group], 1/sigmas[group]**2, value=df[df[x]==group][y], observed=True, trace=True) for group in groups: if group == control: continue else: deltas_mean[group] = pm.Deterministic(delta, doc='Delta function of mean', name='deltam_{}'.format(group), parents={'dist': means, 'exp': group, 'control': control}, trace=True) deltas_sig[group] = pm.Deterministic(delta, doc='Delta function of sigma', name='deltas_{}'.format(group), parents={'dist': sigmas, 'exp': group, 'control': control}, trace=True) mylist = [means[i] for i in groups] + [obs[i] for i in groups] + [deltas_mean[i] for i in groups if i != control] mylist += [sigmas[i] for i in groups] + [nu[i] for i in groups] + [deltas_sig[i] for i in groups if i != control] model = pm.Model(mylist) map_ = pm.MAP(model, ) map_.fit() mcmc = pm.MCMC(model, ) mcmc.sample(iterations, burn=burn, progress_bar=verbose) res = JbResult(mcmc, groups) for group in groups: if res.df is None: res.df = pd.DataFrame() res.df['Mean'] = mcmc.trace('mean_{}'.format(group))[:] res.df['Sigma'] = mcmc.trace('sigma_{}'.format(group))[:] res.df['Group'] = group if group == control: continue res.df['Mean Delta'] = mcmc.trace('deltam_{}'.format(group))[:] res.df['Sigma Delta'] = mcmc.trace('deltas_{}'.format(group))[:] else: tdf = pd.DataFrame() tdf['Mean'] = mcmc.trace('mean_{}'.format(group))[:] tdf['Sigma'] = mcmc.trace('sigma_{}'.format(group))[:] tdf['Group'] = group if group == control: res.df = res.df.append(tdf) continue tdf['Mean Delta'] = mcmc.trace('deltam_{}'.format(group))[:] tdf['Sigma Delta'] = mcmc.trace('deltas_{}'.format(group))[:] res.df = res.df.append(tdf) return res