Source code for abcpy.summaryselections

from abc import ABCMeta, abstractmethod
import numpy as np
from sklearn import linear_model

[docs]class Summaryselections(metaclass = ABCMeta):
    """This abstract base class defines a way to choose the summary statistics.
    """
    
    @abstractmethod
[docs]    def __init__(self, model, statistics_calc, backend, n_samples = 1000, seed = None):
        """The constructor of a sub-class must accept a non-optional model, statistics calculator and 
        backend which are stored to self.model, self.statistics_calc and self.backend. Further it 
        accepts two optional parameters n_samples and seed defining the number of simulated dataset
        used for the pilot to decide the summary statistics and the integer to initialize the random 
        number generator.
    
        Parameters
        ----------
        model: abcpy.models.Model
            Model object that conforms to the Model class.
        statistics_cal: abcpy.statistics.Statistics
            Statistics object that conforms to the Statistics class.
        backend: abcpy.backends.Backend
            Backend object that conforms to the Backend class.
        n_samples: int, optional
            The number of (parameter, simulated data) tuple generated to learn the summary statistics in pilot step. 
            The default value is 1000. 
        seed: integer, optional
            Optional initial seed for the random number generator. The default value is generated randomly.    
        """
        raise NotImplemented
        
        
    @abstractmethod
[docs]    def transformation(self, statistics):
        raise NotImplemented


[docs]class Semiautomatic(Summaryselections):
    """This class implements the semi auomatic summary statistics choice described in Fearnhead and Prangle [1].
    
    [1] Fearnhead P., Prangle D. 2012. Constructing summary statistics for approximate
    Bayesian computation: semi-automatic approximate Bayesian computation. J. Roy. Stat. Soc. B 74:419–474.    
    """

[docs]    def __init__(self, model, statistics_calc, backend, n_samples = 1000, seed = None):
        self.model = model
        self.statistics_calc = statistics_calc
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.model.prior.reseed(self.rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) 
        
        rc = _RemoteContextSemiautomatic(self.backend, self.model, self.statistics_calc)
        
        # main algorithm                 
        seed_arr = self.rng.randint(1, n_samples*n_samples, size=n_samples, dtype=np.int32)
        seed_pds = self.backend.parallelize(seed_arr)     

        sample_parameters_statistics_pds = self.backend.map(rc._sample_parameter_statistics, seed_pds)
        sample_parameters_and_statistics = self.backend.collect(sample_parameters_statistics_pds)
        sample_parameters, sample_statistics = [list(t) for t in zip(*sample_parameters_and_statistics)]
        sample_parameters = np.array(sample_parameters)
        sample_statistics = np.concatenate(sample_statistics)
        
        self.coefficients_learnt = np.zeros(shape=(sample_parameters.shape[1],sample_statistics.shape[1]))
        regr = linear_model.LinearRegression(fit_intercept=True)
        for ind in range(sample_parameters.shape[1]):
            regr.fit(sample_statistics, sample_parameters[:,ind]) 
            self.coefficients_learnt[:,ind] = regr.coef_ 
        
[docs]    def transformation(self, statistics):
        if not statistics.shape[1] == self.coefficients_learnt.shape[0]:    
            raise ValueError('Mismatch in dimension of summary statistics')
        return np.dot(statistics,self.coefficients_learnt)
        
        
class _RemoteContextSemiautomatic:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, stat_calc):
        self.model = model
        self.stat_calc = stat_calc

        
    def _sample_parameter_statistics(self, seed):
        """
        Samples a single model parameter and simulates from it until
        distance between simulated outcome and the observation is
        smaller than eplison.
        
        Parameters
        ----------            
        seed: int
            value of a seed to be used for reseeding
        Returns
        -------
        np.array
            accepted parameter
        """
        self.model.prior.reseed(seed)
        self.model.sample_from_prior()
        
        return (self.model.get_parameters(), self.stat_calc.statistics(self.model.simulate(1)))