Source code for abcpy.inferences

import numpy as np
from abcpy.output import Journal
from scipy import optimize

[docs]class RejectionABC:
    """This base class implements the rejection algorithm based inference scheme [1] for
    Approximate Bayesian Computation. 

    [1] Tavaré, S., Balding, D., Griffith, R., Donnelly, P.: Inferring coalescence
    times from DNA sequence data. Genetics 145(2), 505–518 (1997).

    Parameters
    ----------
    model: abcpy.models.Model
        Model object that conforms to the Model class.
    distance: abcpy.distances.Distance
        Distance object that conforms to the Distance class.
    backend: abcpy.backends.Backend
        Backend object that conforms to the Backend class.
    seed: integer, optional
         Optional initial seed for the random number generator. The default value is generated randomly.
    """
[docs]    def __init__(self, model, distance, backend, seed=None):
        self.model = model
        self.dist_calc = distance
        self.backend = backend
        self.rng = np.random.RandomState(seed)


[docs]    def sample(self, observations, n_samples, n_samples_per_param, epsilon, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : python list
            The observed data set.
        n_samples : integer
            Number of samples to generate.
        n_samples_per_param : integer
            Number of data points in each simulated dataset.  
        epsilon: float 
            Value of threshold.
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.
            
        Returns
        -------
        abcpy.output.Journal
            a journal containing simulation results, metadata and optionally intermediate results.
        """

        journal = Journal(full_output)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["epsilon"] = epsilon
        
        accepted_parameters = None                        
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextRejectionABC(self.backend, self.model, self.dist_calc, observations, n_samples, n_samples_per_param, epsilon)
        
        # main Rejection ABC algorithm                 
        seed_arr = self.rng.randint(1, n_samples*n_samples, size=n_samples, dtype=np.int32)
        seed_pds = self.backend.parallelize(seed_arr)     

        accepted_parameters_pds = self.backend.map(rc._sample_parameter, seed_pds)
        accepted_parameters = self.backend.collect(accepted_parameters_pds)
        accepted_parameters = np.array(accepted_parameters)

        journal.add_parameters(accepted_parameters)
        journal.add_weights(np.ones((n_samples, 1)))
    
        return journal


    
class _RemoteContextRejectionABC:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, dist_calc, observations, n_samples, n_samples_per_param, epsilon):
        self.model = model
        self.dist_calc = dist_calc
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        
        self.epsilon = epsilon

        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)


        
    def _sample_parameter(self, seed):
        """
        Samples a single model parameter and simulates from it until
        distance between simulated outcome and the observation is
        smaller than eplison.
        
        Parameters
        ----------            
        seed: int
            value of a seed to be used for reseeding
        Returns
        -------
        np.array
            accepted parameter
        """
    
        distance = self.dist_calc.dist_max()
        self.model.prior.reseed(seed)
        
        while distance > self.epsilon:
            # Accept new parameter value if the distance is less than epsilon
            self.model.sample_from_prior()
            y_sim = self.model.simulate(self.n_samples_per_param)
            distance = self.dist_calc.distance(self.observations_bds.value(), y_sim)

        return self.model.get_parameters()




[docs]class PMCABC:
    """This base class implements a modified version of Population Monte Carlo based inference scheme 
    for Approximate Bayesian computation of Beaumont et. al. [1]. Here the threshold value at `t`-th generation are adaptively chosen 
    by taking the maximum between the epsilon_percentile-th value of discrepancies of the accepted 
    parameters at `t-1`-th generation and the threshold value provided for this generation by the user. If we take the 
    value of epsilon_percentile to be zero (default), this method becomes the inference scheme described in [1], where
    the threshold values considered at each generation are the ones provided by the user. 
        
    [1] M. A. Beaumont. Approximate Bayesian computation in evolution and ecology. Annual Review of Ecology,
    Evolution, and Systematics, 41(1):379–406, Nov. 2010.
    
    Parameters
    ----------
    model : abcpy.models.Model
        Model object that conforms to the Model class.
    distance : abcpy.distances.Distance
        Distance object that conforms to the Distance class.
    kernel : abcpy.distributions.Distribution
        Distribution object defining the perturbation kernel needed for the sampling
    backend : abcpy.backends.Backend
        Backend object that conforms to the Backend class.
    seed : integer, optional
         Optional initial seed for the random number generator. The default value is generated randomly.
    """
[docs]    def __init__(self, model, distance, kernel, backend, seed=None):       
        self.model = model
        self.distance = distance
        self.kernel = kernel
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        

[docs]    def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples_per_param = 1, epsilon_percentile = 0, covFactor = 2, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : numpy.ndarray 
            Observed data.
        steps : integer        
            Number of iterations in the sequential algoritm ("generations") 
        epsilon_init : numpy.ndarray        
            An array of proposed values of epsilon to be used at each steps. Can be supplied
            A single value to be used as the threshold in Step 1 or a `steps`-dimensional array of values to be 
            used as the threshold in evry steps.
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional        
            Number of data points in each simulated data set. The default value is 1.
        epsilon_percentile : float, optional        
            A value between [0, 100]. The default value is 0, meaning the threshold value provided by the user being used.
        covFactor : float, optional     
            scaling parameter of the covariance matrix. The default value is 2 as considered in [1].
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.

        Returns
        -------        
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        
        journal = Journal(full_output)
        journal.configuration["type_model"] = type(self.model)
        journal.configuration["type_dist_func"] = type(self.distance)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["steps"] = steps
        journal.configuration["epsilon_percentile"] = epsilon_percentile
        
        accepted_parameters = None
        accepted_weights = None
        accepted_cov_mat = None
        #Define epsilon_arr
        if len(epsilon_init) == steps:
            epsilon_arr = epsilon_init
        else:
            if len(epsilon_init) == 1:
                epsilon_arr = [None]*steps
                epsilon_arr[0] = epsilon_init  
            else:
                raise ValueError("The length of epsilon_init can only be of 1 or steps.")
        
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextPMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param)

        # main PMCABC algorithm
        # print("INFO: Starting PMCABC iterations.")
        for aStep in range(0, steps):
            # print("DEBUG: Iteration " + str(aStep) + " of PMCABC algorithm.")
            seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32)
            seed_pds = self.backend.parallelize(seed_arr)

            # 0: update remotely required variables
            # print("INFO: Broadcasting parameters.")
            rc.epsilon = epsilon_arr[aStep] 
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat)

            # 1: calculate resample parameters
            # print("INFO: Resampling parameters")
            params_and_dists_and_ysim_pds = self.backend.map(rc._resample_parameter, seed_pds)
            params_and_dists_and_ysim = self.backend.collect(params_and_dists_and_ysim_pds)
            new_parameters, distances = [list(t) for t in zip(*params_and_dists_and_ysim)]
            new_parameters = np.array(new_parameters)
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat)
            
            # Compute epsilon for next step
            # print("INFO: Calculating acceptance threshold (epsilon).")
            if aStep < steps-1:
                if epsilon_arr[aStep + 1] == None:
                    epsilon_arr[aStep + 1] = np.percentile(distances, epsilon_percentile)
                else:
                    epsilon_arr[aStep + 1] = np.max([np.percentile(distances, epsilon_percentile), epsilon_arr[aStep+1]]) 

            # 2: calculate weights for new parameters 
            # print("INFO: Calculating weights.")
            new_parameters_pds = self.backend.parallelize(new_parameters)
            new_weights_pds = self.backend.map(rc._calculate_weight, new_parameters_pds)
            new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1,1)
            sum_of_weights = 0.0
            for w in new_weights:
                sum_of_weights += w
            new_weights = new_weights / sum_of_weights

            # 3: calculate covariance
            # print("INFO: Calculating covariance matrix.")
            new_cov_mat = covFactor * np.cov(new_parameters, aweights = new_weights.reshape(-1), rowvar=False)
                                
            # 4: Update the newly computed values
            accepted_parameters = new_parameters
            accepted_weights = new_weights
            accepted_cov_mat = new_cov_mat

            # print("INFO: Saving configuration to output journal.")
            if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1):
                journal.add_parameters(accepted_parameters)
                journal.add_weights(accepted_weights)
                
        #Add epsilon_arr to the journal                
        journal.configuration["epsilon_arr"] = epsilon_arr

        return journal

class _RemoteContextPMCABC:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param):
        self.model = model
        self.distance = distance
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.kernel = kernel
        
        self.epsilon = None

        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)
        self.accepted_parameters_bds = None
        self.accepted_weights_bds = None
        self.accepted_cov_mat_bds = None


    def _update_broadcasts(self, backend, accepted_parameters, accepted_weights, accepted_cov_mat):
        def destroy(bc):
            if bc != None:
                bc.unpersist
                #bc.destroy
                
        if not accepted_parameters is None:
            self.accepted_parameters_bds = backend.broadcast(accepted_parameters)
        if not accepted_weights is None:
            self.accepted_weights_bds = backend.broadcast(accepted_weights)
        if not accepted_cov_mat is None:
            self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat)
                

    # define helper functions for map step
    def _resample_parameter(self, seed):
        """
        Samples a single model parameter and simulate from it until
        distance between simulated outcome and the observation is
        smaller than eplison.
            
        :type seed: int
        :rtype: np.array
        :return: accepted parameter
        """
        rng = np.random.RandomState(seed)
        self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        
        distance = self.distance.dist_max()
        while distance > self.epsilon:
            #print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon))
            if self.accepted_parameters_bds == None:
                self.model.sample_from_prior()
            else:
                index = rng.choice(self.n_samples, size=1, p=self.accepted_weights_bds.value().reshape(-1))
                theta = self.accepted_parameters_bds.value()[index[0]]
                # trucate the normal to the bounds of parameter space of the model
                # truncating the normal like this is fine: https://arxiv.org/pdf/0907.4010v1.pdf
                while True:
                    self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])
                    new_theta = self.kernel.sample(1)[0,:]
                    theta_is_accepted = self.model.set_parameters(new_theta)
                    if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                        break

            y_sim = self.model.simulate(self.n_samples_per_param)

            distance = self.distance.distance(self.observations_bds.value(), y_sim)
        return (self.model.get_parameters(), distance)
    
    def _calculate_weight(self, theta):
        """
        Calculates the weight for the given parameter using
        accepted_parameters, accepted_cov_mat

        Parameters
        ----------
        theta: np.array
            1xp matrix containing model parameter, where p is the dimension of parameter
            
        Returns
        -------
        float
            the new weight for theta
        """
        
        if self.accepted_weights_bds is None:
            return 1.0 / self.n_samples
        else:
            prior_prob = self.model.prior.pdf(theta)

            denominator = 0.0
            for i in range(0, self.n_samples):
                self.kernel.set_parameters([self.accepted_parameters_bds.value()[i,:], self.accepted_cov_mat_bds.value()])
                pdf_value = self.kernel.pdf(theta)
                denominator += self.accepted_weights_bds.value()[i,0] * pdf_value
            return 1.0 * prior_prob / denominator


[docs]class PMC:
    """Population Monte Carlo based inference scheme of Cappé et. al. [1]. 

    This algorithm assumes a likelihood function is available and can be evaluated
    at any parameter value given the oberved dataset.  In absence of the
    likelihood function or when it can't be evaluated with a rational
    computational expenses, we use the approximated likleihood functions in
    abcpy.approx_lhd module, for which the argument of the consistency of the
    inference schemes are based on Andrieu and Roberts [2].

    [1] Cappé, O., Guillin, A., Marin, J.-M., and Robert, C. P. (2004). Population Monte Carlo.
    Journal of Computational and Graphical Statistics, 13(4), 907–929. 
     
    [2] C. Andrieu and G. O. Roberts. The pseudo-marginal approach for efficient Monte Carlo computations.
    Annals of Statistics, 37(2):697–725, 04 2009.
     
    """        
        
[docs]    def __init__(self, model, likfun, kernel, backend, seed=None):
        """Constructor of PMC inference schemes.

        Parameters
        ----------
        model : abcpy.models.Model
            Model object that conforms to the Model class 
        likfun : abcpy.approx_lhd.Approx_likelihood
            Approx_likelihood object that conforms to the Approx_likelihood class
        kernel : abcpy.distributions.Distribution
            Distribution object defining the perturbation kernel needed for the sampling
        backend : abcpy.backends.Backend
            Backend object that conforms to the Backend class
        seed : integer, optional
            Optional initial seed for the random number generator. The default value is generated randomly.

        """
        self.model = model
        self.likfun = likfun
        self.kernel = kernel
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        
[docs]    def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 100, covFactor = None, iniPoints = None, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : python list 
            Observed data
        steps : integer        
            number of iterations in the sequential algoritm ("generations") 
        n_sample : integer, optional
            number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional        
            number of data points in each simulated data set. The default value is 100.
        covFactor : float, optional     
            scaling parameter of the covariance matrix. The default is a p dimensional array of 1 when p is the dimension of the parameter.
        inipoints : numpy.ndarray, optional
            parameter vaulues from where the sampling starts. By default sampled from the prior.
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.

        Returns
        -------        
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.        
        
        Parameters
        ----------

        """

        journal = Journal(full_output)
        journal.configuration["type_model"] = type(self.model)
        journal.configuration["type_lhd_func"] = type(self.likfun)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["steps"] = steps
        journal.configuration["covFactor"] = covFactor
        journal.configuration["iniPoints"] = iniPoints
        
        accepted_parameters = None
        accepted_weights = None
        accepted_cov_mat = None
        new_theta = None
        dim = len(self.model.get_parameters())
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextPMC(self.backend, self.model, self.likfun, self.kernel, observations, n_samples, n_samples_per_param)
        
        # Initialize particles: When not supplied, randomly draw them from prior distribution
        # Weights of particles: Assign equal weights for each of the particles
        if iniPoints == None:
            accepted_parameters = np.zeros(shape = (n_samples,dim))
            for ind in range(0,n_samples):
                self.model.sample_from_prior()
                accepted_parameters[ind,:] = self.model.get_parameters()
            accepted_weights = np.ones((n_samples,1), dtype=np.float)/n_samples 
        else:
            accepted_parameters = iniPoints
            accepted_weights = np.ones((iniPoints.shape[0],1), dtype=np.float)/iniPoints.shape[0] 
        
        if covFactor is None:
            covFactor = np.ones(shape=(dim,))

        # Calculate initial covariance matrix
        accepted_cov_mat = covFactor * np.cov(accepted_parameters, aweights = accepted_weights.reshape(-1), rowvar=False)                            
        
        # main SMC algorithm  
        # print("INFO: Starting PMC iterations.")
        for aStep in range(0, steps):
            # print("DEBUG: Iteration " + str(aStep) + " of PMC algorithm.")
            
            # 0: update remotely required variables
            # print("INFO: Broadcasting parameters.")
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat)

            # 1: calculate resample parameters
            # print("INFO: Resample parameters.")
            index = self.rng.choice(accepted_parameters.shape[0], size = n_samples, p = accepted_weights.reshape(-1))
            #Choose a new particle using the resampled particle (make the boundary proper)
            #Initialize new_parameters
            new_parameters = np.zeros((n_samples,dim), dtype=np.float) 
            for ind in range(0,n_samples):
                self.kernel.set_parameters([accepted_parameters[index[ind],:], accepted_cov_mat])
                while True:
                    new_theta = self.kernel.sample(1)[0,:]
                    theta_is_accepted = self.model.set_parameters(new_theta)
                    if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                        new_parameters[ind,:] = new_theta
                        break
            # 2: calculate approximate lieklihood for new parameters
            # print("INFO: Calculate approximate likelihood.")
            new_parameters_pds = self.backend.parallelize(new_parameters)
            approx_likelihood_new_parameters_pds = self.backend.map(rc._approx_lik_calc, new_parameters_pds)
            # print("DEBUG: Collect approximate likelihood from pds.")
            approx_likelihood_new_parameters = np.array(self.backend.collect(approx_likelihood_new_parameters_pds)).reshape(-1,1)         
            
            # 3: calculate new weights for new parameters 
            # print("INFO: Calculating weights.")
            new_weights_pds = self.backend.map(rc._calculate_weight, new_parameters_pds)
            new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1,1)
            
            sum_of_weights = 0.0
            for i in range(0, n_samples):
                new_weights[i] = new_weights[i]*approx_likelihood_new_parameters[i]
                sum_of_weights += new_weights[i]
            new_weights = new_weights / sum_of_weights
            accepted_parameters = new_parameters

            # 4: calculate covariance
            # print("INFO: Calculating covariance matrix.")
            new_cov_mat = covFactor * np.cov(accepted_parameters, aweights = accepted_weights.reshape(-1), rowvar=False)
                       
            # 5: Update the newly computed values
            accepted_parameters = new_parameters
            accepted_weights = new_weights
            accepted_cov_mat = new_cov_mat
            
            # print("INFO: Saving configuration to output journal.")
            if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1):
                journal.add_parameters(accepted_parameters)
                journal.add_weights(accepted_weights)
                journal.add_opt_values(approx_likelihood_new_parameters)

        return journal

class _RemoteContextPMC:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """        

    def __init__(self, backend, model, likfun, kernel, observations, n_samples, n_samples_per_param,):
        self.model = model
        self.likfun = likfun
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.kernel = kernel

        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)
        self.accepted_parameters_bds = None
        self.accepted_weights_bds = None
        self.accepted_cov_mat_bds = None

    def _update_broadcasts(self, backend, accepted_parameters, accepted_weights, accepted_cov_mat):
        def destroy(bc):
            if bc != None:
                bc.unpersist
                #bc.destroy
                
        if not accepted_parameters is None:
            self.accepted_parameters_bds = backend.broadcast(accepted_parameters)
        if not accepted_weights is None:
            self.accepted_weights_bds = backend.broadcast(accepted_weights)
        if not accepted_cov_mat is None:
            self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat)

    # define helper functions for map step
    def _approx_lik_calc(self, theta):
        """
        Compute likelihood for new parameters using approximate likelihood function
        
        :seed = aTuple[0]
        :theta = aTuple[1]
        :type seed: int
        :rtype: np.arraybg

        :return: likelihood values of new parameter
        """  

        # Assign theta to model
        self.model.set_parameters(theta)
        # Simulate the fake data from the model given the parameter value theta
        # print("DEBUG: Simulate model for parameter " + str(theta))
        y_sim = self.model.simulate(self.n_samples_per_param)
        # print("DEBUG: Extracting observation.")
        obs = self.observations_bds.value()
        # print("DEBUG: Computing likelihood...")
        lhd = self.likfun.likelihood(obs, y_sim)
        # print("DEBUG: Likelihood is :" + str(lhd))
        pdf_at_theta = self.model.prior.pdf(theta)
        # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta))
        return pdf_at_theta * lhd
        
    def _calculate_weight(self, theta):
        """
        Calculates the weight for the given parameter using
        accepted_parameters, accepted_cov_mat

        :type theta: np.array
        :param theta: 1xp matrix containing model parameters, where p is the number of parameters
        :rtype: float
        :return: the new weight for theta
        """
        
        if self.accepted_weights_bds is None:
            return 1.0 / self.n_samples
        else:
            prior_prob = self.model.prior.pdf(theta)

            denominator = 0.0
            for i in range(0, self.n_samples):
                self.kernel.set_parameters([self.accepted_parameters_bds.value()[i,:], self.accepted_cov_mat_bds.value()])
                pdf_value = self.kernel.pdf(theta)
                denominator += self.accepted_weights_bds.value()[i,0] * pdf_value
            return 1.0 * prior_prob / denominator        


[docs]class SABC:
    """This base class implements a modified version of Simulated Annealing Approximate Bayesian Computation (SABC) of [1] when the prior is non-informative.
        
    [1] C. Albert, H. R. Kuensch and A. Scheidegger. A Simulated Annealing Approach to 
    Approximate Bayes Computations. Statistics and Computing, (2014). 
    
    Parameters
    ----------
    model : abcpy.models.Model
        Model object that conforms to the Model class.
    distance : abcpy.distances.Distance
        Distance object that conforms to the Distance class.
    kernel : abcpy.distributions.Distribution
        Distribution object defining the perturbation kernel needed for the sampling
    backend : abcpy.backends.Backend
        Backend object that conforms to the Backend class.
    seed : integer, optional
         Optional initial seed for the random number generator. The default value is generated randomly.
    """
[docs]    def __init__(self, model, distance, kernel, backend, seed=None):       
        self.model = model
        self.distance = distance
        self.kernel = kernel
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        

[docs]    def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_param = 1, beta = 2, delta = 0.2, v = 0.3, ar_cutoff = 0.5, resample = None, n_update = None, adaptcov = 1, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : numpy.ndarray 
            Observed data.
        steps : integer        
            Number of maximum iterations in the sequential algoritm ("generations") 
        epsilon : numpy.float        
            An array of proposed values of epsilon to be used at each steps. 
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional        
            Number of data points in each simulated data set. The default value is 1.
        beta : numpy.float
            Tuning parameter of SABC
        delta : numpy.float
            Tuning parameter of SABC
        v : numpy.float, optional
            Tuning parameter of SABC, The default value is 0.3.
        ar_cutoff : numpy.float
            Acceptance ratio cutoff, The default value is 0.5
        resample: int, optional
            Resample after this many acceptance, The default value if n_samples
        n_update: int, optional
            Number of perturbed parameters at each step, The default value if n_samples
        adaptcov : boolean, optional     
            Whether we adapt the covariance matrix in iteration stage. The default value TRUE.
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.

        Returns
        -------        
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        journal = Journal(full_output)
        journal.configuration["type_model"] = type(self.model)
        journal.configuration["type_dist_func"] = type(self.distance)
        journal.configuration["type_kernel_func"] = type(self.kernel)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["beta"] = beta
        journal.configuration["delta"] = delta
        journal.configuration["v"] = v
        journal.configuration["ar_cutoff"] = ar_cutoff
        journal.configuration["resample"] = resample
        journal.configuration["n_update"] = n_update
        journal.configuration["adaptcov"] = adaptcov
        journal.configuration["full_output"] = full_output
        
        accepted_parameters = np.zeros(shape=(n_samples,len(self.model.get_parameters())))
        distances = np.zeros(shape=(n_samples,))
        smooth_distances = np.zeros(shape=(n_samples,))
        accepted_weights = np.ones(shape=(n_samples,1))
        all_distances = None
        accepted_cov_mat = None               

        if resample == None:
            resample = n_samples
        if n_update == None:
            n_update = n_samples
        sample_array = np.ones(shape=(steps,))
        sample_array[0] = n_samples
        sample_array[1:] = n_update
        ## Acceptance counter to determine the resampling step
        accept = 0
        samples_until = 0
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextSABC(self.backend, self.model, self.distance, self.kernel, \
        observations, n_samples, n_samples_per_param)
        
        for aStep in range(0,steps):
            # main SABC algorithm 
            # print("INFO: Initialization of SABC")
            seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(sample_array[aStep]), dtype=np.uint32)
            seed_pds = self.backend.parallelize(seed_arr)

            # 0: update remotely required variables
            # print("INFO: Broadcasting parameters.")
            rc.epsilon = epsilon
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_cov_mat, smooth_distances, all_distances)
        

            # 1: Calculate  parameters
            # print("INFO: Initial accepted parameter parameters")
            params_and_dists_pds = self.backend.map(rc._accept_parameter, seed_pds)
            params_and_dists = self.backend.collect(params_and_dists_pds)
            new_parameters, new_distances, new_all_parameters, new_all_distances, index, acceptance = [list(t) for t in zip(*params_and_dists)]
            new_parameters = np.array(new_parameters)            
            new_distances = np.array(new_distances)
            new_all_distances = np.concatenate(new_all_distances)
            index = np.array(index)
            acceptance = np.array(acceptance)
            
            # Reading all_distances at Initial step 
            if aStep == 0:
                index = np.linspace(0,n_samples-1,n_samples).astype(int).reshape(n_samples,)
                accept = 0
                all_distances = new_all_distances

            #print(index[acceptance == 1])              
            # Initialize/Update the accepted parameters and their corresponding distances            
            accepted_parameters[index[acceptance==1],:] = new_parameters[acceptance==1,:]
            distances[index[acceptance==1]] = new_distances[acceptance==1]
            
                
            # 2: Smoothing of the distances
            smooth_distances[index[acceptance==1]] = self._smoother_distance(distances[index[acceptance==1]],all_distances)
            
            # 3: Initialize/Update U, epsilon and covariance of perturbation kernel
            if aStep == 0:
                U = self._avergae_redefined_distance(self._smoother_distance(all_distances,all_distances), epsilon)
            else:               
                U = np.mean(smooth_distances)
            epsilon = self._schedule(U,v)   
            if accepted_parameters.shape[1] > 1:
                accepted_cov_mat = beta*np.cov(np.transpose(accepted_parameters)) + \
                0.0001*np.trace(np.cov(np.transpose(accepted_parameters)))*np.eye(accepted_parameters.shape[1])
            else:
                accepted_cov_mat = beta*np.var(np.transpose(accepted_parameters)) + \
                0.0001*(np.var(np.transpose(accepted_parameters)))*np.eye(accepted_parameters.shape[1]) 
                  
            ## 4: Show progress and if acceptance rate smaller than a value break the iteration
            
            # print("INFO: Saving intermediate configuration to output journal.")
            if full_output == 1:
                journal.add_parameters(accepted_parameters)
                journal.add_weights(accepted_weights)
                
            if aStep > 0:
                accept = accept + np.sum(acceptance)
                samples_until = samples_until + sample_array[aStep]
                acceptance_rate = accept/samples_until
                print('updates: ',np.sum(sample_array[1:aStep+1])/np.sum(sample_array[1:])*100,' epsilon: ' ,epsilon,\
                'u.mean: ', U, 'acceptance rate: ', acceptance_rate)
                if acceptance_rate < ar_cutoff:
                    break
       
            # 5: Resampling if number of accepted particles greater than resample
            if accept >= resample and U > 1e-100:
                ## Weighted resampling:
                weight = np.exp(-smooth_distances*delta/U)
                weight = weight/sum(weight)
                index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace = 1, p = weight)
                accepted_parameters = accepted_parameters[index_resampled,:]
                smooth_distances = smooth_distances[index_resampled]
  
                ## Update U and epsilon:
                epsilon = epsilon*(1-delta)
                U = np.mean(smooth_distances)
                epsilon = self._schedule(U,v) 
  
                ## Print effective sampling size
                print('Resampling: Effective sampling size: ', 1/sum(pow(weight/sum(weight),2)))
                accept = 0      
                samples_until = 0
      
        #Add epsilon_arr, number of final steps and final output to the journal 
        # print("INFO: Saving final configuration to output journal.")
        if full_output == 0:
            journal.add_parameters(accepted_parameters)
            journal.add_weights(accepted_weights)
        journal.configuration["steps"] = aStep + 1             
        journal.configuration["epsilon"] = epsilon

        return journal
        
        
    def _smoother_distance(self, distance, old_distance):
        """Smooths the distance using the Equation 14 of [1].
        
        [1] C. Albert, H. R. Kuensch and A. Scheidegger. A Simulated Annealing Approach to 
        Approximate Bayes Computations. Statistics and Computing 0960-3174 (2014).     
        """
        
        smoothed_distance = np.zeros(shape=(len(distance),))        
        
        for ind in range(0,len(distance)):
            if distance[ind] < np.min(old_distance):
                smoothed_distance[ind] = (distance[ind]/np.min(old_distance))/len(old_distance)
            else:
                smoothed_distance[ind] = np.mean(np.array(old_distance)<distance[ind])
                
        return smoothed_distance
        
    def _avergae_redefined_distance(self, distance, epsilon):        
        if epsilon==0:
            U = 0
        else:
            U = np.average(distance, weights = np.exp(-distance/epsilon))

        return(U)
        
    def _schedule(self, rho, v):
      if rho < 1e-100:
          epsilon = 0
      else:
          fun = lambda epsilon: pow(epsilon,2) + v*pow(epsilon,3/2) - pow(rho,2)
          epsilon = optimize.fsolve(fun,rho/2)
             
      return(epsilon)
      

class _RemoteContextSABC:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param):
        self.model = model
        self.distance = distance
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param     
        self.epsilon = None
        self.kernel = kernel
        #self._smoother_distance = _smoother_distance

        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)               
        self.accepted_parameters_bds = None
        self.accepted_cov_mat_bds = None
        self.smooth_distances_bds = None
        self.all_distances_bds = None
        
    def _update_broadcasts(self, backend, accepted_parameters, accepted_cov_mat, smooth_distances, all_distances):
        def destroy(bc):
            if bc != None:
                bc.unpersist
                #bc.destroy
                
        if not accepted_parameters is None:
            self.accepted_parameters_bds = backend.broadcast(accepted_parameters)  
        if not accepted_cov_mat is None:
            self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat)
        if not smooth_distances is None:
            self.smooth_distances_bds = backend.broadcast(smooth_distances)
        if not all_distances is None:
            self.all_distances_bds = backend.broadcast(all_distances)
    
    def _smoother_distance_remote(self, distance, old_distance):
        """Smooths the distance using the Equation 14 of [1].
        
        [1] C. Albert, H. R. Kuensch and A. Scheidegger. A Simulated Annealing Approach to 
        Approximate Bayes Computations. Statistics and Computing 0960-3174 (2014).     
        """
        
        smoothed_distance = np.zeros(shape=(len(distance),))        
        
        for ind in range(0,len(distance)):
            if distance[ind] < np.min(old_distance):
                smoothed_distance[ind] = (distance[ind]/np.min(old_distance))/len(old_distance)
            else:
                smoothed_distance[ind] = np.mean(np.array(old_distance)<distance[ind])
                
        return smoothed_distance
        
    # define helper functions for map step
    def _accept_parameter(self, seed):
        """
        Samples a single model parameter and simulate from it until
        accepted with probabilty exp[-rho(x,y)/epsilon].
            
        :type seed: int
        :rtype: np.array
        :return: accepted parameter
        """
        rng = np.random.RandomState(seed)
        self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))        
        
        
        
        all_parameters = []
        all_distances = []  
        index = []          
        acceptance = 0 
        
        
        
        if self.accepted_cov_mat_bds == None:
            while acceptance == 0:
                self.model.sample_from_prior()
                new_theta = self.model.get_parameters()
                all_parameters.append(self.model.get_parameters())
                y_sim = self.model.simulate(self.n_samples_per_param)
                distance = self.distance.distance(self.observations_bds.value(), y_sim)
                all_distances.append(distance)
                acceptance = rng.binomial(1,np.exp(-distance/self.epsilon),1)
            acceptance = 1
        else:                    
            ## Select one arbitrary particle:
            index = rng.choice(self.n_samples, size=1)[0]
            ## Sample proposal parameter and calculate new distance:
            theta = self.accepted_parameters_bds.value()[index,:]
            while True:
                self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])
                if len(theta) > 1:
                    new_theta = self.kernel.sample(1)[0,:]
                else:
                    new_theta = self.kernel.sample(1)
                theta_is_accepted = self.model.set_parameters(new_theta)
                if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                    break
            y_sim = self.model.simulate(self.n_samples_per_param)
            distance = self.distance.distance(self.observations_bds.value(), y_sim)          
            smooth_distance = self._smoother_distance_remote([distance],self.all_distances_bds.value())
            
            ## Calculate acceptance probability:
            ratio_prior_prob  = self.model.prior.pdf(new_theta)/self.model.prior.pdf(self.accepted_parameters_bds.value()[index,:])
            ratio_likelihood_prob = np.exp((self.smooth_distances_bds.value()[index] - smooth_distance) / self.epsilon)
            acceptance_prob = ratio_prior_prob*ratio_likelihood_prob
            
            ## If accepted
            if rng.rand(1) < acceptance_prob:
                acceptance = 1
            else:
                distance = np.inf
            
        return (new_theta, distance, all_parameters, all_distances, index, acceptance)

[docs]class ABCsubsim:
    """This base class implements Approximate Bayesian Computation by subset simulation (ABCsubsim) algorithm of [1].
        
    [1] M. Chiachio, J. L. Beck, J. Chiachio, and G. Rus., Approximate Bayesian computation by subset
    simulation. SIAM J. Sci. Comput., 36(3):A1339–A1358, 2014/10/03 2014.
    
    Parameters
    ----------
    model : abcpy.models.Model
        Model object that conforms to the Model class.
    distance : abcpy.distances.Distance
        Distance object that conforms to the Distance class.
    kernel : abcpy.distributions.Distribution
        Distribution object defining the perturbation kernel needed for the sampling
    backend : abcpy.backends.Backend
        Backend object that conforms to the Backend class.
    seed : integer, optional
         Optional initial seed for the random number generator. The default value is generated randomly.
    """
[docs]    def __init__(self, model, distance, kernel, backend, seed=None):       
        self.model = model
        self.distance = distance
        self.kernel = kernel
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        

[docs]    def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, chain_length = 10, ap_change_cutoff = 10, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : numpy.ndarray 
            Observed data.
        steps : integer        
            Number of iterations in the sequential algoritm ("generations") 
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional        
            Number of data points in each simulated data set. The default value is 1.
        chain_length : integer, optional
            Chain length of the MCMC. n_samples should be divisable by chain_length. The default value is 10.
        ap_change_cutoff : float, optional
            The cutoff value for the percentage change in the anneal parameter. If the change is less than 
            ap_change_cutoff the iterations are stopped. The default value is 10.
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.

        Returns
        -------        
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        
        journal = Journal(full_output)
        journal.configuration["type_model"] = type(self.model)
        journal.configuration["type_dist_func"] = type(self.distance)
        journal.configuration["type_kernel_func"] = type(self.kernel)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["chain_length"] = chain_length
        journal.configuration["ap_change_cutoff"] = ap_change_cutoff
        journal.configuration["full_output"] = full_output
        
        accepted_parameters = None
        accepted_weights = np.ones(shape=(n_samples,1))
        accepted_cov_mat = None  
        anneal_parameter = 0
        anneal_parameter_old = 0
        temp_chain_length = 1
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextABCsubsim(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param, chain_length)
        
        for aStep in range(0,steps):
            # main ABCsubsim algorithm 
            # print("INFO: Initialization of ABCsubsim")
            seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(n_samples/temp_chain_length), dtype=np.uint32)
            index_arr = np.linspace(0,n_samples/temp_chain_length-1,n_samples/temp_chain_length).astype(int).reshape(int(n_samples/temp_chain_length),)
            seed_index_arr = np.column_stack((seed_arr,index_arr))            
            seed_index_pds = self.backend.parallelize(seed_index_arr)
            

            # 0: update remotely required variables
            # print("INFO: Broadcasting parameters.")
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_cov_mat)       

            # 1: Calculate  parameters
            # print("INFO: Initial accepted parameter parameters")
            params_and_dists_pds = self.backend.map(rc._accept_parameter, seed_index_pds)
            params_and_dists = self.backend.collect(params_and_dists_pds)
            new_parameters, new_distances = [list(t) for t in zip(*params_and_dists)]
            accepted_parameters = np.concatenate(new_parameters)  
            distances = np.concatenate(new_distances)

            # 2: Sort and renumber samples   
            SortIndex = sorted(range(len(distances)), key=lambda k: distances[k])
            distances = distances[SortIndex]
            accepted_parameters = accepted_parameters[SortIndex,:]
            
            # 3: Calculate and broadcast annealling parameters
            temp_chain_length = chain_length
            if aStep > 0:
                anneal_parameter_old = anneal_parameter
            anneal_parameter = 0.5*(distances[int(n_samples/temp_chain_length)]+distances[int(n_samples/temp_chain_length)+1])
            rc.anneal_parameter = anneal_parameter
            
            # 4: Update proposal covariance matrix (Parallelized)
            if aStep == 0:
                accepted_cov_mat = np.cov(accepted_parameters, rowvar=False)
            else:
                accepted_cov_mat = pow(2,1)*accepted_cov_mat
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_cov_mat)
            
            seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=10, dtype=np.uint32)
            index_arr = np.linspace(0,10-1,10).astype(int).reshape(10,)
            seed_index_arr = np.column_stack((seed_arr,index_arr))
            seed_index_pds = self.backend.parallelize(seed_index_arr)
            
            cov_mat_index_pds = self.backend.map(rc._update_cov_mat, seed_index_pds)
            cov_mat_index = self.backend.collect(cov_mat_index_pds)
            cov_mat, T, accept_index = [list(t) for t in zip(*cov_mat_index)]
            
            for ind in range(10):
                if accept_index[ind] == 1:
                    accepted_cov_mat = cov_mat[ind]
                    break
                
            # print("INFO: Saving intermediate configuration to output journal.")
            if full_output == 1:
                journal.add_parameters(accepted_parameters)
                journal.add_weights(accepted_weights)

            # Show progress
            anneal_parameter_change_percentage = 100*abs(anneal_parameter_old-anneal_parameter)/anneal_parameter
            print('Steps: ', aStep, 'annealing parameter: ', anneal_parameter, 'change (%) in annealing parameter: ', anneal_parameter_change_percentage )
            if anneal_parameter_change_percentage < ap_change_cutoff:
                break
            

        #Add anneal_parameter, number of final steps and final output to the journal 
        # print("INFO: Saving final configuration to output journal.")
        if full_output == 0:
            journal.add_parameters(accepted_parameters)
            journal.add_weights(accepted_weights)
        journal.configuration["steps"] = aStep+1              
        journal.configuration["anneal_parameter"] = anneal_parameter

        return journal

class _RemoteContextABCsubsim:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param, chain_length):
        self.model = model
        self.distance = distance
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.kernel = kernel
        self.chain_length = chain_length
        
        self.anneal_parameter = None
        
        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)
        self.accepted_parameters_bds = None
        self.accepted_cov_mat_bds = None


    def _update_broadcasts(self, backend, accepted_parameters, accepted_cov_mat):
        def destroy(bc):
            if bc != None:
                bc.unpersist
                #bc.destroy
                
        if not accepted_parameters is None:
            self.accepted_parameters_bds = backend.broadcast(accepted_parameters)
        if not accepted_cov_mat is None:
            self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat)
                

    # define helper functions for map step
    def _accept_parameter(self, seed_index):
        """
        Samples a single model parameter and simulate from it until
        distance between simulated outcome and the observation is
        smaller than eplison.
            
        :type seed: int
        :rtype: np.array
        :return: accepted parameter
        """
        seed = seed_index[0]
        index = seed_index[1]
        rng = np.random.RandomState(seed)
        self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))

        result_theta = []        
        result_distance = []       
        
        if self.accepted_parameters_bds == None:
            self.model.sample_from_prior()
            y_sim = self.model.simulate(self.n_samples_per_param)
            distance = self.distance.distance(self.observations_bds.value(), y_sim)
            result_theta.append(self.model.get_parameters())
            result_distance.append(distance)
        else:
            theta = self.accepted_parameters_bds.value()[index]
            self.model.set_parameters(theta)
            y_sim = self.model.simulate(self.n_samples_per_param)
            distance = self.distance.distance(self.observations_bds.value(), y_sim)
            result_theta.append(theta)
            result_distance.append(distance)
            for ind in range(0,self.chain_length-1):
                while True:
                    self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])
                    new_theta = self.kernel.sample(1)[0,:]
                    theta_is_accepted = self.model.set_parameters(new_theta)
                    if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                        break
                y_sim = self.model.simulate(self.n_samples_per_param)
                new_distance = self.distance.distance(self.observations_bds.value(), y_sim)

                ## Calculate acceptance probability:
                ratio_prior_prob  = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta)
                self.kernel.set_parameters([new_theta, self.accepted_cov_mat_bds.value()])
                kernel_numerator = self.kernel.pdf(theta)
                self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])                
                kernel_denominator = self.kernel.pdf(new_theta)
                ratio_likelihood_prob = kernel_numerator/kernel_denominator
                acceptance_prob = min(1,ratio_prior_prob*ratio_likelihood_prob)*(new_distance<self.anneal_parameter)
            
                ## If accepted
                if rng.binomial(1,acceptance_prob)==1:
                    result_theta.append(new_theta)
                    result_distance.append(new_distance)
                    theta = new_theta
                    distance = new_distance
                else:
                    result_theta.append(theta)
                    result_distance.append(distance)
                    
        return (result_theta, result_distance)
        
    def _update_cov_mat(self, seed_t):
        
        seed = seed_t[0]
        t = seed_t[1]
        rng = np.random.RandomState(seed)
        self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        acceptance = 0
        accepted_cov_mat_transformed = self.accepted_cov_mat_bds.value()*pow(2.0,-2.0*t)
        theta = self.accepted_parameters_bds.value()[0]
        self.model.set_parameters(theta)
        for ind in range(0,self.chain_length):
            while True:
                self.kernel.set_parameters([theta, accepted_cov_mat_transformed])
                new_theta = self.kernel.sample(1)[0,:]
                theta_is_accepted = self.model.set_parameters(new_theta)
                if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                    break
                y_sim = self.model.simulate(self.n_samples_per_param)
                new_distance = self.distance.distance(self.observations_bds.value(), y_sim)
                        
                ## Calculate acceptance probability:
                ratio_prior_prob  = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta)
                self.kernel.set_parameters([new_theta, accepted_cov_mat_transformed])
                kernel_numerator = self.kernel.pdf(theta)
                self.kernel.set_parameters([theta, accepted_cov_mat_transformed])                
                kernel_denominator = self.kernel.pdf(new_theta)
                ratio_likelihood_prob = kernel_numerator/kernel_denominator
                acceptance_prob = min(1,ratio_prior_prob*ratio_likelihood_prob)*(new_distance<self.anneal_parameter)
                ## If accepted
                if rng.binomial(1,acceptance_prob)==1:
                    theta = new_theta
                    acceptance = acceptance + 1
        if acceptance/10<=0.5 and acceptance/10>=0.3:
            return(accepted_cov_mat_transformed, t, 1)
        else:
            return(accepted_cov_mat_transformed, t, 0)
            
[docs]class RSMCABC:
    """This base class implements Adaptive Population Monte Carlo Approximate Bayesian computation of 
    Drovandi and Pettitt [1]. 
        
    [1] CC. Drovandi CC and AN. Pettitt, Estimation of parameters for macroparasite population evolution using
    approximate Bayesian computation. Biometrics 67(1):225–233, 2011.
    
    Parameters
    ----------
    model : abcpy.models.Model
        Model object that conforms to the Model class.
    distance : abcpy.distances.Distance
        Distance object that conforms to the Distance class.
    kernel : abcpy.distributions.Distribution
        Distribution object defining the perturbation kernel needed for the sampling
    backend : abcpy.backends.Backend
        Backend object that conforms to the Backend class.
    seed : integer, optional
         Optional initial seed for the random number generator. The default value is generated randomly.
    """
[docs]    def __init__(self, model, distance, kernel, backend, seed=None):       
        self.model = model
        self.distance = distance
        self.kernel = kernel
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        

[docs]    def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.1, epsilon_init = 100, epsilon_final = 0.1, const = 1, covFactor = 2.0, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : numpy.ndarray 
            Observed data.
        steps : integer        
            Number of iterations in the sequential algoritm ("generations") 
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional        
            Number of data points in each simulated data set. The default value is 1.
        alpha : float, optional
            A parameter taking values between [0,1], the default value is 0.1.
        epsilon_init : float, optional
            Initial value of threshold, the default is 100
        epsilon_final : float, optional
            Terminal value of threshold, the default is 0.1
        const : float, optional 
             A constant to compute acceptance probabilty
        covFactor : float, optional     
            scaling parameter of the covariance matrix. The default value is 2.
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.

        Returns
        -------        
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        
        journal = Journal(full_output)
        journal.configuration["type_model"] = type(self.model)
        journal.configuration["type_dist_func"] = type(self.distance)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["steps"] = steps
        
        accepted_parameters = None
        accepted_cov_mat = None
        accepted_dist = None
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextRSMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param, alpha)

        # main RSMCABC algorithm
        # print("INFO: Starting RSMCABC iterations.")
        for aStep in range(steps):
            
            # 0: Compute epsilon, compute new covariance matrix for Kernel, 
            # and finally Drawing new new/perturbed samples using prior or MCMC Kernel
            # print("DEBUG: Iteration " + str(aStep) + " of RSMCABC algorithm.")
            if aStep == 0:         
                n_replenish = n_samples
                # Compute epsilon                
                epsilon = [epsilon_init]
                R = int(1)
            else:
                n_replenish = round(n_samples*alpha)
                # Throw away N_alpha particles with largest dist     
                accepted_parameters = np.delete(accepted_parameters, np.arange(round(n_samples*alpha))+(n_samples-round(n_samples*alpha)), 0)
                accepted_dist = np.delete(accepted_dist, np.arange(round(n_samples*alpha))+(n_samples-round(n_samples*alpha)), 0)
                # Compute epsilon
                epsilon.append(accepted_dist[-1])
                # Calculate covariance
                # print("INFO: Calculating covariance matrix.")
                new_cov_mat = covFactor * np.cov(accepted_parameters, rowvar=False)                      
                accepted_cov_mat = new_cov_mat 
                
            
            if epsilon[-1] < epsilon_final:
                break
            
            seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size = n_replenish, dtype=np.uint32)
            seed_pds = self.backend.parallelize(seed_arr)

            #update remotely required variables
            # print("INFO: Broadcasting parameters.")
            rc.epsilon = epsilon
            rc.R = R
            # Broadcast updated variable
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_dist, accepted_cov_mat)

            #calculate resample parameters
            #print("INFO: Resampling parameters")
            params_and_dist_index_pds = self.backend.map(rc._accept_parameter, seed_pds)
            params_and_dist_index = self.backend.collect(params_and_dist_index_pds)
            new_parameters, new_dist, new_index = [list(t) for t in zip(*params_and_dist_index)]
            new_parameters = np.array(new_parameters)
            new_dist = np.array(new_dist)
            new_index = np.array(new_index)
            
            # 1: Update all parameters, compute acceptance probability, compute epsilon
            if len(new_dist) == n_samples:
                accepted_parameters = new_parameters
                accepted_dist = new_dist              
            else:
                accepted_parameters = np.concatenate((accepted_parameters,new_parameters))
                accepted_dist = np.concatenate((accepted_dist, new_dist))

            # 2: Compute acceptance probabilty and set R 
            #print(aStep)
            #print(new_index)
            prob_acceptance = sum(new_index)/(R*n_replenish)
            if prob_acceptance == 1 or prob_acceptance == 0:
                R = 1
            else:
                R = int(np.log(const)/np.log(1-prob_acceptance))
                
                                 
            # print("INFO: Saving configuration to output journal.")
            if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1):
                journal.add_parameters(accepted_parameters)
                journal.add_weights(np.ones(shape=(n_samples,1))*(1/n_samples))

        #Add epsilon_arr to the journal                
        journal.configuration["epsilon_arr"] = epsilon

        return journal
        


class _RemoteContextRSMCABC:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param, alpha):
        self.model = model
        self.distance = distance
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.kernel = kernel
        self.alpha = alpha
        
        self.epsilon = None
        self.R = None

        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)
        self.accepted_parameters_bds = None
        self.accepted_dist_bds = None
        self.accepted_cov_mat_bds = None

    def _update_broadcasts(self, backend, accepted_parameters, accepted_dist, accepted_cov_mat):
        def destroy(bc):
            if bc != None:
                bc.unpersist
                #bc.destroy
                
        if not accepted_parameters is None:
            self.accepted_parameters_bds = backend.broadcast(accepted_parameters)
        if not accepted_dist is None:
            self.accepted_dist_bds = backend.broadcast(accepted_dist)
        if not accepted_cov_mat is None:
            self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat)

    # define helper functions for map step
    def _accept_parameter(self, seed):
        """
        Samples a single model parameter and simulate from it until
        distance between simulated outcome and the observation is
        smaller than eplison.
            
        :type seed: int
        :rtype: np.array
        :return: accepted parameter
        """

        rng = np.random.RandomState(seed)
        self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        
        distance = self.distance.dist_max()
        if self.accepted_parameters_bds == None:
            while distance > self.epsilon[-1]:
                self.model.sample_from_prior()
                y_sim = self.model.simulate(self.n_samples_per_param)
                distance = self.distance.distance(self.observations_bds.value(), y_sim) 
            index_accept = 1
        else:
            index = rng.choice(len(self.accepted_parameters_bds.value()), size=1)
            theta = self.accepted_parameters_bds.value()[index[0]]
            index_accept = 0.0
            for ind in range(self.R):
                while True:
                    self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])
                    new_theta = self.kernel.sample(1)[0,:]
                    theta_is_accepted = self.model.set_parameters(new_theta)
                    if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                        break            
                y_sim = self.model.simulate(self.n_samples_per_param)
                distance = self.distance.distance(self.observations_bds.value(), y_sim)
                ratio_prior_prob  = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta)
                self.kernel.set_parameters([new_theta, self.accepted_cov_mat_bds.value()])
                kernel_numerator = self.kernel.pdf(theta)
                self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])                
                kernel_denominator = self.kernel.pdf(new_theta)
                ratio_kernel_prob = kernel_numerator/kernel_denominator
                probability_acceptance = min(1,ratio_prior_prob*ratio_kernel_prob)
                if distance < self.epsilon[-1] and rng.binomial(1,probability_acceptance) == 1:
                    index_accept += 1
                else:
                    self.model.set_parameters(theta)
                    distance = self.accepted_dist_bds.value()[index[0]]
        
        return (self.model.get_parameters(), distance, index_accept)

[docs]class APMCABC:
    """This base class implements Adaptive Population Monte Carlo Approximate Bayesian computation of 
    M. Lenormand et al. [1]. 
        
    [1] M. Lenormand, F. Jabot and G. Deffuant, Adaptive approximate Bayesian computation
    for complex models. Computational Statistics, 28:2777–2796, 2013.
    
    Parameters
    ----------
    model : abcpy.models.Model
        Model object that conforms to the Model class.
    distance : abcpy.distances.Distance
        Distance object that conforms to the Distance class.
    kernel : abcpy.distributions.Distribution
        Distribution object defining the perturbation kernel needed for the sampling
    backend : abcpy.backends.Backend
        Backend object that conforms to the Backend class.
    seed : integer, optional
         Optional initial seed for the random number generator. The default value is generated randomly.
    """
[docs]    def __init__(self, model, distance, kernel, backend, seed=None):       
        self.model = model
        self.distance = distance
        self.kernel = kernel
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        

[docs]    def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.9, acceptance_cutoff = 0.2, covFactor = 2.0, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : numpy.ndarray 
            Observed data.
        steps : integer        
            Number of iterations in the sequential algoritm ("generations") 
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional        
            Number of data points in each simulated data set. The default value is 1.
        alpha : float, optional
            A parameter taking values between [0,1], the default value is 0.1.
        acceptance_cutoff : float, optional
            Acceptance ratio cutoff, The default value is 0.2
        covFactor : float, optional     
            scaling parameter of the covariance matrix. The default value is 2.
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.

        Returns
        -------        
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        
        journal = Journal(full_output)
        journal.configuration["type_model"] = type(self.model)
        journal.configuration["type_dist_func"] = type(self.distance)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["steps"] = steps
        
        accepted_parameters = None
        accepted_weights = None
        accepted_cov_mat = None
        accepted_dist = None
        alpha_accepted_parameters = None
        alpha_accepted_weights = None
        alpha_accepted_dist = None
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextAPMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param, alpha)

        # main APMCABC algorithm
        # print("INFO: Starting APMCABC iterations.")
        for aStep in range(steps):
            
            # 0: Drawing new new/perturbed samples using prior or MCMC Kernel
            # print("DEBUG: Iteration " + str(aStep) + " of APMCABC algorithm.")
            if aStep > 0:
                n_additional_samples = n_samples - round(n_samples*alpha)
            else:
                n_additional_samples = n_samples
            
            seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size = n_additional_samples, dtype=np.uint32)
            seed_pds = self.backend.parallelize(seed_arr)

            #update remotely required variables
            # print("INFO: Broadcasting parameters.")
            rc._update_broadcasts(self.backend, alpha_accepted_parameters, alpha_accepted_weights, alpha_accepted_dist, accepted_cov_mat)

            #calculate resample parameters
            #print("INFO: Resampling parameters")
            params_and_dist_weights_pds = self.backend.map(rc._accept_parameter, seed_pds)
            params_and_dist_weights = self.backend.collect(params_and_dist_weights_pds)
            new_parameters, new_dist, new_weights = [list(t) for t in zip(*params_and_dist_weights)]
            new_parameters = np.array(new_parameters)
            new_dist = np.array(new_dist)
            new_weights = np.array(new_weights).reshape(n_additional_samples,1)
            
            # 1: Update all parameters, compute acceptance probability, compute epsilon
            if len(new_weights) == n_samples:
                accepted_parameters = new_parameters
                accepted_dist = new_dist
                accepted_weights = new_weights
                # Compute acceptance probability
                prob_acceptance = 1
                # Compute epsilon
                epsilon = [np.percentile(accepted_dist, alpha*100)]
            else:
                accepted_parameters = np.concatenate((alpha_accepted_parameters,new_parameters))
                accepted_dist = np.concatenate((alpha_accepted_dist, new_dist))
                accepted_weights = np.concatenate((alpha_accepted_weights, new_weights))
                # Compute acceptance probability
                prob_acceptance = sum(new_dist < epsilon[-1])/len(new_dist)
                # Compute epsilon
                epsilon.append(np.percentile(accepted_dist, alpha*100))
               
            # 2: Update alpha_parameters, alpha_dist and alpha_weights
            index_alpha = accepted_dist < epsilon[-1]
            alpha_accepted_parameters = accepted_parameters[index_alpha,:]
            alpha_accepted_weights = accepted_weights[index_alpha]/sum(accepted_weights[index_alpha])
            alpha_accepted_dist = accepted_dist[index_alpha]
            
            # 3: calculate covariance
            # print("INFO: Calculating covariance matrix.")
            new_cov_mat = covFactor * np.cov(alpha_accepted_parameters, aweights = alpha_accepted_weights.reshape(-1), rowvar=False)                      
            accepted_cov_mat = new_cov_mat           

            # print("INFO: Saving configuration to output journal.")
            if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1):
                journal.add_parameters(accepted_parameters)
                journal.add_weights(accepted_weights)

            # 4: Check probability of acceptance lower than acceptance_cutoff
            if prob_acceptance < acceptance_cutoff:
                break

        #Add epsilon_arr to the journal                
        journal.configuration["epsilon_arr"] = epsilon

        return journal
        


class _RemoteContextAPMCABC:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param, alpha):
        self.model = model
        self.distance = distance
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.kernel = kernel
        self.alpha = alpha
        
        self.epsilon = None

        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)
        self.alpha_accepted_parameters_bds = None
        self.alpha_accepted_weights_bds = None
        self.alpha_accepted_dist = None
        self.accepted_cov_mat_bds = None

    def _update_broadcasts(self, backend, alpha_accepted_parameters, alpha_accepted_weights, alpha_accepted_dist, accepted_cov_mat):
        def destroy(bc):
            if bc != None:
                bc.unpersist
                #bc.destroy
                
        if not alpha_accepted_parameters is None:
            self.alpha_accepted_parameters_bds = backend.broadcast(alpha_accepted_parameters)
        if not alpha_accepted_weights is None:
            self.alpha_accepted_weights_bds = backend.broadcast(alpha_accepted_weights)
        if not alpha_accepted_dist is None:
            self.alpha_accepted_dist_bds = backend.broadcast(alpha_accepted_dist)
        if not accepted_cov_mat is None:
            self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat)

    # define helper functions for map step
    def _accept_parameter(self, seed):
        """
        Samples a single model parameter and simulate from it until
        distance between simulated outcome and the observation is
        smaller than eplison.
            
        :type seed: int
        :rtype: np.array
        :return: accepted parameter
        """

        rng = np.random.RandomState(seed)
        self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        
        if self.alpha_accepted_parameters_bds == None:
            self.model.sample_from_prior()
            y_sim = self.model.simulate(self.n_samples_per_param)
            dist = self.distance.distance(self.observations_bds.value(), y_sim)
            weight = 1.0
        else:
            index = rng.choice(len(self.alpha_accepted_weights_bds.value()), size=1, p=self.alpha_accepted_weights_bds.value().reshape(-1))
            theta = self.alpha_accepted_parameters_bds.value()[index[0]]
            # trucate the normal to the bounds of parameter space of the model
            # truncating the normal like this is fine: https://arxiv.org/pdf/0907.4010v1.pdf
            while True:
                self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])
                new_theta = self.kernel.sample(1)[0,:]
                theta_is_accepted = self.model.set_parameters(new_theta)
                if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                    break

            y_sim = self.model.simulate(self.n_samples_per_param)
            dist = self.distance.distance(self.observations_bds.value(), y_sim)
            
            prior_prob = self.model.prior.pdf(new_theta)
            denominator = 0.0
            for i in range(0, len(self.alpha_accepted_weights_bds.value())):
                self.kernel.set_parameters([self.alpha_accepted_parameters_bds.value()[i,:], self.accepted_cov_mat_bds.value()])
                pdf_value = self.kernel.pdf(new_theta)
                denominator += self.alpha_accepted_weights_bds.value()[i,0] * pdf_value
            weight = 1.0 * prior_prob / denominator        
            
        return (self.model.get_parameters(), dist, weight)                    


[docs]class SMCABC:
    """This base class implements Adaptive Population Monte Carlo Approximate Bayesian computation of 
    Del Moral et al. [1]. 
    
    [1] P. Del Moral, A. Doucet, A. Jasra, An adaptive sequential Monte Carlo method for approximate
    Bayesian computation. Statistics and Computing, 22(5):1009–1020, 2012.
    
    Parameters
    ----------
    model : abcpy.models.Model
        Model object that conforms to the Model class.
    distance : abcpy.distances.Distance
        Distance object that conforms to the Distance class.
    kernel : abcpy.distributions.Distribution
        Distribution object defining the perturbation kernel needed for the sampling
    backend : abcpy.backends.Backend
        Backend object that conforms to the Backend class.
    seed : integer, optional
         Optional initial seed for the random number generator. The default value is generated randomly.
    """
[docs]    def __init__(self, model, distance, kernel, backend, seed=None):       
        self.model = model
        self.distance = distance
        self.kernel = kernel
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        

[docs]    def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, epsilon_final = 0.1, alpha = 0.95, covFactor = 2, resample = None, full_output=0):
        """Samples from the posterior distribution of the model parameter given the observed 
        data observations.
        
        Parameters
        ----------
        observations : numpy.ndarray 
            Observed data.
        steps : integer        
            Number of iterations in the sequential algoritm ("generations") 
        epsilon_final : float, optional
            The final threshold value of epsilon to be reached. The default value is 0.1.
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional        
            Number of data points in each simulated data set. The default value is 1.
        alpha : float, optional
            A parameter taking values between [0,1], determinining the rate of change of the threshold epsilon. The
            default value is 0.5.
        covFactor : float, optional     
            scaling parameter of the covariance matrix. The default value is 2.
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal. 
            The default value is 0, meaning the intermediate results are not saved.

        Returns
        -------        
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        
        journal = Journal(full_output)
        journal.configuration["type_model"] = type(self.model)
        journal.configuration["type_dist_func"] = type(self.distance)
        journal.configuration["n_samples"] = n_samples
        journal.configuration["n_samples_per_param"] = n_samples_per_param
        journal.configuration["steps"] = steps
        
        accepted_parameters = None
        accepted_weights = None
        accepted_cov_mat = None
        accepted_y_sim = None
        
        # Define the resmaple parameter
        if resample == None:
            resample = n_samples*0.5     
        
        #Define epsilon_init
        epsilon = [10000]
        
        
        # Initialize variables that need to be available remotely
        rc = _RemoteContextSMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param)

        # main SMC ABC algorithm
        # print("INFO: Starting SMCABC iterations.")
        for aStep in range(0, steps):
            
            # Break if epsilon in previous step is less than epsilon_final
            if epsilon[-1] == epsilon_final:
                break
            
            # 0: Compute the Epsilon
            if accepted_y_sim != None:
                # Compute epsilon for next step
                fun = lambda epsilon_var: self._compute_epsilon(epsilon_var, \
                epsilon, observations, accepted_y_sim, accepted_weights, n_samples, n_samples_per_param, alpha)
                epsilon_new =  self._bisection(fun, epsilon_final, epsilon[-1], 0.001)
                if epsilon_new < epsilon_final:
                    epsilon_new = epsilon_final
                epsilon.append(epsilon_new)

            # 1: calculate weights for new parameters 
            # print("INFO: Calculating weights.")
            if accepted_y_sim != None:
                new_weights = np.zeros(shape=(n_samples),)
                for ind1 in range(n_samples):
                    numerator = 0.0
                    denominator = 0.0
                    for ind2 in range(n_samples_per_param):
                        numerator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon[-1])
                        denominator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon[-2])
                    if denominator != 0.0:
                        new_weights[ind1] = accepted_weights[ind1]*(numerator/denominator)
                    else:
                        new_weights[ind1] = 0
                new_weights = new_weights / sum(new_weights)
            else:
                new_weights = np.ones(shape=(n_samples),)*(1.0 /n_samples)
            
            # 2: Resample                    
            if accepted_y_sim != None and pow(sum(pow(new_weights,2)),-1) < resample:
                print('Resampling')
                # Weighted resampling:
                index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace = 1, p = new_weights)
                accepted_parameters = accepted_parameters[index_resampled,:]
                new_weights = np.ones(shape=(n_samples),)*(1.0 /n_samples)
            
            # Update the weights
            accepted_weights = new_weights.reshape(len(new_weights),1)

            # 3: Drawing new perturbed samples using MCMC Kernel
            # print("DEBUG: Iteration " + str(aStep) + " of SMCABC algorithm.")
            seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32)
            index_arr = np.arange(n_samples)
            seed_index_arr = np.column_stack((seed_arr,index_arr))
            seed_index_pds = self.backend.parallelize(seed_index_arr)

            #update remotely required variables
            # print("INFO: Broadcasting parameters.")
            rc.epsilon = epsilon 
            rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat, accepted_y_sim)

            #calculate resample parameters
            #print("INFO: Resampling parameters")
            params_and_ysim_pds = self.backend.map(rc._accept_parameter, seed_index_pds)
            params_and_ysim = self.backend.collect(params_and_ysim_pds)
            new_parameters, new_y_sim = [list(t) for t in zip(*params_and_ysim)]
            new_parameters = np.array(new_parameters)

            #Update the parameters
            accepted_parameters = new_parameters
            accepted_y_sim = new_y_sim
            
            # 4: calculate covariance
            # print("INFO: Calculating covariance matrix.")
            new_cov_mat = covFactor * np.cov(accepted_parameters, aweights = accepted_weights.reshape(-1), rowvar=False)                      
            accepted_cov_mat = new_cov_mat           

            # print("INFO: Saving configuration to output journal.")
            if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1):
                journal.add_parameters(accepted_parameters)
                journal.add_weights(accepted_weights)
        #Add epsilon_arr to the journal                
        journal.configuration["epsilon_arr"] = epsilon

        return journal
        
    def _compute_epsilon(self, epsilon_new, epsilon, observations, accepted_y_sim, accepted_weights, n_samples, n_samples_per_param, alpha):

        RHS =  alpha*pow(sum(pow(accepted_weights,2)),-1)
        LHS = np.zeros(shape=(n_samples),)
        for ind1 in range(n_samples):
            numerator = 0.0
            denominator = 0.0
            for ind2 in range(n_samples_per_param):
                numerator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon_new)
                denominator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon[-1])
            LHS[ind1] = accepted_weights[ind1]*(numerator/denominator)
        if sum(LHS) == 0:
            result = RHS
        else:
            LHS = LHS/sum(LHS)
            LHS =  pow(sum(pow(LHS,2)),-1)
            result = RHS-LHS
        return(result)
        
    def _bisection(self, func, low, high, tol):
        midpoint = (low+high)/2.0
        while (high-low)/2.0 > tol:
            if func(midpoint) == 0:
                return midpoint
            elif func(low)*func(midpoint) < 0:
                high = midpoint
            else :
                low = midpoint
            midpoint = (low+high)/2.0
    
        return midpoint


class _RemoteContextSMCABC:
    """
    Contains everything that is sent over the network like broadcast vars and map functions
    """
    
    def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param):
        self.model = model
        self.distance = distance
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.kernel = kernel
        
        self.epsilon = None

        # these are usually big tables, so we broadcast them to have them once
        # per executor instead of once per task
        self.observations_bds = backend.broadcast(observations)
        self.accepted_parameters_bds = None
        self.accepted_weights_bds = None
        self.accepted_cov_mat_bds = None
        self.accepted_y_sim_bds = None

    def _update_broadcasts(self, backend, accepted_parameters, accepted_weights, accepted_cov_mat, accepted_y_sim):
        def destroy(bc):
            if bc != None:
                bc.unpersist
                #bc.destroy
                
        if not accepted_parameters is None:
            self.accepted_parameters_bds = backend.broadcast(accepted_parameters)
        if not accepted_weights is None:
            self.accepted_weights_bds = backend.broadcast(accepted_weights)
        if not accepted_cov_mat is None:
            self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat)
        if not accepted_y_sim is None:
            self.accepted_y_sim_bds = backend.broadcast(accepted_y_sim)                

    # define helper functions for map step
    def _accept_parameter(self, seed_index):
        """
        Samples a single model parameter and simulate from it until
        distance between simulated outcome and the observation is
        smaller than eplison.
            
        :type seed: int
        :rtype: np.array
        :return: accepted parameter
        """
        seed = seed_index[0]
        index = seed_index[1]
        rng = np.random.RandomState(seed)
        self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))
        
        #print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon))
        if self.accepted_parameters_bds == None:
            self.model.sample_from_prior()
            y_sim = self.model.simulate(self.n_samples_per_param)
        else:
            if self.accepted_weights_bds.value()[index] > 0:
                theta = self.accepted_parameters_bds.value()[index]
                while True:
                    self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])
                    new_theta = self.kernel.sample(1)[0,:]
                    theta_is_accepted = self.model.set_parameters(new_theta)
                    if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0:
                        break
                y_sim = self.model.simulate(self.n_samples_per_param)
                y_sim_old = self.accepted_y_sim_bds.value()[index]
                ## Calculate acceptance probability:
                numerator = 0.0
                denominator = 0.0
                for ind in range(self.n_samples_per_param):
                    numerator += (self.distance.distance(self.observations_bds.value(), [y_sim[ind]]) < self.epsilon[-1])
                    denominator += (self.distance.distance(self.observations_bds.value(), [y_sim_old[ind]]) < self.epsilon[-1])
                ratio_data_epsilon = numerator/denominator
                ratio_prior_prob  = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta)
                self.kernel.set_parameters([new_theta, self.accepted_cov_mat_bds.value()])
                kernel_numerator = self.kernel.pdf(theta)
                self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()])                
                kernel_denominator = self.kernel.pdf(new_theta)
                ratio_likelihood_prob = kernel_numerator/kernel_denominator
                acceptance_prob = min(1,ratio_data_epsilon*ratio_prior_prob*ratio_likelihood_prob)
                if rng.binomial(1,acceptance_prob) == 1:
                    self.model.set_parameters(new_theta)
                else:
                    self.model.set_parameters(theta)
                    y_sim = self.accepted_y_sim_bds.value()[index]
            else:
                self.model.set_parameters(self.accepted_parameters_bds.value()[index])
                y_sim = self.accepted_y_sim_bds.value()[index]
                
        return (self.model.get_parameters(), y_sim)