Source code for abcpy.inferences

import numpy as np
from abcpy.output import Journal
from scipy import optimize

[docs]class RejectionABC: """This base class implements the rejection algorithm based inference scheme [1] for Approximate Bayesian Computation. [1] Tavaré, S., Balding, D., Griffith, R., Donnelly, P.: Inferring coalescence times from DNA sequence data. Genetics 145(2), 505–518 (1997). Parameters ---------- model: abcpy.models.Model Model object that conforms to the Model class. distance: abcpy.distances.Distance Distance object that conforms to the Distance class. backend: abcpy.backends.Backend Backend object that conforms to the Backend class. seed: integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """
[docs] def __init__(self, model, distance, backend, seed=None): self.model = model self.dist_calc = distance self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, n_samples, n_samples_per_param, epsilon, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : python list The observed data set. n_samples : integer Number of samples to generate. n_samples_per_param : integer Number of data points in each simulated dataset. epsilon: float Value of threshold. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal a journal containing simulation results, metadata and optionally intermediate results. """ journal = Journal(full_output) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["epsilon"] = epsilon accepted_parameters = None # Initialize variables that need to be available remotely rc = _RemoteContextRejectionABC(self.backend, self.model, self.dist_calc, observations, n_samples, n_samples_per_param, epsilon) # main Rejection ABC algorithm seed_arr = self.rng.randint(1, n_samples*n_samples, size=n_samples, dtype=np.int32) seed_pds = self.backend.parallelize(seed_arr) accepted_parameters_pds = self.backend.map(rc._sample_parameter, seed_pds) accepted_parameters = self.backend.collect(accepted_parameters_pds) accepted_parameters = np.array(accepted_parameters) journal.add_parameters(accepted_parameters) journal.add_weights(np.ones((n_samples, 1))) return journal
class _RemoteContextRejectionABC: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, dist_calc, observations, n_samples, n_samples_per_param, epsilon): self.model = model self.dist_calc = dist_calc self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.epsilon = epsilon # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) def _sample_parameter(self, seed): """ Samples a single model parameter and simulates from it until distance between simulated outcome and the observation is smaller than eplison. Parameters ---------- seed: int value of a seed to be used for reseeding Returns ------- np.array accepted parameter """ distance = self.dist_calc.dist_max() self.model.prior.reseed(seed) while distance > self.epsilon: # Accept new parameter value if the distance is less than epsilon self.model.sample_from_prior() y_sim = self.model.simulate(self.n_samples_per_param) distance = self.dist_calc.distance(self.observations_bds.value(), y_sim) return self.model.get_parameters()
[docs]class PMCABC: """This base class implements a modified version of Population Monte Carlo based inference scheme for Approximate Bayesian computation of Beaumont et. al. [1]. Here the threshold value at `t`-th generation are adaptively chosen by taking the maximum between the epsilon_percentile-th value of discrepancies of the accepted parameters at `t-1`-th generation and the threshold value provided for this generation by the user. If we take the value of epsilon_percentile to be zero (default), this method becomes the inference scheme described in [1], where the threshold values considered at each generation are the ones provided by the user. [1] M. A. Beaumont. Approximate Bayesian computation in evolution and ecology. Annual Review of Ecology, Evolution, and Systematics, 41(1):379–406, Nov. 2010. Parameters ---------- model : abcpy.models.Model Model object that conforms to the Model class. distance : abcpy.distances.Distance Distance object that conforms to the Distance class. kernel : abcpy.distributions.Distribution Distribution object defining the perturbation kernel needed for the sampling backend : abcpy.backends.Backend Backend object that conforms to the Backend class. seed : integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """
[docs] def __init__(self, model, distance, kernel, backend, seed=None): self.model = model self.distance = distance self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples_per_param = 1, epsilon_percentile = 0, covFactor = 2, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : numpy.ndarray Observed data. steps : integer Number of iterations in the sequential algoritm ("generations") epsilon_init : numpy.ndarray An array of proposed values of epsilon to be used at each steps. Can be supplied A single value to be used as the threshold in Step 1 or a `steps`-dimensional array of values to be used as the threshold in evry steps. n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. epsilon_percentile : float, optional A value between [0, 100]. The default value is 0, meaning the threshold value provided by the user being used. covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2 as considered in [1]. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ journal = Journal(full_output) journal.configuration["type_model"] = type(self.model) journal.configuration["type_dist_func"] = type(self.distance) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["steps"] = steps journal.configuration["epsilon_percentile"] = epsilon_percentile accepted_parameters = None accepted_weights = None accepted_cov_mat = None #Define epsilon_arr if len(epsilon_init) == steps: epsilon_arr = epsilon_init else: if len(epsilon_init) == 1: epsilon_arr = [None]*steps epsilon_arr[0] = epsilon_init else: raise ValueError("The length of epsilon_init can only be of 1 or steps.") # Initialize variables that need to be available remotely rc = _RemoteContextPMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param) # main PMCABC algorithm # print("INFO: Starting PMCABC iterations.") for aStep in range(0, steps): # print("DEBUG: Iteration " + str(aStep) + " of PMCABC algorithm.") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32) seed_pds = self.backend.parallelize(seed_arr) # 0: update remotely required variables # print("INFO: Broadcasting parameters.") rc.epsilon = epsilon_arr[aStep] rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat) # 1: calculate resample parameters # print("INFO: Resampling parameters") params_and_dists_and_ysim_pds = self.backend.map(rc._resample_parameter, seed_pds) params_and_dists_and_ysim = self.backend.collect(params_and_dists_and_ysim_pds) new_parameters, distances = [list(t) for t in zip(*params_and_dists_and_ysim)] new_parameters = np.array(new_parameters) rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat) # Compute epsilon for next step # print("INFO: Calculating acceptance threshold (epsilon).") if aStep < steps-1: if epsilon_arr[aStep + 1] == None: epsilon_arr[aStep + 1] = np.percentile(distances, epsilon_percentile) else: epsilon_arr[aStep + 1] = np.max([np.percentile(distances, epsilon_percentile), epsilon_arr[aStep+1]]) # 2: calculate weights for new parameters # print("INFO: Calculating weights.") new_parameters_pds = self.backend.parallelize(new_parameters) new_weights_pds = self.backend.map(rc._calculate_weight, new_parameters_pds) new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1,1) sum_of_weights = 0.0 for w in new_weights: sum_of_weights += w new_weights = new_weights / sum_of_weights # 3: calculate covariance # print("INFO: Calculating covariance matrix.") new_cov_mat = covFactor * np.cov(new_parameters, aweights = new_weights.reshape(-1), rowvar=False) # 4: Update the newly computed values accepted_parameters = new_parameters accepted_weights = new_weights accepted_cov_mat = new_cov_mat # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1): journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) #Add epsilon_arr to the journal journal.configuration["epsilon_arr"] = epsilon_arr return journal
class _RemoteContextPMCABC: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param): self.model = model self.distance = distance self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.kernel = kernel self.epsilon = None # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) self.accepted_parameters_bds = None self.accepted_weights_bds = None self.accepted_cov_mat_bds = None def _update_broadcasts(self, backend, accepted_parameters, accepted_weights, accepted_cov_mat): def destroy(bc): if bc != None: bc.unpersist #bc.destroy if not accepted_parameters is None: self.accepted_parameters_bds = backend.broadcast(accepted_parameters) if not accepted_weights is None: self.accepted_weights_bds = backend.broadcast(accepted_weights) if not accepted_cov_mat is None: self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat) # define helper functions for map step def _resample_parameter(self, seed): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is smaller than eplison. :type seed: int :rtype: np.array :return: accepted parameter """ rng = np.random.RandomState(seed) self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) distance = self.distance.dist_max() while distance > self.epsilon: #print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) if self.accepted_parameters_bds == None: self.model.sample_from_prior() else: index = rng.choice(self.n_samples, size=1, p=self.accepted_weights_bds.value().reshape(-1)) theta = self.accepted_parameters_bds.value()[index[0]] # trucate the normal to the bounds of parameter space of the model # truncating the normal like this is fine: https://arxiv.org/pdf/0907.4010v1.pdf while True: self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) new_theta = self.kernel.sample(1)[0,:] theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: break y_sim = self.model.simulate(self.n_samples_per_param) distance = self.distance.distance(self.observations_bds.value(), y_sim) return (self.model.get_parameters(), distance) def _calculate_weight(self, theta): """ Calculates the weight for the given parameter using accepted_parameters, accepted_cov_mat Parameters ---------- theta: np.array 1xp matrix containing model parameter, where p is the dimension of parameter Returns ------- float the new weight for theta """ if self.accepted_weights_bds is None: return 1.0 / self.n_samples else: prior_prob = self.model.prior.pdf(theta) denominator = 0.0 for i in range(0, self.n_samples): self.kernel.set_parameters([self.accepted_parameters_bds.value()[i,:], self.accepted_cov_mat_bds.value()]) pdf_value = self.kernel.pdf(theta) denominator += self.accepted_weights_bds.value()[i,0] * pdf_value return 1.0 * prior_prob / denominator
[docs]class PMC: """Population Monte Carlo based inference scheme of Cappé et. al. [1]. This algorithm assumes a likelihood function is available and can be evaluated at any parameter value given the oberved dataset. In absence of the likelihood function or when it can't be evaluated with a rational computational expenses, we use the approximated likleihood functions in abcpy.approx_lhd module, for which the argument of the consistency of the inference schemes are based on Andrieu and Roberts [2]. [1] Cappé, O., Guillin, A., Marin, J.-M., and Robert, C. P. (2004). Population Monte Carlo. Journal of Computational and Graphical Statistics, 13(4), 907–929. [2] C. Andrieu and G. O. Roberts. The pseudo-marginal approach for efficient Monte Carlo computations. Annals of Statistics, 37(2):697–725, 04 2009. """
[docs] def __init__(self, model, likfun, kernel, backend, seed=None): """Constructor of PMC inference schemes. Parameters ---------- model : abcpy.models.Model Model object that conforms to the Model class likfun : abcpy.approx_lhd.Approx_likelihood Approx_likelihood object that conforms to the Approx_likelihood class kernel : abcpy.distributions.Distribution Distribution object defining the perturbation kernel needed for the sampling backend : abcpy.backends.Backend Backend object that conforms to the Backend class seed : integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """ self.model = model self.likfun = likfun self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 100, covFactor = None, iniPoints = None, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : python list Observed data steps : integer number of iterations in the sequential algoritm ("generations") n_sample : integer, optional number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional number of data points in each simulated data set. The default value is 100. covFactor : float, optional scaling parameter of the covariance matrix. The default is a p dimensional array of 1 when p is the dimension of the parameter. inipoints : numpy.ndarray, optional parameter vaulues from where the sampling starts. By default sampled from the prior. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. Parameters ---------- """ journal = Journal(full_output) journal.configuration["type_model"] = type(self.model) journal.configuration["type_lhd_func"] = type(self.likfun) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["steps"] = steps journal.configuration["covFactor"] = covFactor journal.configuration["iniPoints"] = iniPoints accepted_parameters = None accepted_weights = None accepted_cov_mat = None new_theta = None dim = len(self.model.get_parameters()) # Initialize variables that need to be available remotely rc = _RemoteContextPMC(self.backend, self.model, self.likfun, self.kernel, observations, n_samples, n_samples_per_param) # Initialize particles: When not supplied, randomly draw them from prior distribution # Weights of particles: Assign equal weights for each of the particles if iniPoints == None: accepted_parameters = np.zeros(shape = (n_samples,dim)) for ind in range(0,n_samples): self.model.sample_from_prior() accepted_parameters[ind,:] = self.model.get_parameters() accepted_weights = np.ones((n_samples,1), dtype=np.float)/n_samples else: accepted_parameters = iniPoints accepted_weights = np.ones((iniPoints.shape[0],1), dtype=np.float)/iniPoints.shape[0] if covFactor is None: covFactor = np.ones(shape=(dim,)) # Calculate initial covariance matrix accepted_cov_mat = covFactor * np.cov(accepted_parameters, aweights = accepted_weights.reshape(-1), rowvar=False) # main SMC algorithm # print("INFO: Starting PMC iterations.") for aStep in range(0, steps): # print("DEBUG: Iteration " + str(aStep) + " of PMC algorithm.") # 0: update remotely required variables # print("INFO: Broadcasting parameters.") rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat) # 1: calculate resample parameters # print("INFO: Resample parameters.") index = self.rng.choice(accepted_parameters.shape[0], size = n_samples, p = accepted_weights.reshape(-1)) #Choose a new particle using the resampled particle (make the boundary proper) #Initialize new_parameters new_parameters = np.zeros((n_samples,dim), dtype=np.float) for ind in range(0,n_samples): self.kernel.set_parameters([accepted_parameters[index[ind],:], accepted_cov_mat]) while True: new_theta = self.kernel.sample(1)[0,:] theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: new_parameters[ind,:] = new_theta break # 2: calculate approximate lieklihood for new parameters # print("INFO: Calculate approximate likelihood.") new_parameters_pds = self.backend.parallelize(new_parameters) approx_likelihood_new_parameters_pds = self.backend.map(rc._approx_lik_calc, new_parameters_pds) # print("DEBUG: Collect approximate likelihood from pds.") approx_likelihood_new_parameters = np.array(self.backend.collect(approx_likelihood_new_parameters_pds)).reshape(-1,1) # 3: calculate new weights for new parameters # print("INFO: Calculating weights.") new_weights_pds = self.backend.map(rc._calculate_weight, new_parameters_pds) new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1,1) sum_of_weights = 0.0 for i in range(0, n_samples): new_weights[i] = new_weights[i]*approx_likelihood_new_parameters[i] sum_of_weights += new_weights[i] new_weights = new_weights / sum_of_weights accepted_parameters = new_parameters # 4: calculate covariance # print("INFO: Calculating covariance matrix.") new_cov_mat = covFactor * np.cov(accepted_parameters, aweights = accepted_weights.reshape(-1), rowvar=False) # 5: Update the newly computed values accepted_parameters = new_parameters accepted_weights = new_weights accepted_cov_mat = new_cov_mat # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1): journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) journal.add_opt_values(approx_likelihood_new_parameters) return journal
class _RemoteContextPMC: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, likfun, kernel, observations, n_samples, n_samples_per_param,): self.model = model self.likfun = likfun self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.kernel = kernel # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) self.accepted_parameters_bds = None self.accepted_weights_bds = None self.accepted_cov_mat_bds = None def _update_broadcasts(self, backend, accepted_parameters, accepted_weights, accepted_cov_mat): def destroy(bc): if bc != None: bc.unpersist #bc.destroy if not accepted_parameters is None: self.accepted_parameters_bds = backend.broadcast(accepted_parameters) if not accepted_weights is None: self.accepted_weights_bds = backend.broadcast(accepted_weights) if not accepted_cov_mat is None: self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat) # define helper functions for map step def _approx_lik_calc(self, theta): """ Compute likelihood for new parameters using approximate likelihood function :seed = aTuple[0] :theta = aTuple[1] :type seed: int :rtype: np.arraybg :return: likelihood values of new parameter """ # Assign theta to model self.model.set_parameters(theta) # Simulate the fake data from the model given the parameter value theta # print("DEBUG: Simulate model for parameter " + str(theta)) y_sim = self.model.simulate(self.n_samples_per_param) # print("DEBUG: Extracting observation.") obs = self.observations_bds.value() # print("DEBUG: Computing likelihood...") lhd = self.likfun.likelihood(obs, y_sim) # print("DEBUG: Likelihood is :" + str(lhd)) pdf_at_theta = self.model.prior.pdf(theta) # print("DEBUG: prior pdf evaluated at theta is :" + str(pdf_at_theta)) return pdf_at_theta * lhd def _calculate_weight(self, theta): """ Calculates the weight for the given parameter using accepted_parameters, accepted_cov_mat :type theta: np.array :param theta: 1xp matrix containing model parameters, where p is the number of parameters :rtype: float :return: the new weight for theta """ if self.accepted_weights_bds is None: return 1.0 / self.n_samples else: prior_prob = self.model.prior.pdf(theta) denominator = 0.0 for i in range(0, self.n_samples): self.kernel.set_parameters([self.accepted_parameters_bds.value()[i,:], self.accepted_cov_mat_bds.value()]) pdf_value = self.kernel.pdf(theta) denominator += self.accepted_weights_bds.value()[i,0] * pdf_value return 1.0 * prior_prob / denominator
[docs]class SABC: """This base class implements a modified version of Simulated Annealing Approximate Bayesian Computation (SABC) of [1] when the prior is non-informative. [1] C. Albert, H. R. Kuensch and A. Scheidegger. A Simulated Annealing Approach to Approximate Bayes Computations. Statistics and Computing, (2014). Parameters ---------- model : abcpy.models.Model Model object that conforms to the Model class. distance : abcpy.distances.Distance Distance object that conforms to the Distance class. kernel : abcpy.distributions.Distribution Distribution object defining the perturbation kernel needed for the sampling backend : abcpy.backends.Backend Backend object that conforms to the Backend class. seed : integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """
[docs] def __init__(self, model, distance, kernel, backend, seed=None): self.model = model self.distance = distance self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_param = 1, beta = 2, delta = 0.2, v = 0.3, ar_cutoff = 0.5, resample = None, n_update = None, adaptcov = 1, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : numpy.ndarray Observed data. steps : integer Number of maximum iterations in the sequential algoritm ("generations") epsilon : numpy.float An array of proposed values of epsilon to be used at each steps. n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. beta : numpy.float Tuning parameter of SABC delta : numpy.float Tuning parameter of SABC v : numpy.float, optional Tuning parameter of SABC, The default value is 0.3. ar_cutoff : numpy.float Acceptance ratio cutoff, The default value is 0.5 resample: int, optional Resample after this many acceptance, The default value if n_samples n_update: int, optional Number of perturbed parameters at each step, The default value if n_samples adaptcov : boolean, optional Whether we adapt the covariance matrix in iteration stage. The default value TRUE. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ journal = Journal(full_output) journal.configuration["type_model"] = type(self.model) journal.configuration["type_dist_func"] = type(self.distance) journal.configuration["type_kernel_func"] = type(self.kernel) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["beta"] = beta journal.configuration["delta"] = delta journal.configuration["v"] = v journal.configuration["ar_cutoff"] = ar_cutoff journal.configuration["resample"] = resample journal.configuration["n_update"] = n_update journal.configuration["adaptcov"] = adaptcov journal.configuration["full_output"] = full_output accepted_parameters = np.zeros(shape=(n_samples,len(self.model.get_parameters()))) distances = np.zeros(shape=(n_samples,)) smooth_distances = np.zeros(shape=(n_samples,)) accepted_weights = np.ones(shape=(n_samples,1)) all_distances = None accepted_cov_mat = None if resample == None: resample = n_samples if n_update == None: n_update = n_samples sample_array = np.ones(shape=(steps,)) sample_array[0] = n_samples sample_array[1:] = n_update ## Acceptance counter to determine the resampling step accept = 0 samples_until = 0 # Initialize variables that need to be available remotely rc = _RemoteContextSABC(self.backend, self.model, self.distance, self.kernel, \ observations, n_samples, n_samples_per_param) for aStep in range(0,steps): # main SABC algorithm # print("INFO: Initialization of SABC") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(sample_array[aStep]), dtype=np.uint32) seed_pds = self.backend.parallelize(seed_arr) # 0: update remotely required variables # print("INFO: Broadcasting parameters.") rc.epsilon = epsilon rc._update_broadcasts(self.backend, accepted_parameters, accepted_cov_mat, smooth_distances, all_distances) # 1: Calculate parameters # print("INFO: Initial accepted parameter parameters") params_and_dists_pds = self.backend.map(rc._accept_parameter, seed_pds) params_and_dists = self.backend.collect(params_and_dists_pds) new_parameters, new_distances, new_all_parameters, new_all_distances, index, acceptance = [list(t) for t in zip(*params_and_dists)] new_parameters = np.array(new_parameters) new_distances = np.array(new_distances) new_all_distances = np.concatenate(new_all_distances) index = np.array(index) acceptance = np.array(acceptance) # Reading all_distances at Initial step if aStep == 0: index = np.linspace(0,n_samples-1,n_samples).astype(int).reshape(n_samples,) accept = 0 all_distances = new_all_distances #print(index[acceptance == 1]) # Initialize/Update the accepted parameters and their corresponding distances accepted_parameters[index[acceptance==1],:] = new_parameters[acceptance==1,:] distances[index[acceptance==1]] = new_distances[acceptance==1] # 2: Smoothing of the distances smooth_distances[index[acceptance==1]] = self._smoother_distance(distances[index[acceptance==1]],all_distances) # 3: Initialize/Update U, epsilon and covariance of perturbation kernel if aStep == 0: U = self._avergae_redefined_distance(self._smoother_distance(all_distances,all_distances), epsilon) else: U = np.mean(smooth_distances) epsilon = self._schedule(U,v) if accepted_parameters.shape[1] > 1: accepted_cov_mat = beta*np.cov(np.transpose(accepted_parameters)) + \ 0.0001*np.trace(np.cov(np.transpose(accepted_parameters)))*np.eye(accepted_parameters.shape[1]) else: accepted_cov_mat = beta*np.var(np.transpose(accepted_parameters)) + \ 0.0001*(np.var(np.transpose(accepted_parameters)))*np.eye(accepted_parameters.shape[1]) ## 4: Show progress and if acceptance rate smaller than a value break the iteration # print("INFO: Saving intermediate configuration to output journal.") if full_output == 1: journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) if aStep > 0: accept = accept + np.sum(acceptance) samples_until = samples_until + sample_array[aStep] acceptance_rate = accept/samples_until print('updates: ',np.sum(sample_array[1:aStep+1])/np.sum(sample_array[1:])*100,' epsilon: ' ,epsilon,\ 'u.mean: ', U, 'acceptance rate: ', acceptance_rate) if acceptance_rate < ar_cutoff: break # 5: Resampling if number of accepted particles greater than resample if accept >= resample and U > 1e-100: ## Weighted resampling: weight = np.exp(-smooth_distances*delta/U) weight = weight/sum(weight) index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace = 1, p = weight) accepted_parameters = accepted_parameters[index_resampled,:] smooth_distances = smooth_distances[index_resampled] ## Update U and epsilon: epsilon = epsilon*(1-delta) U = np.mean(smooth_distances) epsilon = self._schedule(U,v) ## Print effective sampling size print('Resampling: Effective sampling size: ', 1/sum(pow(weight/sum(weight),2))) accept = 0 samples_until = 0 #Add epsilon_arr, number of final steps and final output to the journal # print("INFO: Saving final configuration to output journal.") if full_output == 0: journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) journal.configuration["steps"] = aStep + 1 journal.configuration["epsilon"] = epsilon return journal
def _smoother_distance(self, distance, old_distance): """Smooths the distance using the Equation 14 of [1]. [1] C. Albert, H. R. Kuensch and A. Scheidegger. A Simulated Annealing Approach to Approximate Bayes Computations. Statistics and Computing 0960-3174 (2014). """ smoothed_distance = np.zeros(shape=(len(distance),)) for ind in range(0,len(distance)): if distance[ind] < np.min(old_distance): smoothed_distance[ind] = (distance[ind]/np.min(old_distance))/len(old_distance) else: smoothed_distance[ind] = np.mean(np.array(old_distance)<distance[ind]) return smoothed_distance def _avergae_redefined_distance(self, distance, epsilon): if epsilon==0: U = 0 else: U = np.average(distance, weights = np.exp(-distance/epsilon)) return(U) def _schedule(self, rho, v): if rho < 1e-100: epsilon = 0 else: fun = lambda epsilon: pow(epsilon,2) + v*pow(epsilon,3/2) - pow(rho,2) epsilon = optimize.fsolve(fun,rho/2) return(epsilon)
class _RemoteContextSABC: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param): self.model = model self.distance = distance self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.epsilon = None self.kernel = kernel #self._smoother_distance = _smoother_distance # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) self.accepted_parameters_bds = None self.accepted_cov_mat_bds = None self.smooth_distances_bds = None self.all_distances_bds = None def _update_broadcasts(self, backend, accepted_parameters, accepted_cov_mat, smooth_distances, all_distances): def destroy(bc): if bc != None: bc.unpersist #bc.destroy if not accepted_parameters is None: self.accepted_parameters_bds = backend.broadcast(accepted_parameters) if not accepted_cov_mat is None: self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat) if not smooth_distances is None: self.smooth_distances_bds = backend.broadcast(smooth_distances) if not all_distances is None: self.all_distances_bds = backend.broadcast(all_distances) def _smoother_distance_remote(self, distance, old_distance): """Smooths the distance using the Equation 14 of [1]. [1] C. Albert, H. R. Kuensch and A. Scheidegger. A Simulated Annealing Approach to Approximate Bayes Computations. Statistics and Computing 0960-3174 (2014). """ smoothed_distance = np.zeros(shape=(len(distance),)) for ind in range(0,len(distance)): if distance[ind] < np.min(old_distance): smoothed_distance[ind] = (distance[ind]/np.min(old_distance))/len(old_distance) else: smoothed_distance[ind] = np.mean(np.array(old_distance)<distance[ind]) return smoothed_distance # define helper functions for map step def _accept_parameter(self, seed): """ Samples a single model parameter and simulate from it until accepted with probabilty exp[-rho(x,y)/epsilon]. :type seed: int :rtype: np.array :return: accepted parameter """ rng = np.random.RandomState(seed) self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) all_parameters = [] all_distances = [] index = [] acceptance = 0 if self.accepted_cov_mat_bds == None: while acceptance == 0: self.model.sample_from_prior() new_theta = self.model.get_parameters() all_parameters.append(self.model.get_parameters()) y_sim = self.model.simulate(self.n_samples_per_param) distance = self.distance.distance(self.observations_bds.value(), y_sim) all_distances.append(distance) acceptance = rng.binomial(1,np.exp(-distance/self.epsilon),1) acceptance = 1 else: ## Select one arbitrary particle: index = rng.choice(self.n_samples, size=1)[0] ## Sample proposal parameter and calculate new distance: theta = self.accepted_parameters_bds.value()[index,:] while True: self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) if len(theta) > 1: new_theta = self.kernel.sample(1)[0,:] else: new_theta = self.kernel.sample(1) theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: break y_sim = self.model.simulate(self.n_samples_per_param) distance = self.distance.distance(self.observations_bds.value(), y_sim) smooth_distance = self._smoother_distance_remote([distance],self.all_distances_bds.value()) ## Calculate acceptance probability: ratio_prior_prob = self.model.prior.pdf(new_theta)/self.model.prior.pdf(self.accepted_parameters_bds.value()[index,:]) ratio_likelihood_prob = np.exp((self.smooth_distances_bds.value()[index] - smooth_distance) / self.epsilon) acceptance_prob = ratio_prior_prob*ratio_likelihood_prob ## If accepted if rng.rand(1) < acceptance_prob: acceptance = 1 else: distance = np.inf return (new_theta, distance, all_parameters, all_distances, index, acceptance)
[docs]class ABCsubsim: """This base class implements Approximate Bayesian Computation by subset simulation (ABCsubsim) algorithm of [1]. [1] M. Chiachio, J. L. Beck, J. Chiachio, and G. Rus., Approximate Bayesian computation by subset simulation. SIAM J. Sci. Comput., 36(3):A1339–A1358, 2014/10/03 2014. Parameters ---------- model : abcpy.models.Model Model object that conforms to the Model class. distance : abcpy.distances.Distance Distance object that conforms to the Distance class. kernel : abcpy.distributions.Distribution Distribution object defining the perturbation kernel needed for the sampling backend : abcpy.backends.Backend Backend object that conforms to the Backend class. seed : integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """
[docs] def __init__(self, model, distance, kernel, backend, seed=None): self.model = model self.distance = distance self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, chain_length = 10, ap_change_cutoff = 10, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : numpy.ndarray Observed data. steps : integer Number of iterations in the sequential algoritm ("generations") n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. chain_length : integer, optional Chain length of the MCMC. n_samples should be divisable by chain_length. The default value is 10. ap_change_cutoff : float, optional The cutoff value for the percentage change in the anneal parameter. If the change is less than ap_change_cutoff the iterations are stopped. The default value is 10. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ journal = Journal(full_output) journal.configuration["type_model"] = type(self.model) journal.configuration["type_dist_func"] = type(self.distance) journal.configuration["type_kernel_func"] = type(self.kernel) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["chain_length"] = chain_length journal.configuration["ap_change_cutoff"] = ap_change_cutoff journal.configuration["full_output"] = full_output accepted_parameters = None accepted_weights = np.ones(shape=(n_samples,1)) accepted_cov_mat = None anneal_parameter = 0 anneal_parameter_old = 0 temp_chain_length = 1 # Initialize variables that need to be available remotely rc = _RemoteContextABCsubsim(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param, chain_length) for aStep in range(0,steps): # main ABCsubsim algorithm # print("INFO: Initialization of ABCsubsim") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(n_samples/temp_chain_length), dtype=np.uint32) index_arr = np.linspace(0,n_samples/temp_chain_length-1,n_samples/temp_chain_length).astype(int).reshape(int(n_samples/temp_chain_length),) seed_index_arr = np.column_stack((seed_arr,index_arr)) seed_index_pds = self.backend.parallelize(seed_index_arr) # 0: update remotely required variables # print("INFO: Broadcasting parameters.") rc._update_broadcasts(self.backend, accepted_parameters, accepted_cov_mat) # 1: Calculate parameters # print("INFO: Initial accepted parameter parameters") params_and_dists_pds = self.backend.map(rc._accept_parameter, seed_index_pds) params_and_dists = self.backend.collect(params_and_dists_pds) new_parameters, new_distances = [list(t) for t in zip(*params_and_dists)] accepted_parameters = np.concatenate(new_parameters) distances = np.concatenate(new_distances) # 2: Sort and renumber samples SortIndex = sorted(range(len(distances)), key=lambda k: distances[k]) distances = distances[SortIndex] accepted_parameters = accepted_parameters[SortIndex,:] # 3: Calculate and broadcast annealling parameters temp_chain_length = chain_length if aStep > 0: anneal_parameter_old = anneal_parameter anneal_parameter = 0.5*(distances[int(n_samples/temp_chain_length)]+distances[int(n_samples/temp_chain_length)+1]) rc.anneal_parameter = anneal_parameter # 4: Update proposal covariance matrix (Parallelized) if aStep == 0: accepted_cov_mat = np.cov(accepted_parameters, rowvar=False) else: accepted_cov_mat = pow(2,1)*accepted_cov_mat rc._update_broadcasts(self.backend, accepted_parameters, accepted_cov_mat) seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=10, dtype=np.uint32) index_arr = np.linspace(0,10-1,10).astype(int).reshape(10,) seed_index_arr = np.column_stack((seed_arr,index_arr)) seed_index_pds = self.backend.parallelize(seed_index_arr) cov_mat_index_pds = self.backend.map(rc._update_cov_mat, seed_index_pds) cov_mat_index = self.backend.collect(cov_mat_index_pds) cov_mat, T, accept_index = [list(t) for t in zip(*cov_mat_index)] for ind in range(10): if accept_index[ind] == 1: accepted_cov_mat = cov_mat[ind] break # print("INFO: Saving intermediate configuration to output journal.") if full_output == 1: journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) # Show progress anneal_parameter_change_percentage = 100*abs(anneal_parameter_old-anneal_parameter)/anneal_parameter print('Steps: ', aStep, 'annealing parameter: ', anneal_parameter, 'change (%) in annealing parameter: ', anneal_parameter_change_percentage ) if anneal_parameter_change_percentage < ap_change_cutoff: break #Add anneal_parameter, number of final steps and final output to the journal # print("INFO: Saving final configuration to output journal.") if full_output == 0: journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) journal.configuration["steps"] = aStep+1 journal.configuration["anneal_parameter"] = anneal_parameter return journal
class _RemoteContextABCsubsim: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param, chain_length): self.model = model self.distance = distance self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.kernel = kernel self.chain_length = chain_length self.anneal_parameter = None # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) self.accepted_parameters_bds = None self.accepted_cov_mat_bds = None def _update_broadcasts(self, backend, accepted_parameters, accepted_cov_mat): def destroy(bc): if bc != None: bc.unpersist #bc.destroy if not accepted_parameters is None: self.accepted_parameters_bds = backend.broadcast(accepted_parameters) if not accepted_cov_mat is None: self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat) # define helper functions for map step def _accept_parameter(self, seed_index): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is smaller than eplison. :type seed: int :rtype: np.array :return: accepted parameter """ seed = seed_index[0] index = seed_index[1] rng = np.random.RandomState(seed) self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) result_theta = [] result_distance = [] if self.accepted_parameters_bds == None: self.model.sample_from_prior() y_sim = self.model.simulate(self.n_samples_per_param) distance = self.distance.distance(self.observations_bds.value(), y_sim) result_theta.append(self.model.get_parameters()) result_distance.append(distance) else: theta = self.accepted_parameters_bds.value()[index] self.model.set_parameters(theta) y_sim = self.model.simulate(self.n_samples_per_param) distance = self.distance.distance(self.observations_bds.value(), y_sim) result_theta.append(theta) result_distance.append(distance) for ind in range(0,self.chain_length-1): while True: self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) new_theta = self.kernel.sample(1)[0,:] theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: break y_sim = self.model.simulate(self.n_samples_per_param) new_distance = self.distance.distance(self.observations_bds.value(), y_sim) ## Calculate acceptance probability: ratio_prior_prob = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta) self.kernel.set_parameters([new_theta, self.accepted_cov_mat_bds.value()]) kernel_numerator = self.kernel.pdf(theta) self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) kernel_denominator = self.kernel.pdf(new_theta) ratio_likelihood_prob = kernel_numerator/kernel_denominator acceptance_prob = min(1,ratio_prior_prob*ratio_likelihood_prob)*(new_distance<self.anneal_parameter) ## If accepted if rng.binomial(1,acceptance_prob)==1: result_theta.append(new_theta) result_distance.append(new_distance) theta = new_theta distance = new_distance else: result_theta.append(theta) result_distance.append(distance) return (result_theta, result_distance) def _update_cov_mat(self, seed_t): seed = seed_t[0] t = seed_t[1] rng = np.random.RandomState(seed) self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) acceptance = 0 accepted_cov_mat_transformed = self.accepted_cov_mat_bds.value()*pow(2.0,-2.0*t) theta = self.accepted_parameters_bds.value()[0] self.model.set_parameters(theta) for ind in range(0,self.chain_length): while True: self.kernel.set_parameters([theta, accepted_cov_mat_transformed]) new_theta = self.kernel.sample(1)[0,:] theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: break y_sim = self.model.simulate(self.n_samples_per_param) new_distance = self.distance.distance(self.observations_bds.value(), y_sim) ## Calculate acceptance probability: ratio_prior_prob = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta) self.kernel.set_parameters([new_theta, accepted_cov_mat_transformed]) kernel_numerator = self.kernel.pdf(theta) self.kernel.set_parameters([theta, accepted_cov_mat_transformed]) kernel_denominator = self.kernel.pdf(new_theta) ratio_likelihood_prob = kernel_numerator/kernel_denominator acceptance_prob = min(1,ratio_prior_prob*ratio_likelihood_prob)*(new_distance<self.anneal_parameter) ## If accepted if rng.binomial(1,acceptance_prob)==1: theta = new_theta acceptance = acceptance + 1 if acceptance/10<=0.5 and acceptance/10>=0.3: return(accepted_cov_mat_transformed, t, 1) else: return(accepted_cov_mat_transformed, t, 0)
[docs]class RSMCABC: """This base class implements Adaptive Population Monte Carlo Approximate Bayesian computation of Drovandi and Pettitt [1]. [1] CC. Drovandi CC and AN. Pettitt, Estimation of parameters for macroparasite population evolution using approximate Bayesian computation. Biometrics 67(1):225–233, 2011. Parameters ---------- model : abcpy.models.Model Model object that conforms to the Model class. distance : abcpy.distances.Distance Distance object that conforms to the Distance class. kernel : abcpy.distributions.Distribution Distribution object defining the perturbation kernel needed for the sampling backend : abcpy.backends.Backend Backend object that conforms to the Backend class. seed : integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """
[docs] def __init__(self, model, distance, kernel, backend, seed=None): self.model = model self.distance = distance self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.1, epsilon_init = 100, epsilon_final = 0.1, const = 1, covFactor = 2.0, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : numpy.ndarray Observed data. steps : integer Number of iterations in the sequential algoritm ("generations") n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. alpha : float, optional A parameter taking values between [0,1], the default value is 0.1. epsilon_init : float, optional Initial value of threshold, the default is 100 epsilon_final : float, optional Terminal value of threshold, the default is 0.1 const : float, optional A constant to compute acceptance probabilty covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ journal = Journal(full_output) journal.configuration["type_model"] = type(self.model) journal.configuration["type_dist_func"] = type(self.distance) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["steps"] = steps accepted_parameters = None accepted_cov_mat = None accepted_dist = None # Initialize variables that need to be available remotely rc = _RemoteContextRSMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param, alpha) # main RSMCABC algorithm # print("INFO: Starting RSMCABC iterations.") for aStep in range(steps): # 0: Compute epsilon, compute new covariance matrix for Kernel, # and finally Drawing new new/perturbed samples using prior or MCMC Kernel # print("DEBUG: Iteration " + str(aStep) + " of RSMCABC algorithm.") if aStep == 0: n_replenish = n_samples # Compute epsilon epsilon = [epsilon_init] R = int(1) else: n_replenish = round(n_samples*alpha) # Throw away N_alpha particles with largest dist accepted_parameters = np.delete(accepted_parameters, np.arange(round(n_samples*alpha))+(n_samples-round(n_samples*alpha)), 0) accepted_dist = np.delete(accepted_dist, np.arange(round(n_samples*alpha))+(n_samples-round(n_samples*alpha)), 0) # Compute epsilon epsilon.append(accepted_dist[-1]) # Calculate covariance # print("INFO: Calculating covariance matrix.") new_cov_mat = covFactor * np.cov(accepted_parameters, rowvar=False) accepted_cov_mat = new_cov_mat if epsilon[-1] < epsilon_final: break seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size = n_replenish, dtype=np.uint32) seed_pds = self.backend.parallelize(seed_arr) #update remotely required variables # print("INFO: Broadcasting parameters.") rc.epsilon = epsilon rc.R = R # Broadcast updated variable rc._update_broadcasts(self.backend, accepted_parameters, accepted_dist, accepted_cov_mat) #calculate resample parameters #print("INFO: Resampling parameters") params_and_dist_index_pds = self.backend.map(rc._accept_parameter, seed_pds) params_and_dist_index = self.backend.collect(params_and_dist_index_pds) new_parameters, new_dist, new_index = [list(t) for t in zip(*params_and_dist_index)] new_parameters = np.array(new_parameters) new_dist = np.array(new_dist) new_index = np.array(new_index) # 1: Update all parameters, compute acceptance probability, compute epsilon if len(new_dist) == n_samples: accepted_parameters = new_parameters accepted_dist = new_dist else: accepted_parameters = np.concatenate((accepted_parameters,new_parameters)) accepted_dist = np.concatenate((accepted_dist, new_dist)) # 2: Compute acceptance probabilty and set R #print(aStep) #print(new_index) prob_acceptance = sum(new_index)/(R*n_replenish) if prob_acceptance == 1 or prob_acceptance == 0: R = 1 else: R = int(np.log(const)/np.log(1-prob_acceptance)) # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1): journal.add_parameters(accepted_parameters) journal.add_weights(np.ones(shape=(n_samples,1))*(1/n_samples)) #Add epsilon_arr to the journal journal.configuration["epsilon_arr"] = epsilon return journal
class _RemoteContextRSMCABC: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param, alpha): self.model = model self.distance = distance self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.kernel = kernel self.alpha = alpha self.epsilon = None self.R = None # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) self.accepted_parameters_bds = None self.accepted_dist_bds = None self.accepted_cov_mat_bds = None def _update_broadcasts(self, backend, accepted_parameters, accepted_dist, accepted_cov_mat): def destroy(bc): if bc != None: bc.unpersist #bc.destroy if not accepted_parameters is None: self.accepted_parameters_bds = backend.broadcast(accepted_parameters) if not accepted_dist is None: self.accepted_dist_bds = backend.broadcast(accepted_dist) if not accepted_cov_mat is None: self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat) # define helper functions for map step def _accept_parameter(self, seed): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is smaller than eplison. :type seed: int :rtype: np.array :return: accepted parameter """ rng = np.random.RandomState(seed) self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) distance = self.distance.dist_max() if self.accepted_parameters_bds == None: while distance > self.epsilon[-1]: self.model.sample_from_prior() y_sim = self.model.simulate(self.n_samples_per_param) distance = self.distance.distance(self.observations_bds.value(), y_sim) index_accept = 1 else: index = rng.choice(len(self.accepted_parameters_bds.value()), size=1) theta = self.accepted_parameters_bds.value()[index[0]] index_accept = 0.0 for ind in range(self.R): while True: self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) new_theta = self.kernel.sample(1)[0,:] theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: break y_sim = self.model.simulate(self.n_samples_per_param) distance = self.distance.distance(self.observations_bds.value(), y_sim) ratio_prior_prob = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta) self.kernel.set_parameters([new_theta, self.accepted_cov_mat_bds.value()]) kernel_numerator = self.kernel.pdf(theta) self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) kernel_denominator = self.kernel.pdf(new_theta) ratio_kernel_prob = kernel_numerator/kernel_denominator probability_acceptance = min(1,ratio_prior_prob*ratio_kernel_prob) if distance < self.epsilon[-1] and rng.binomial(1,probability_acceptance) == 1: index_accept += 1 else: self.model.set_parameters(theta) distance = self.accepted_dist_bds.value()[index[0]] return (self.model.get_parameters(), distance, index_accept)
[docs]class APMCABC: """This base class implements Adaptive Population Monte Carlo Approximate Bayesian computation of M. Lenormand et al. [1]. [1] M. Lenormand, F. Jabot and G. Deffuant, Adaptive approximate Bayesian computation for complex models. Computational Statistics, 28:2777–2796, 2013. Parameters ---------- model : abcpy.models.Model Model object that conforms to the Model class. distance : abcpy.distances.Distance Distance object that conforms to the Distance class. kernel : abcpy.distributions.Distribution Distribution object defining the perturbation kernel needed for the sampling backend : abcpy.backends.Backend Backend object that conforms to the Backend class. seed : integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """
[docs] def __init__(self, model, distance, kernel, backend, seed=None): self.model = model self.distance = distance self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.9, acceptance_cutoff = 0.2, covFactor = 2.0, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : numpy.ndarray Observed data. steps : integer Number of iterations in the sequential algoritm ("generations") n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. alpha : float, optional A parameter taking values between [0,1], the default value is 0.1. acceptance_cutoff : float, optional Acceptance ratio cutoff, The default value is 0.2 covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ journal = Journal(full_output) journal.configuration["type_model"] = type(self.model) journal.configuration["type_dist_func"] = type(self.distance) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["steps"] = steps accepted_parameters = None accepted_weights = None accepted_cov_mat = None accepted_dist = None alpha_accepted_parameters = None alpha_accepted_weights = None alpha_accepted_dist = None # Initialize variables that need to be available remotely rc = _RemoteContextAPMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param, alpha) # main APMCABC algorithm # print("INFO: Starting APMCABC iterations.") for aStep in range(steps): # 0: Drawing new new/perturbed samples using prior or MCMC Kernel # print("DEBUG: Iteration " + str(aStep) + " of APMCABC algorithm.") if aStep > 0: n_additional_samples = n_samples - round(n_samples*alpha) else: n_additional_samples = n_samples seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size = n_additional_samples, dtype=np.uint32) seed_pds = self.backend.parallelize(seed_arr) #update remotely required variables # print("INFO: Broadcasting parameters.") rc._update_broadcasts(self.backend, alpha_accepted_parameters, alpha_accepted_weights, alpha_accepted_dist, accepted_cov_mat) #calculate resample parameters #print("INFO: Resampling parameters") params_and_dist_weights_pds = self.backend.map(rc._accept_parameter, seed_pds) params_and_dist_weights = self.backend.collect(params_and_dist_weights_pds) new_parameters, new_dist, new_weights = [list(t) for t in zip(*params_and_dist_weights)] new_parameters = np.array(new_parameters) new_dist = np.array(new_dist) new_weights = np.array(new_weights).reshape(n_additional_samples,1) # 1: Update all parameters, compute acceptance probability, compute epsilon if len(new_weights) == n_samples: accepted_parameters = new_parameters accepted_dist = new_dist accepted_weights = new_weights # Compute acceptance probability prob_acceptance = 1 # Compute epsilon epsilon = [np.percentile(accepted_dist, alpha*100)] else: accepted_parameters = np.concatenate((alpha_accepted_parameters,new_parameters)) accepted_dist = np.concatenate((alpha_accepted_dist, new_dist)) accepted_weights = np.concatenate((alpha_accepted_weights, new_weights)) # Compute acceptance probability prob_acceptance = sum(new_dist < epsilon[-1])/len(new_dist) # Compute epsilon epsilon.append(np.percentile(accepted_dist, alpha*100)) # 2: Update alpha_parameters, alpha_dist and alpha_weights index_alpha = accepted_dist < epsilon[-1] alpha_accepted_parameters = accepted_parameters[index_alpha,:] alpha_accepted_weights = accepted_weights[index_alpha]/sum(accepted_weights[index_alpha]) alpha_accepted_dist = accepted_dist[index_alpha] # 3: calculate covariance # print("INFO: Calculating covariance matrix.") new_cov_mat = covFactor * np.cov(alpha_accepted_parameters, aweights = alpha_accepted_weights.reshape(-1), rowvar=False) accepted_cov_mat = new_cov_mat # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1): journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) # 4: Check probability of acceptance lower than acceptance_cutoff if prob_acceptance < acceptance_cutoff: break #Add epsilon_arr to the journal journal.configuration["epsilon_arr"] = epsilon return journal
class _RemoteContextAPMCABC: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param, alpha): self.model = model self.distance = distance self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.kernel = kernel self.alpha = alpha self.epsilon = None # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) self.alpha_accepted_parameters_bds = None self.alpha_accepted_weights_bds = None self.alpha_accepted_dist = None self.accepted_cov_mat_bds = None def _update_broadcasts(self, backend, alpha_accepted_parameters, alpha_accepted_weights, alpha_accepted_dist, accepted_cov_mat): def destroy(bc): if bc != None: bc.unpersist #bc.destroy if not alpha_accepted_parameters is None: self.alpha_accepted_parameters_bds = backend.broadcast(alpha_accepted_parameters) if not alpha_accepted_weights is None: self.alpha_accepted_weights_bds = backend.broadcast(alpha_accepted_weights) if not alpha_accepted_dist is None: self.alpha_accepted_dist_bds = backend.broadcast(alpha_accepted_dist) if not accepted_cov_mat is None: self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat) # define helper functions for map step def _accept_parameter(self, seed): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is smaller than eplison. :type seed: int :rtype: np.array :return: accepted parameter """ rng = np.random.RandomState(seed) self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) if self.alpha_accepted_parameters_bds == None: self.model.sample_from_prior() y_sim = self.model.simulate(self.n_samples_per_param) dist = self.distance.distance(self.observations_bds.value(), y_sim) weight = 1.0 else: index = rng.choice(len(self.alpha_accepted_weights_bds.value()), size=1, p=self.alpha_accepted_weights_bds.value().reshape(-1)) theta = self.alpha_accepted_parameters_bds.value()[index[0]] # trucate the normal to the bounds of parameter space of the model # truncating the normal like this is fine: https://arxiv.org/pdf/0907.4010v1.pdf while True: self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) new_theta = self.kernel.sample(1)[0,:] theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: break y_sim = self.model.simulate(self.n_samples_per_param) dist = self.distance.distance(self.observations_bds.value(), y_sim) prior_prob = self.model.prior.pdf(new_theta) denominator = 0.0 for i in range(0, len(self.alpha_accepted_weights_bds.value())): self.kernel.set_parameters([self.alpha_accepted_parameters_bds.value()[i,:], self.accepted_cov_mat_bds.value()]) pdf_value = self.kernel.pdf(new_theta) denominator += self.alpha_accepted_weights_bds.value()[i,0] * pdf_value weight = 1.0 * prior_prob / denominator return (self.model.get_parameters(), dist, weight)
[docs]class SMCABC: """This base class implements Adaptive Population Monte Carlo Approximate Bayesian computation of Del Moral et al. [1]. [1] P. Del Moral, A. Doucet, A. Jasra, An adaptive sequential Monte Carlo method for approximate Bayesian computation. Statistics and Computing, 22(5):1009–1020, 2012. Parameters ---------- model : abcpy.models.Model Model object that conforms to the Model class. distance : abcpy.distances.Distance Distance object that conforms to the Distance class. kernel : abcpy.distributions.Distribution Distribution object defining the perturbation kernel needed for the sampling backend : abcpy.backends.Backend Backend object that conforms to the Backend class. seed : integer, optional Optional initial seed for the random number generator. The default value is generated randomly. """
[docs] def __init__(self, model, distance, kernel, backend, seed=None): self.model = model self.distance = distance self.kernel = kernel self.backend = backend self.rng = np.random.RandomState(seed)
[docs] def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, epsilon_final = 0.1, alpha = 0.95, covFactor = 2, resample = None, full_output=0): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : numpy.ndarray Observed data. steps : integer Number of iterations in the sequential algoritm ("generations") epsilon_final : float, optional The final threshold value of epsilon to be reached. The default value is 0.1. n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. alpha : float, optional A parameter taking values between [0,1], determinining the rate of change of the threshold epsilon. The default value is 0.5. covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ journal = Journal(full_output) journal.configuration["type_model"] = type(self.model) journal.configuration["type_dist_func"] = type(self.distance) journal.configuration["n_samples"] = n_samples journal.configuration["n_samples_per_param"] = n_samples_per_param journal.configuration["steps"] = steps accepted_parameters = None accepted_weights = None accepted_cov_mat = None accepted_y_sim = None # Define the resmaple parameter if resample == None: resample = n_samples*0.5 #Define epsilon_init epsilon = [10000] # Initialize variables that need to be available remotely rc = _RemoteContextSMCABC(self.backend, self.model, self.distance, self.kernel, observations, n_samples, n_samples_per_param) # main SMC ABC algorithm # print("INFO: Starting SMCABC iterations.") for aStep in range(0, steps): # Break if epsilon in previous step is less than epsilon_final if epsilon[-1] == epsilon_final: break # 0: Compute the Epsilon if accepted_y_sim != None: # Compute epsilon for next step fun = lambda epsilon_var: self._compute_epsilon(epsilon_var, \ epsilon, observations, accepted_y_sim, accepted_weights, n_samples, n_samples_per_param, alpha) epsilon_new = self._bisection(fun, epsilon_final, epsilon[-1], 0.001) if epsilon_new < epsilon_final: epsilon_new = epsilon_final epsilon.append(epsilon_new) # 1: calculate weights for new parameters # print("INFO: Calculating weights.") if accepted_y_sim != None: new_weights = np.zeros(shape=(n_samples),) for ind1 in range(n_samples): numerator = 0.0 denominator = 0.0 for ind2 in range(n_samples_per_param): numerator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon[-1]) denominator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon[-2]) if denominator != 0.0: new_weights[ind1] = accepted_weights[ind1]*(numerator/denominator) else: new_weights[ind1] = 0 new_weights = new_weights / sum(new_weights) else: new_weights = np.ones(shape=(n_samples),)*(1.0 /n_samples) # 2: Resample if accepted_y_sim != None and pow(sum(pow(new_weights,2)),-1) < resample: print('Resampling') # Weighted resampling: index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace = 1, p = new_weights) accepted_parameters = accepted_parameters[index_resampled,:] new_weights = np.ones(shape=(n_samples),)*(1.0 /n_samples) # Update the weights accepted_weights = new_weights.reshape(len(new_weights),1) # 3: Drawing new perturbed samples using MCMC Kernel # print("DEBUG: Iteration " + str(aStep) + " of SMCABC algorithm.") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32) index_arr = np.arange(n_samples) seed_index_arr = np.column_stack((seed_arr,index_arr)) seed_index_pds = self.backend.parallelize(seed_index_arr) #update remotely required variables # print("INFO: Broadcasting parameters.") rc.epsilon = epsilon rc._update_broadcasts(self.backend, accepted_parameters, accepted_weights, accepted_cov_mat, accepted_y_sim) #calculate resample parameters #print("INFO: Resampling parameters") params_and_ysim_pds = self.backend.map(rc._accept_parameter, seed_index_pds) params_and_ysim = self.backend.collect(params_and_ysim_pds) new_parameters, new_y_sim = [list(t) for t in zip(*params_and_ysim)] new_parameters = np.array(new_parameters) #Update the parameters accepted_parameters = new_parameters accepted_y_sim = new_y_sim # 4: calculate covariance # print("INFO: Calculating covariance matrix.") new_cov_mat = covFactor * np.cov(accepted_parameters, aweights = accepted_weights.reshape(-1), rowvar=False) accepted_cov_mat = new_cov_mat # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps-1) or (full_output == 0 and aStep == steps-1): journal.add_parameters(accepted_parameters) journal.add_weights(accepted_weights) #Add epsilon_arr to the journal journal.configuration["epsilon_arr"] = epsilon return journal
def _compute_epsilon(self, epsilon_new, epsilon, observations, accepted_y_sim, accepted_weights, n_samples, n_samples_per_param, alpha): RHS = alpha*pow(sum(pow(accepted_weights,2)),-1) LHS = np.zeros(shape=(n_samples),) for ind1 in range(n_samples): numerator = 0.0 denominator = 0.0 for ind2 in range(n_samples_per_param): numerator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon_new) denominator += (self.distance.distance(observations, [accepted_y_sim[ind1][ind2]]) < epsilon[-1]) LHS[ind1] = accepted_weights[ind1]*(numerator/denominator) if sum(LHS) == 0: result = RHS else: LHS = LHS/sum(LHS) LHS = pow(sum(pow(LHS,2)),-1) result = RHS-LHS return(result) def _bisection(self, func, low, high, tol): midpoint = (low+high)/2.0 while (high-low)/2.0 > tol: if func(midpoint) == 0: return midpoint elif func(low)*func(midpoint) < 0: high = midpoint else : low = midpoint midpoint = (low+high)/2.0 return midpoint
class _RemoteContextSMCABC: """ Contains everything that is sent over the network like broadcast vars and map functions """ def __init__(self, backend, model, distance, kernel, observations, n_samples, n_samples_per_param): self.model = model self.distance = distance self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param self.kernel = kernel self.epsilon = None # these are usually big tables, so we broadcast them to have them once # per executor instead of once per task self.observations_bds = backend.broadcast(observations) self.accepted_parameters_bds = None self.accepted_weights_bds = None self.accepted_cov_mat_bds = None self.accepted_y_sim_bds = None def _update_broadcasts(self, backend, accepted_parameters, accepted_weights, accepted_cov_mat, accepted_y_sim): def destroy(bc): if bc != None: bc.unpersist #bc.destroy if not accepted_parameters is None: self.accepted_parameters_bds = backend.broadcast(accepted_parameters) if not accepted_weights is None: self.accepted_weights_bds = backend.broadcast(accepted_weights) if not accepted_cov_mat is None: self.accepted_cov_mat_bds = backend.broadcast(accepted_cov_mat) if not accepted_y_sim is None: self.accepted_y_sim_bds = backend.broadcast(accepted_y_sim) # define helper functions for map step def _accept_parameter(self, seed_index): """ Samples a single model parameter and simulate from it until distance between simulated outcome and the observation is smaller than eplison. :type seed: int :rtype: np.array :return: accepted parameter """ seed = seed_index[0] index = seed_index[1] rng = np.random.RandomState(seed) self.model.prior.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) self.kernel.reseed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) #print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) if self.accepted_parameters_bds == None: self.model.sample_from_prior() y_sim = self.model.simulate(self.n_samples_per_param) else: if self.accepted_weights_bds.value()[index] > 0: theta = self.accepted_parameters_bds.value()[index] while True: self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) new_theta = self.kernel.sample(1)[0,:] theta_is_accepted = self.model.set_parameters(new_theta) if theta_is_accepted and self.model.prior.pdf(self.model.get_parameters()) != 0: break y_sim = self.model.simulate(self.n_samples_per_param) y_sim_old = self.accepted_y_sim_bds.value()[index] ## Calculate acceptance probability: numerator = 0.0 denominator = 0.0 for ind in range(self.n_samples_per_param): numerator += (self.distance.distance(self.observations_bds.value(), [y_sim[ind]]) < self.epsilon[-1]) denominator += (self.distance.distance(self.observations_bds.value(), [y_sim_old[ind]]) < self.epsilon[-1]) ratio_data_epsilon = numerator/denominator ratio_prior_prob = self.model.prior.pdf(new_theta)/self.model.prior.pdf(theta) self.kernel.set_parameters([new_theta, self.accepted_cov_mat_bds.value()]) kernel_numerator = self.kernel.pdf(theta) self.kernel.set_parameters([theta, self.accepted_cov_mat_bds.value()]) kernel_denominator = self.kernel.pdf(new_theta) ratio_likelihood_prob = kernel_numerator/kernel_denominator acceptance_prob = min(1,ratio_data_epsilon*ratio_prior_prob*ratio_likelihood_prob) if rng.binomial(1,acceptance_prob) == 1: self.model.set_parameters(new_theta) else: self.model.set_parameters(theta) y_sim = self.accepted_y_sim_bds.value()[index] else: self.model.set_parameters(self.accepted_parameters_bds.value()[index]) y_sim = self.accepted_y_sim_bds.value()[index] return (self.model.get_parameters(), y_sim)