Source code for pytadbit.imp.impoptimizer

"""
28 Aug 2013


"""
from pytadbit.imp.imp_modelling import generate_3d_models
from pytadbit.utils.extraviews import plot_2d_optimization_result
from pytadbit.utils.extraviews import plot_3d_optimization_result
import numpy as np

class IMPoptimizer(object):
    """
    This class optimizes a set of parameters (scale, maxdist, lowfreq and
    upfreq) in order to maximize the correlation between the models generated
    by IMP and the input data.

    :param experiment: an instance of the class pytadbit.experiment.Experiment
    :param start: first bin to model (bin number)
    :param end: last bin to model (bin number)
    :param 500 n_models: number of models to generate
    :param 300 cutoff: cutoff value stored on the instance and recorded in the
       header of the result file (how the modelling step uses it is not shown
       in this module)
    :param 100 n_keep: number of models used in the final analysis (usually
       the top 20% of the generated models). The models are ranked according
       to their objective function value (the lower the better)
    :param 1 close_bins: number of particles away (i.e. the bin number
       difference) a particle pair must be in order to be considered as
       neighbors (e.g. 1 means consecutive particles)

    """
    def __init__(self, experiment, start, end, n_models=500, cutoff=300,
                 n_keep=100, close_bins=1):

        self.resolution = experiment.resolution
        # Written so that the output is identical under Python 2 and Python 3
        # (the original used Python-2-only print statements).
        print(experiment)
        print('%s %s' % (start, end))
        (self.zscores,
         self.values) = experiment._sub_experiment_zscore(start, end)
        self.n_models = n_models
        self.n_keep = n_keep
        self.close_bins = close_bins
        self.cutoff = cutoff

        # Values tested so far for each parameter.
        self.scale_range = []
        self.maxdist_range = []
        self.lowfreq_range = []
        self.upfreq_range = []
        # Maps (scale, maxdist, upfreq, lowfreq), each rendered as a string by
        # my_round, to the correlation obtained for that combination.
        self.results = {}

    @staticmethod
    def _params_key(scale, maxdist, upfreq, lowfreq):
        """
        Build the key under which a parameter combination is stored in
        self.results: a tuple of the four values rendered by my_round.
        """
        return (my_round(scale), my_round(maxdist),
                my_round(upfreq), my_round(lowfreq))

    def plot_2d(self, axes=('scale', 'maxdist', 'upfreq', 'lowfreq'),
                show_best=0, skip=None):
        """
        A grid of heatmaps representing the result of the optimization.

        :param 'scale','maxdist','upfreq','lowfreq' axes: list of axes to be
           represented in the plot. The order will define which parameter will
           be placed on the x, y, z or w axis.
        :param 0 show_best: number of best correlation values to highlight in
           the plot
        :param None skip: if passed (as a dictionary), fix a given axis,
           e.g.: {'scale': 0.001, 'maxdist': 500}

        """
        results = self._result_to_array()
        plot_2d_optimization_result((('scale', 'maxdist', 'upfreq', 'lowfreq'),
                                     (self.scale_range, self.maxdist_range,
                                      self.upfreq_range, self.lowfreq_range),
                                     results), axes=axes, show_best=show_best,
                                    skip=skip)

    def plot_3d(self, axes=('scale', 'maxdist', 'upfreq', 'lowfreq')):
        """
        Represent the result of the optimization through
        plot_3d_optimization_result.

        :param 'scale','maxdist','upfreq','lowfreq' axes: tuple of axes to be
           represented in the plot. The order will define which parameter will
           be placed on the x, y, z or w axis.

        """
        results = self._result_to_array()
        plot_3d_optimization_result((('scale', 'maxdist', 'upfreq', 'lowfreq'),
                                     (self.scale_range, self.maxdist_range,
                                      self.upfreq_range, self.lowfreq_range),
                                     results), axes=axes)

    def _result_to_array(self):
        """
        Arrange the stored correlations in a 4-dimensional numpy array indexed
        by (scale, maxdist, upfreq, lowfreq), following the order of the
        corresponding *_range lists. Combinations without a stored result are
        set to NaN.

        :returns: a numpy array of shape (len(scale_range),
           len(maxdist_range), len(upfreq_range), len(lowfreq_range))
        """
        results = np.empty((len(self.scale_range), len(self.maxdist_range),
                            len(self.upfreq_range), len(self.lowfreq_range)))
        missing = float('nan')
        for w, scale in enumerate(self.scale_range):
            for x, maxdist in enumerate(self.maxdist_range):
                for y, upfreq in enumerate(self.upfreq_range):
                    for z, lowfreq in enumerate(self.lowfreq_range):
                        key = self._params_key(scale, maxdist,
                                               upfreq, lowfreq)
                        results[w, x, y, z] = self.results.get(key, missing)
        return results

    def write_result(self, f_name):
        """
        This function writes a log file of all the values tested for each
        parameter, and the resulting correlation value.

        This file can be used to load or merge data a posteriori using the
        function pytadbit.imp.impoptimizer.IMPoptimizer.load_from_file

        :param f_name: file name with the absolute path
        """
        # The context manager closes the file even if a write fails.
        with open(f_name, 'w') as out:
            out.write(('## n_models: %s cutoff: %s n_keep: %s ' +
                       'close_bins: %s\n') % (self.n_models, self.cutoff,
                                              self.n_keep, self.close_bins))
            out.write('# scale\tmax_dist\tup_freq\tlow_freq\tcorrelation\n')
            for scale in self.scale_range:
                for maxdist in self.maxdist_range:
                    for upfreq in self.upfreq_range:
                        for lowfreq in self.lowfreq_range:
                            key = self._params_key(scale, maxdist,
                                                   upfreq, lowfreq)
                            # Combinations never computed are not written.
                            if key not in self.results:
                                continue
                            out.write('%s\t%s\t%s\t%s\t%s\n' % (
                                scale, maxdist, upfreq, lowfreq,
                                self.results[key]))

    def load_from_file(self, f_name):
        """
        Loads the optimized parameters from a file generated with the function:
        pytadbit.imp.impoptimizer.IMPoptimizer.write_result.

        Parameter values already present in the ranges are not duplicated. If
        the file holds a result for a combination that is already stored, the
        value read from the file replaces the stored one.

        :param f_name: file name with the absolute path

        :raises Exception: if the n_models, cutoff, n_keep or close_bins
           recorded in the file header differ from those of this instance
        """
        expected = [self.n_models, self.cutoff, self.n_keep, self.close_bins]
        # The context manager guarantees the file is closed, also on error.
        with open(f_name) as handler:
            for line in handler:
                # Check same parameters
                if line.startswith('##'):
                    (n_models, _, cutoff, _,
                     n_keep, _, close_bins) = line.split()[2:]
                    found = [int(n_models), int(cutoff),
                             int(n_keep), int(close_bins)]
                    if found != expected:
                        raise Exception('Parameters does not match: %s\n%s' % (
                            found, expected))
                # Skip header/comment lines, and blank lines (which would
                # otherwise fail to unpack into five fields).
                if line.startswith('#') or not line.strip():
                    continue
                scale, maxdist, upfreq, lowfreq, result = line.split()
                scale, maxdist, upfreq, lowfreq = (
                    float(scale), int(maxdist), float(upfreq), float(lowfreq))
                self.results[self._params_key(scale, maxdist,
                                              upfreq, lowfreq)] = float(result)
                for values, value in ((self.scale_range, scale),
                                      (self.maxdist_range, maxdist),
                                      (self.upfreq_range, upfreq),
                                      (self.lowfreq_range, lowfreq)):
                    if value not in values:
                        values.append(value)
        self.scale_range.sort()
        self.maxdist_range.sort()
        self.lowfreq_range.sort()
        self.upfreq_range.sort()
def my_round(num, val=4):
    """
    Render a number as a string after rounding it to *val* decimal places.

    Values that are whole after rounding are written without a decimal part
    (e.g. 500.0 gives '500'); any other value is written as the rounded
    number itself.

    :param num: number to convert
    :param 4 val: number of decimal places passed to round

    :returns: the string representation of the rounded number
    """
    rounded = round(num, val)
    whole = int(rounded)
    if rounded == whole:
        return str(whole)
    return str(rounded)

Table Of Contents