Source code for pytadbit.utils.tadmaths

"""
06 Aug 2013


"""

from bisect   import bisect_left
from itertools import combinations
from pytadbit.eqv_rms_drms import rmsdRMSD_wrapper
from pytadbit.consistency import consistency_wrapper
from math     import log10
import numpy as np


class Interpolate(object):
    """
    simple linear interpolation
    """
    def __init__(self, x_list, y_list):
        for i, (x, y) in enumerate(zip(x_list, x_list[1:])):
            if y - x < 0:
                raise ValueError("x must be in strictly ascending")
            if y - x == 0 and i >= len(x_list)-2:
                x_list = x_list[:-1]
                y_list = y_list[:-1]
        if any(y - x <= 0 for x, y in zip(x_list, x_list[1:])):
            raise ValueError("x must be in strictly ascending")
        x_list = self.x_list = map(float, x_list)
        y_list = self.y_list = map(float, y_list)
        intervals = zip(x_list, x_list[1:], y_list, y_list[1:])
        self.slopes = [(y2 - y1)/(x2 - x1) for x1, x2, y1, y2 in intervals]
        
    def __call__(self, x):
        i = bisect_left(self.x_list, x) - 1
        return self.y_list[i] + self.slopes[i] * (x - self.x_list[i])


def zscore(values, size):
    """
    _______________________/___
                          /
                         /
                        /
                       /
                      /
                     /
                    /
                   /
                  /
                 /
                /
               /
              /                     score
          ___/_________________________________
            /

    """
    # do not take into account the diagonal
    nop = dict([(i + size * i,  None) for i in xrange(size)])
    minv = min([v for v in values if v]) / 2
    # get the log10 of values
    vals = [log10(v) if v > 0 and not v in nop else log10(minv) for v in values]
    mean_v = np.mean(vals)
    std_v = np.std(vals)
    # replace values by z-score
    for i in xrange(len(values)):
        if values[i] > 0:
            values[i] = (vals[i] - mean_v) / std_v
        elif values[i] == 0:
            values[i] = (log10(minv) - mean_v) / std_v
        else:
            values[i] = -99


def calc_consistency(models, nloci, dcutoff=200):
    combines = list(combinations(models, 2))
    parts = [0 for _ in xrange(nloci)]
    for pm in consistency_wrapper([models[m]['x'] for m in models],
                                  [models[m]['y'] for m in models],
                                  [models[m]['z'] for m in models],
                                  nloci, dcutoff, range(len(models)),
                                  len(models)):
        for i, p in enumerate(pm):
            parts[i] += p
    return [float(p)/len(combines) * 100 for p in parts]


[docs]def calinski_harabasz(scores, clusters): """ Implementation of the CH score [CalinskiHarabasz1974]_, that has shown to be one the most accurate way to compare clustering methods [MilliganCooper1985]_ [Tibshirani2001]_. The CH score is: .. math:: CH(k) = \\frac{B(k) / (k-1)}{W(k)/(n - k)} Where :math:`B(k)` and :math:`W(k)` are between and within cluster sums of squares, with :math:`k` clusters, and :math:`n` the total number of points (models in this case). :param scores: a dict with, as keys, a tuple with a pair of models; and, as value, the distance between these models. :param clusters: a dict with, as key, the cluster number, and as value a list of models :param nmodels: total number of models :returns: the CH score """ cluster_list = [c for c in clusters if len(clusters[c]) > 1] if len(cluster_list) <= 1: return 0.0 nmodels = sum([len(clusters[c]) for c in cluster_list]) between_cluster = (sum([sum([sum([scores[(md1, md2)]**2 for md1 in clusters[cl1]]) for md2 in clusters[cl2]]) / (len(clusters[cl1]) * len(clusters[cl2])) for cl1, cl2 in combinations(cluster_list, 2)]) / ((len(cluster_list) - 1.0) / 2)) within_cluster = (sum([sum([scores[(md1, md2)]**2 for md1, md2 in combinations(clusters[cls], 2)]) / (len(clusters[cls]) * (len(clusters[cls]) - 1.0) / 2) for cls in cluster_list])) return ((between_cluster / (len(cluster_list) - 1)) / (within_cluster / (nmodels - len(cluster_list))))
[docs]def calc_eqv_rmsd(models, nloci, dcutoff=200, var='score', one=False): """ :param nloci: number of particles per model :param 200 dcutoff: distance in nanometer from which it is considered that two particles are separated. :param 0.75 fact: Factor for equivalent positions :param 'score' var: value to return, can be either (i) 'drmsd' (symmetry independent: mirrors will show no differences) (ii) 'score' that is: :: dRMSD[i] / max(dRMSD) score[i] = eqvs[i] * ----------------------- RMSD[i] / max(RMSD) where eqvs[i] is the number of equivalent position for the ith pairwise model comparison. :returns: a score (depends on 'var' argument) """ scores = rmsdRMSD_wrapper([models[m]['x'] for m in xrange(len(models))], [models[m]['y'] for m in xrange(len(models))], [models[m]['z'] for m in xrange(len(models))], nloci, dcutoff, range(len(models)), len(models), int(one)) # scores = {} # nrmsds = [] # drmsds = [] # for md1 in xrange(len(models)): # md1s = models[md1] # for md2 in xrange(md1 + 1, len(models)): # md2s = models[md2] # eqv, nrmsd, drmsd = rmsdRMSD_wrapper( # md1s['x'], md1s['y'], md1s['z'], # md2s['x'], md2s['y'], md2s['z'], nloci, dcutoff, 0) # nrmsds.append(nrmsd) # drmsds.append(drmsd) # scores[(md1, md2)] = eqv * drmsd / nrmsd # if one: # return drmsd # max_rmsd_ov_max_drmsd = max(nrmsds) / max(drmsds) # if var=='score': # for md1, md2 in scores.keys()[:]: # score = scores[(md1, md2)] * max_rmsd_ov_max_drmsd # scores[(md1, md2)] = score # scores[(md2, md1)] = score # elif var=='drmsd': # for i, (md1, md2) in enumerate(scores.keys()): # scores[(md2, md1)] = drmsds[i] return scores
def dihedral(a,b,c,d): """ Calculates dihedral angle between 4 points in 3D (array with x,y,z) """ v1 = getNormedVector(b - a) v2 = getNormedVector(b - c) v3 = getNormedVector(c - b) v4 = getNormedVector(c - d) v1v2 = np.cross(v1, v2) v2v3 = np.cross(v3, v4) sign = -1 if np.linalg.det([v2, v1v2, v2v3]) < 0 else 1 angle = getAngle(v1v2, v2v3) angle, sign = (angle, sign) if angle <= 90 else (180 - angle, - sign) return sign * angle def getNormedVector(dif): return (dif) / np.linalg.norm(dif) def getAngle(v1v2, v2v3): return np.rad2deg( np.arccos(np.dot( v1v2 / np.linalg.norm(v1v2), v2v3.T / np.linalg.norm(v2v3))) ) # from numpy import zeros # from math import acos # def crossproduct(ar1, ar2, base=1) : # """Calculate the cross-product of two vectors. # Positional arguments : # ar1 -- first vector (ndarray) # ar2 -- second vector (ndarray) # their base is given by the keyword argument of the same name # Keyword arguments : # base -- base index (default 1) # """ # for ar_ in (ar1, ar2) : # if ar_ is None or 3 + base != len(ar_) : # raise Exception('Invalid parameter passed') # q1_ = ar1[base+1] * ar2[base+2] - ar1[base+2] * ar2[base+1] # q2_ = ar1[base+2] * ar2[base+0] - ar1[base+0] * ar2[base+2] # q3_ = ar1[base+0] * ar2[base+1] - ar1[base+1] * ar2[base+0] # ans = zeros(base + 3, 'd') # ans[base:] = (q1_, q2_, q3_) # return ans # def spatproduct(ar1, ar2, ar3, base=1) : # """Calculate the scalar triple product of three vectors. # Positional arguments : # ar1 -- first vector (ndarray) # ar2 -- second vector (ndarray) # ar3 -- third vector (ndarray) # their base is given by the keyword argument of the same name # Keyword arguments : # base -- base index (default 1) # """ # for ar_ in (ar1, ar2, ar3) : # if ar_ is None or 3 + base != len(ar_) : # raise Exception('Invalid parameter passed') # mat_ = zeros((3, 3), 'd') # mat_[0] = ar1[base:] # mat_[1] = ar2[base:] # mat_[2] = ar3[base:] # return np.linalg.det(mat_) # def angle_vectors(ar1, ar2, base=1) : # """Calculate the angle between two vectors. # Positional arguments : # ar1 -- first vector (ndarray) # br2 -- second vector (ndarray) # their base is given by the keyword argument of the same name # Keyword arguments : # base -- base index (default 1) # Return the angle in grad. # """ # for ar_ in (ar1, ar2) : # if ar_ is None or 3 + base != len(ar_) : # raise Exception('Invalid parameter passed') # ar1 = np.array(ar1[base:], 'd') # ar2 = np.array(ar2[base:], 'd') # scalar_prod = np.dot(ar1, ar2) # if 0. == scalar_prod : # return 90./np.pi # arg_ = scalar_prod / np.sqrt(np.dot(ar1, ar1) * np.dot(ar2, ar2)) # # sometimes acos behaves strange... # if 1. < arg_ : # arg_ = 1. # elif -1. > arg_ : # arg_ = -1 # return 180./np.pi * acos(arg_) # def distance(ar1, ar2, base=1) : # """Calculate the distance between two points. # Positional arguments : # ar1 -- first point (ndarray) # ar2 -- second point (ndarray) # their base is given by the keyword argument of the same name # Keyword arguments : # base -- base index (default 1) # """ # for ar_ in (ar1, ar2) : # if ar_ is None or 3 + base != len(ar_) : # raise Exception('Invalid parameter passed') # dist = np.array(ar1[base:], 'd') - np.array(ar2[base:], 'd') # return np.linalg.norm(dist) # def angle(ar1, ar2, ar3, base=1) : # """Calculate the angle between three points. # Positional arguments : # ar1 -- first point (ndarray) # ar2 -- second point (ndarray) # ar3 -- third point (ndarray) # their base is given by the keyword argument of the same name # Keyword arguments : # base -- base index (default 1) # Return the angle in grad. # """ # for ar_ in (ar1, ar2, ar3) : # if ar_ is None or 3 + base != len(ar_) : # raise Exception('Invalid parameter passed') # v_ba = np.array(ar1, 'd') - np.array(ar2, 'd') # v_bc = np.array(ar3, 'd') - np.array(ar2, 'd') # return angle_vectors(v_ba, v_bc, base) # def dihedral_bis(ar1, ar2, ar3, ar4, base=0) : # """Calculate the dihedral angle between four points. # Positional arguments : # ar1 -- first point (ndarray) # ar2 -- second point (ndarray) # ar3 -- third point (ndarray) # ar4 -- fourth point (ndarray) # their base is given by the keyword argument of the same name # Keyword arguments : # base -- base index (default 1) # Return the dihedral angle in grad. # """ # # for ar_ in (ar1, ar2, ar3, ar4) : # # if ar_ is None or 3 + base != len(ar_) : # # raise Exception('Invalid parameter passed') # v_ba = np.array(ar1, 'd') - np.array(ar2, 'd') # v_bc = np.array(ar3, 'd') - np.array(ar2, 'd') # v_cb = np.array(ar2, 'd') - np.array(ar3, 'd') # v_cd = np.array(ar4, 'd') - np.array(ar3, 'd') # # normal to the plane (a, b, c) # norm1 = crossproduct(v_ba, v_bc, base) # # normal to the plane (b, c, d) # norm2 = crossproduct(v_cb, v_cd, base) # # scalar triple product which defines the sign of the dihedral angle # if 0. > spatproduct(v_bc, norm1, norm2, base) : # sign = -1. # else : # sign = +1. # return sign * angle_vectors(norm1, norm2, base)

Table Of Contents