"""
06 Aug 2013
"""
from bisect import bisect_left
from itertools import combinations
from pytadbit.eqv_rms_drms import rmsdRMSD_wrapper
from pytadbit.consistency import consistency_wrapper
from math import log10
import numpy as np
class Interpolate(object):
"""
simple linear interpolation
"""
def __init__(self, x_list, y_list):
for i, (x, y) in enumerate(zip(x_list, x_list[1:])):
if y - x < 0:
raise ValueError("x must be in strictly ascending")
if y - x == 0 and i >= len(x_list)-2:
x_list = x_list[:-1]
y_list = y_list[:-1]
if any(y - x <= 0 for x, y in zip(x_list, x_list[1:])):
raise ValueError("x must be in strictly ascending")
x_list = self.x_list = map(float, x_list)
y_list = self.y_list = map(float, y_list)
intervals = zip(x_list, x_list[1:], y_list, y_list[1:])
self.slopes = [(y2 - y1)/(x2 - x1) for x1, x2, y1, y2 in intervals]
def __call__(self, x):
i = bisect_left(self.x_list, x) - 1
return self.y_list[i] + self.slopes[i] * (x - self.x_list[i])
def zscore(values, size):
"""
_______________________/___
/
/
/
/
/
/
/
/
/
/
/
/
/ score
___/_________________________________
/
"""
# do not take into account the diagonal
nop = dict([(i + size * i, None) for i in xrange(size)])
minv = min([v for v in values if v]) / 2
# get the log10 of values
vals = [log10(v) if v > 0 and not v in nop else log10(minv) for v in values]
mean_v = np.mean(vals)
std_v = np.std(vals)
# replace values by z-score
for i in xrange(len(values)):
if values[i] > 0:
values[i] = (vals[i] - mean_v) / std_v
elif values[i] == 0:
values[i] = (log10(minv) - mean_v) / std_v
else:
values[i] = -99
def calc_consistency(models, nloci, dcutoff=200):
combines = list(combinations(models, 2))
parts = [0 for _ in xrange(nloci)]
for pm in consistency_wrapper([models[m]['x'] for m in models],
[models[m]['y'] for m in models],
[models[m]['z'] for m in models],
nloci, dcutoff, range(len(models)),
len(models)):
for i, p in enumerate(pm):
parts[i] += p
return [float(p)/len(combines) * 100 for p in parts]
[docs]def calinski_harabasz(scores, clusters):
"""
Implementation of the CH score [CalinskiHarabasz1974]_, that has shown to be
one the most accurate way to compare clustering methods
[MilliganCooper1985]_ [Tibshirani2001]_.
The CH score is:
.. math::
CH(k) = \\frac{B(k) / (k-1)}{W(k)/(n - k)}
Where :math:`B(k)` and :math:`W(k)` are between and within cluster sums of
squares, with :math:`k` clusters, and :math:`n` the total number of
points (models in this case).
:param scores: a dict with, as keys, a tuple with a pair of models; and, as
value, the distance between these models.
:param clusters: a dict with, as key, the cluster number, and as value a
list of models
:param nmodels: total number of models
:returns: the CH score
"""
cluster_list = [c for c in clusters if len(clusters[c]) > 1]
if len(cluster_list) <= 1:
return 0.0
nmodels = sum([len(clusters[c]) for c in cluster_list])
between_cluster = (sum([sum([sum([scores[(md1, md2)]**2
for md1 in clusters[cl1]])
for md2 in clusters[cl2]])
/ (len(clusters[cl1]) * len(clusters[cl2]))
for cl1, cl2 in combinations(cluster_list, 2)])
/ ((len(cluster_list) - 1.0) / 2))
within_cluster = (sum([sum([scores[(md1, md2)]**2
for md1, md2 in combinations(clusters[cls], 2)])
/ (len(clusters[cls]) * (len(clusters[cls]) - 1.0) / 2)
for cls in cluster_list]))
return ((between_cluster / (len(cluster_list) - 1))
/
(within_cluster / (nmodels - len(cluster_list))))
[docs]def calc_eqv_rmsd(models, nloci, dcutoff=200, var='score', one=False):
"""
:param nloci: number of particles per model
:param 200 dcutoff: distance in nanometer from which it is considered
that two particles are separated.
:param 0.75 fact: Factor for equivalent positions
:param 'score' var: value to return, can be either (i) 'drmsd' (symmetry
independent: mirrors will show no differences) (ii) 'score' that is:
::
dRMSD[i] / max(dRMSD)
score[i] = eqvs[i] * -----------------------
RMSD[i] / max(RMSD)
where eqvs[i] is the number of equivalent position for the ith
pairwise model comparison.
:returns: a score (depends on 'var' argument)
"""
scores = rmsdRMSD_wrapper([models[m]['x'] for m in xrange(len(models))],
[models[m]['y'] for m in xrange(len(models))],
[models[m]['z'] for m in xrange(len(models))],
nloci, dcutoff, range(len(models)),
len(models), int(one))
# scores = {}
# nrmsds = []
# drmsds = []
# for md1 in xrange(len(models)):
# md1s = models[md1]
# for md2 in xrange(md1 + 1, len(models)):
# md2s = models[md2]
# eqv, nrmsd, drmsd = rmsdRMSD_wrapper(
# md1s['x'], md1s['y'], md1s['z'],
# md2s['x'], md2s['y'], md2s['z'], nloci, dcutoff, 0)
# nrmsds.append(nrmsd)
# drmsds.append(drmsd)
# scores[(md1, md2)] = eqv * drmsd / nrmsd
# if one:
# return drmsd
# max_rmsd_ov_max_drmsd = max(nrmsds) / max(drmsds)
# if var=='score':
# for md1, md2 in scores.keys()[:]:
# score = scores[(md1, md2)] * max_rmsd_ov_max_drmsd
# scores[(md1, md2)] = score
# scores[(md2, md1)] = score
# elif var=='drmsd':
# for i, (md1, md2) in enumerate(scores.keys()):
# scores[(md2, md1)] = drmsds[i]
return scores
def dihedral(a,b,c,d):
"""
Calculates dihedral angle between 4 points in 3D (array with x,y,z)
"""
v1 = getNormedVector(b - a)
v2 = getNormedVector(b - c)
v3 = getNormedVector(c - b)
v4 = getNormedVector(c - d)
v1v2 = np.cross(v1, v2)
v2v3 = np.cross(v3, v4)
sign = -1 if np.linalg.det([v2, v1v2, v2v3]) < 0 else 1
angle = getAngle(v1v2, v2v3)
angle, sign = (angle, sign) if angle <= 90 else (180 - angle, - sign)
return sign * angle
def getNormedVector(dif):
return (dif) / np.linalg.norm(dif)
def getAngle(v1v2, v2v3):
return np.rad2deg(
np.arccos(np.dot(
v1v2 / np.linalg.norm(v1v2),
v2v3.T / np.linalg.norm(v2v3)))
)
# from numpy import zeros
# from math import acos
# def crossproduct(ar1, ar2, base=1) :
# """Calculate the cross-product of two vectors.
# Positional arguments :
# ar1 -- first vector (ndarray)
# ar2 -- second vector (ndarray)
# their base is given by the keyword argument of the same name
# Keyword arguments :
# base -- base index (default 1)
# """
# for ar_ in (ar1, ar2) :
# if ar_ is None or 3 + base != len(ar_) :
# raise Exception('Invalid parameter passed')
# q1_ = ar1[base+1] * ar2[base+2] - ar1[base+2] * ar2[base+1]
# q2_ = ar1[base+2] * ar2[base+0] - ar1[base+0] * ar2[base+2]
# q3_ = ar1[base+0] * ar2[base+1] - ar1[base+1] * ar2[base+0]
# ans = zeros(base + 3, 'd')
# ans[base:] = (q1_, q2_, q3_)
# return ans
# def spatproduct(ar1, ar2, ar3, base=1) :
# """Calculate the scalar triple product of three vectors.
# Positional arguments :
# ar1 -- first vector (ndarray)
# ar2 -- second vector (ndarray)
# ar3 -- third vector (ndarray)
# their base is given by the keyword argument of the same name
# Keyword arguments :
# base -- base index (default 1)
# """
# for ar_ in (ar1, ar2, ar3) :
# if ar_ is None or 3 + base != len(ar_) :
# raise Exception('Invalid parameter passed')
# mat_ = zeros((3, 3), 'd')
# mat_[0] = ar1[base:]
# mat_[1] = ar2[base:]
# mat_[2] = ar3[base:]
# return np.linalg.det(mat_)
# def angle_vectors(ar1, ar2, base=1) :
# """Calculate the angle between two vectors.
# Positional arguments :
# ar1 -- first vector (ndarray)
# br2 -- second vector (ndarray)
# their base is given by the keyword argument of the same name
# Keyword arguments :
# base -- base index (default 1)
# Return the angle in grad.
# """
# for ar_ in (ar1, ar2) :
# if ar_ is None or 3 + base != len(ar_) :
# raise Exception('Invalid parameter passed')
# ar1 = np.array(ar1[base:], 'd')
# ar2 = np.array(ar2[base:], 'd')
# scalar_prod = np.dot(ar1, ar2)
# if 0. == scalar_prod :
# return 90./np.pi
# arg_ = scalar_prod / np.sqrt(np.dot(ar1, ar1) * np.dot(ar2, ar2))
# # sometimes acos behaves strange...
# if 1. < arg_ :
# arg_ = 1.
# elif -1. > arg_ :
# arg_ = -1
# return 180./np.pi * acos(arg_)
# def distance(ar1, ar2, base=1) :
# """Calculate the distance between two points.
# Positional arguments :
# ar1 -- first point (ndarray)
# ar2 -- second point (ndarray)
# their base is given by the keyword argument of the same name
# Keyword arguments :
# base -- base index (default 1)
# """
# for ar_ in (ar1, ar2) :
# if ar_ is None or 3 + base != len(ar_) :
# raise Exception('Invalid parameter passed')
# dist = np.array(ar1[base:], 'd') - np.array(ar2[base:], 'd')
# return np.linalg.norm(dist)
# def angle(ar1, ar2, ar3, base=1) :
# """Calculate the angle between three points.
# Positional arguments :
# ar1 -- first point (ndarray)
# ar2 -- second point (ndarray)
# ar3 -- third point (ndarray)
# their base is given by the keyword argument of the same name
# Keyword arguments :
# base -- base index (default 1)
# Return the angle in grad.
# """
# for ar_ in (ar1, ar2, ar3) :
# if ar_ is None or 3 + base != len(ar_) :
# raise Exception('Invalid parameter passed')
# v_ba = np.array(ar1, 'd') - np.array(ar2, 'd')
# v_bc = np.array(ar3, 'd') - np.array(ar2, 'd')
# return angle_vectors(v_ba, v_bc, base)
# def dihedral_bis(ar1, ar2, ar3, ar4, base=0) :
# """Calculate the dihedral angle between four points.
# Positional arguments :
# ar1 -- first point (ndarray)
# ar2 -- second point (ndarray)
# ar3 -- third point (ndarray)
# ar4 -- fourth point (ndarray)
# their base is given by the keyword argument of the same name
# Keyword arguments :
# base -- base index (default 1)
# Return the dihedral angle in grad.
# """
# # for ar_ in (ar1, ar2, ar3, ar4) :
# # if ar_ is None or 3 + base != len(ar_) :
# # raise Exception('Invalid parameter passed')
# v_ba = np.array(ar1, 'd') - np.array(ar2, 'd')
# v_bc = np.array(ar3, 'd') - np.array(ar2, 'd')
# v_cb = np.array(ar2, 'd') - np.array(ar3, 'd')
# v_cd = np.array(ar4, 'd') - np.array(ar3, 'd')
# # normal to the plane (a, b, c)
# norm1 = crossproduct(v_ba, v_bc, base)
# # normal to the plane (b, c, d)
# norm2 = crossproduct(v_cb, v_cd, base)
# # scalar triple product which defines the sign of the dihedral angle
# if 0. > spatproduct(v_bc, norm1, norm2, base) :
# sign = -1.
# else :
# sign = +1.
# return sign * angle_vectors(norm1, norm2, base)