# -*- coding: utf-8 -*-
# Aqua-Duct, a tool facilitating analysis of the flow of solvent molecules in molecular dynamic simulations
# Copyright (C) 2016-2017 Tomasz Magdziarz, Alicja Płuciennik, Michał Stolarczyk <info@aquaduct.pl>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from aquaduct import logger, logger_name
################################################################################
import multiprocessing as mp
import ConfigParser
import cPickle as pickle
import StringIO
import copy
import gzip
import os
import re
import operator
import shlex
import sys
import numpy as np
import MDAnalysis as mda
import roman # TODO: remove this dependency!
from collections import namedtuple, OrderedDict # TODO: check if OrderedDict is REALLY used
from functools import wraps
from itertools import izip_longest
from keyword import iskeyword
from distutils.version import LooseVersion
from scipy.spatial.distance import cdist
# If scipy is relatively old and numpy is relatively new this triggers warning on oldnumeric module deprecation.
# This warning emerges if MDAnalysis is imported and then scipy. Observed under Ubuntu 14.04.
from aquaduct import greetings as greetings_aquaduct
from aquaduct import version as aquaduct_version
from aquaduct import version_nice as aquaduct_version_nice
from aquaduct.geom import traces
from aquaduct.geom.cluster import PerformClustering, DBSCAN, AffinityPropagation, MeanShift, KMeans, Birch, BarberCluster, get_required_params
from aquaduct.geom.cluster import AVAILABLE_METHODS as available_clusterization_methods
from aquaduct.geom.convexhull import is_point_within_convexhull
from aquaduct.geom.master import CTypeSpathsCollection
from aquaduct.geom.smooth import WindowSmooth, MaxStepSmooth, WindowOverMaxStepSmooth, ActiveWindowSmooth, \
ActiveWindowOverMaxStepSmooth, DistanceWindowSmooth, DistanceWindowOverMaxStepSmooth, SavgolSmooth
from aquaduct.traj.dumps import TmpDumpWriterOfMDA
from aquaduct.traj.inlets import Inlets, InletTypeCodes
from aquaduct.traj.paths import GenericPaths, yield_single_paths
from aquaduct.traj.reader import ReadViaMDA,atom2vdw_radius
from aquaduct.traj.selections import CompactSelectionMDA, SelectionMDA
from aquaduct.utils import clui
from aquaduct.utils.helpers import range2int, Auto, what2what, lind, is_number
from aquaduct.utils.multip import optimal_threads
from aquaduct.traj.barber import WhereToCut
def version():
# TODO: remove it
return 0, 10, 0
[docs]def version_nice():
return '.'.join(map(str, version()))
def version_onenumber():
return ''.join(map(str, version()))
__mail__ = 'info@aquaduct.pl'
__version__ = version_nice()
###############################################################################
# configuration file helpers
class ConfigSpecialNames:
special_names_dict = {'none': None,
'null': None,
'true': True,
'false': False,
'yes': True,
'no': False,
'auto': Auto}
def special_name(self, name):
if isinstance(name, (str, unicode)):
if name.lower() in self.special_names_dict:
return self.special_names_dict[name.lower()]
return name
# TODO: add parser for initial checking of configuration file
[docs]class ValveConfig(object, ConfigSpecialNames):
[docs] def __init__(self):
self.config = self.get_default_config()
self.config_filename = ''
def __make_options_nt(self, input_options):
# options = {opt: self.special_name(input_options[opt]) for opt in input_options}
options = list()
for opt in input_options:
if iskeyword(opt):
logger.warning('Invalid keyword <%s> in config file skipped. Check configuration file.' % opt)
continue
if opt.replace('_','').isalnum():
options.append((opt, self.special_name(input_options[opt])))
else:
logger.debug('Keyword <%s> in config file skipped.' % opt)
options = OrderedDict(options)
options_nt = namedtuple('Options', options.keys())
return options_nt(**options)
[docs] @staticmethod
def common_config_names():
# execute - what to do: skip, run
# load - load previous results form file name
# save - save results to file name
return 'execute dump'.split()
[docs] @staticmethod
def common_traj_data_config_names():
# scope - scope definition
# scope_convexhull - take convex hull of scope, true of false
# object - object definition
return 'scope scope_convexhull object'.split()
[docs] @staticmethod
def global_name():
return 'global'
[docs] @staticmethod
def cluster_name():
return 'clusterization'
[docs] @staticmethod
def recluster_name():
return 'reclusterization'
[docs] @staticmethod
def recursive_clusterization_name():
return 'recursive_clusterization'
[docs] @staticmethod
def recursive_threshold_name():
return 'recursive_threshold'
[docs] @staticmethod
def smooth_name():
return 'smooth'
[docs] def stage_names(self, nr=None):
if nr is None:
return [self.stage_names(nr) for nr in range(5)]
else:
if nr == 0:
return 'traceable_residues'
elif nr == 1:
return 'raw_paths'
elif nr == 2:
return 'separate_paths'
elif nr == 3:
return 'inlets_clusterization'
elif nr == 4:
return 'analysis'
elif nr == 5:
return 'visualize'
raise NotImplementedError('Stage %r is not implemented.' % nr)
[docs] def get_common_traj_data(self, stage):
assert isinstance(stage, int)
# options = {name: None for name in self.common_traj_data_config_names()}
options = OrderedDict(((name, None) for name in self.common_traj_data_config_names()))
for nr in range(stage + 1)[::-1]:
section = self.stage_names(nr)
for name in self.common_traj_data_config_names():
if self.config.has_option(section, name):
value = self.special_name(self.config.get(section, name))
if (value is not None) and (options[name] is None):
options.update({name: value})
return self.__make_options_nt(options)
[docs] def get_global_options(self):
section = self.global_name()
names = self.config.options(section)
# options = {name: self.config.get(section, name) for name in names}
options = OrderedDict(((name, self.config.get(section, name)) for name in names))
return self.__make_options_nt(options)
[docs] def get_stage_options(self, stage):
assert isinstance(stage, int)
stage_name = self.stage_names(stage)
names = self.config.options(stage_name)
# options = {name: self.config.get(stage_name, name) for name in names}
options = OrderedDict(((name, self.config.get(stage_name, name)) for name in names))
if stage in [0, 1]:
options.update(self.get_common_traj_data(stage)._asdict())
return self.__make_options_nt(options)
[docs] def get_cluster_options(self, section_name=None):
if section_name is None:
section = self.cluster_name()
else:
section = section_name
names = self.config.options(section)
# options = {name: self.config.get(section, name) for name in names}
options = OrderedDict(((name, self.config.get(section, name)) for name in names))
# special recursive_clusterization option
if self.recursive_clusterization_name() not in options:
options.update({self.recursive_clusterization_name(): None})
if self.recursive_threshold_name() not in options:
options.update({self.recursive_threshold_name(): None})
return self.__make_options_nt(options)
[docs] def get_recluster_options(self):
return self.get_cluster_options(section_name=self.recluster_name())
[docs] def get_smooth_options(self):
section = self.smooth_name()
names = self.config.options(section)
# options = {name: self.config.get(section, name) for name in names}
options = OrderedDict(((name, self.config.get(section, name)) for name in names))
return self.__make_options_nt(options)
[docs] def get_default_config(self):
# snr = 0 # stage number
config = ConfigParser.RawConfigParser()
#config = RawConfigParserWithComments()
def common(section):
for setting in self.common_config_names():
value = None
if setting == 'execute':
value = 'runonce'
elif setting == 'dump':
value = '%d_%s_data.dump' % (snr + 1, section)
if value is None:
config.set(section, setting)
else:
config.set(section, setting, value=value)
def common_traj_data(section,commented=False):
for setting in self.common_traj_data_config_names():
if commented:
setting = '#'+setting
config.set(section, setting, 'None')
################
# global settings
section = self.global_name()
config.add_section(section)
#config.add_comment(section,'global settings')
# top - top file name
# nc - netcdf file name
config.set(section, 'top') # topology
config.set(section, 'trj') # trajectory
# config.set(section, 'pbar', 'simple')
################
snr = 0 # stage number
# stage I
# find traceable residues
section = self.stage_names(snr)
config.add_section(section)
common(section)
common_traj_data(section)
################
snr += 1
# stage II
# find raw paths
section = self.stage_names(snr)
config.add_section(section)
common(section)
common_traj_data(section)
config.set(section, 'clear_in_object_info', 'False')
################
snr += 1
# stage III
# create separate frames
section = self.stage_names(snr)
config.add_section(section)
common(section)
config.set(section, 'auto_barber_tovdw', 'True')
config.set(section, 'auto_barber_maxcut', '2.8')
config.set(section, 'auto_barber_mincut', 'None')
config.set(section, 'auto_barber_maxcut_level', 'True')
config.set(section, 'auto_barber_mincut_level', 'True')
config.set(section, 'auto_barber', 'False')
config.set(section, 'discard_empty_paths', 'True')
config.set(section, 'sort_by_id', 'True')
config.set(section, 'apply_smoothing', 'False')
config.set(section, 'apply_soft_smoothing', 'True')
config.set(section, 'discard_short_paths', '1')
################
snr += 1
# stage IV
# inlets clusterisation
section = self.stage_names(snr)
config.add_section(section)
common(section)
config.set(section, 'max_level', '0')
config.set(section, 'recluster_outliers', 'False')
config.set(section, 'detect_outliers', 'False')
config.set(section, 'singletons_outliers', 'False')
config.set(section, 'create_master_paths', 'False')
################
# smooth
section = self.smooth_name()
config.add_section(section)
config.set(section, 'method', 'window')
################
# clusterization
section = self.cluster_name()
config.add_section(section)
config.set(section, 'method', 'barber')
config.set(section, self.recursive_clusterization_name(), self.cluster_name())
config.set(section, self.recursive_threshold_name(), 'False')
################
# reclusterization
section = self.recluster_name()
config.add_section(section)
config.set(section, 'method', 'dbscan')
config.set(section, self.recursive_clusterization_name(), 'False')
config.set(section, self.recursive_threshold_name(), 'False')
################
snr += 1
# stage V
# analysis
section = self.stage_names(snr)
config.add_section(section)
common(section)
config.remove_option(section, 'dump')
config.set(section, 'save', '%d_%s_results.txt' % (snr + 1, section))
config.set(section, 'dump_config', 'True')
################
snr += 1
# stage VI
# visualize
section = self.stage_names(snr)
config.add_section(section)
common(section)
config.remove_option(section, 'dump')
config.set(section, 'save', '%d_%s_results.py' % (snr + 1, section))
config.set(section, 'simply_smooths', 'RecursiveVector')
# visualize spaths, all paths in one object
config.set(section, 'all_paths_raw', 'False')
config.set(section, 'all_paths_smooth', 'False')
config.set(section, 'all_paths_split', 'False') # split by in obj out
config.set(section, 'all_paths_raw_io', 'False')
config.set(section, 'all_paths_smooth_io', 'False')
# visualize spaths, separate objects
config.set(section, 'paths_raw', 'False')
config.set(section, 'paths_smooth', 'False')
config.set(section, 'paths_states', 'False')
config.set(section, 'paths_raw_io', 'False')
config.set(section, 'paths_smooth_io', 'False')
config.set(section, 'ctypes_raw', 'False')
config.set(section, 'ctypes_smooth', 'False')
# visualize clusters
config.set(section, 'inlets_clusters', 'False')
# show protein
config.set(section, 'show_molecule', 'None')
config.set(section, 'show_molecule_frames', '0')
config.set(section, 'show_chull', 'None')
config.set(section, 'show_chull_frames', '0')
config.set(section, 'show_object', 'None')
config.set(section, 'show_object_frames', '0')
return config
[docs] def load_config(self, filename):
self.config.read(filename)
self.config_filename = filename
[docs] def save_config_stream(self, fs):
self.config.write(fs)
[docs] def save_config(self, filename):
with open(filename, 'w') as fs:
self.save_config_stream(fs)
[docs] def dump_config(self,dump_template=False):
skip_list = '''simply_smooths'''.split() # these options are very optional!
concise = True
if dump_template:
concise = False
output = []
options = [self.get_global_options()] + \
[self.get_stage_options(stage) for stage in range(6)] + \
[self.get_cluster_options(), self.get_recluster_options(),
self.get_smooth_options()]
names = [self.global_name()] + \
[self.stage_names(stage) for stage in range(6)] + \
[self.cluster_name(), self.recluster_name(),
self.smooth_name()]
def value2str(val):
if val is Auto:
val = "Auto"
else:
val = str(val)
return val
# this loop ensures it is returned in particular order
for opts, name in zip(options, names):
output.append('[%s]' % name) # section name
# general comments
if dump_template:
comment = self.get_general_comment(name)
if comment:
output.extend(['# %s' % line for line in comment])
for key,value in opts._asdict().iteritems(): # loop over options
if key in skip_list: continue
# comment scope etc. in stage II if dump_template
if dump_template and name == self.stage_names(1):
if key in self.common_traj_data_config_names():
key = '#' + key
output.append('%s = %s' % (key, value2str(value)))
if not concise: output.append('')
# is something missing? another loop over all additional sections
for miss in self.config.sections():
if miss in names: continue # skip if it was already dumped in the loop above
output.append('[%s]' % miss)
for key in self.config.options(miss): # loop over options in section miss
if key in skip_list: continue
value = self.config.get(miss, key)
output.append('%s = %s' % (key, value2str(value)))
if not concise: output.append('')
while not len(output[-1].strip()):
output.pop()
return output
################################################################################
# reader helper class
class TrajectoryReader(object):
def __init__(self, top, trj,frames_window=None):
assert isinstance(top, (str, unicode)), "Topology file name missing, %s given instead" % str(top)
assert isinstance(trj, (str, unicode)), "Trajectory file(s) name(s) missing, %s given instead" % str(trj)
self.top = top
self.trj = shlex.split(trj)
self.frames_window = frames_window
def get(self):
return ReadViaMDA(self.top, self.trj, window=self.frames_window)
@property
def max_frame(self):
with self.get() as tmp_reader:
return tmp_reader.number_of_frames - 1 # returns 0-based value
def rebuild_selection(selection, reader):
return CompactSelectionMDA(selection).toSelectionMDA(reader)
################################################################################
# convex hull helpers
# TODO: Move it to separate module.
# TODO: Following functions are or will be deprecated, remove them as soon as possible.
def CHullCheck(point):
return CHullCheck.chull.point_within(point)
def CHullCheck_init(args):
CHullCheck.chull = copy.deepcopy(args[0])
def CHullCheck_pool(chull, threads=optimal_threads.threads_count):
return mp.Pool(threads, CHullCheck_init, [(chull,)])
def CHullCheck_exec(chull, points, threads=optimal_threads.threads_count):
pool = CHullCheck_pool(chull, threads=threads)
out = pool.map(CHullCheck, points)
pool.close()
pool.join()
del pool
return out
################################################################################
# in scope helpers
def check_res_in_scope(options, scope, res, res_coords):
if options.scope_convexhull:
# TODO: Remove it! This is deprecated code. It, probably, never runs.
if len(res_coords) == 0:
return []
# find convex hull of protein
chull = scope.get_convexhull_of_atom_positions()
is_res_in_scope = CHullCheck_exec(chull, res_coords, threads=optimal_threads.threads_count)
else:
if res.unique_resids_number() == 0:
return []
res_in_scope_uids = scope.unique_resids(ikwid=True)
# res_in_scope_uids = traj_reader.parse_selection(options.scope).unique_resids(ikwid=True)
is_res_in_scope = [r.unique_resids(ikwid=True) in res_in_scope_uids for r in res.iterate_over_residues()]
return is_res_in_scope
def get_res_in_scope(is_res_in_scope, res):
res_new = None
for iris, r in zip(is_res_in_scope, res.iterate_over_residues()):
if iris:
if res_new is None:
res_new = r
else:
res_new += r
return res_new
################################################################################
# separators - logging
def sep():
return clui.gsep(sep='-', times=48)
def asep():
return clui.gsep(sep='=', times=72)
################################################################################
# save - load helpers
def save_dump(filename, data_to_save, **kwargs):
with clui.fbm('Saving data dump in %s file' % filename):
with gzip.open(filename, mode='w', compresslevel=9) as f:
# first version:
pickle.dump({'version': version(),
'aquaduct_version': aquaduct_version()}, f)
# then data to save:
pickle.dump(data_to_save, f)
# then other kwargs
pickle.dump(kwargs, f)
class LoadDumpWrapper(object):
"""This is wrapper for pickled data that provides compatibility
with earlier versions of Aqua-Duct.
Conversions in use:
1) replace 'aquaduct.' by 'aquaduct.'
"""
def __init__(self, filehandle):
self.fh = filehandle
def convert(self, s):
new_s = s
new_s = new_s.replace('aqueduct.', 'aquaduct.')
new_s = new_s.replace('aqueduct_version', 'aquaduct_version')
return new_s
def read(self, *args, **kwargs):
return self.convert(self.fh.read(*args, **kwargs))
def readline(self, *args, **kwargs):
return self.convert(self.fh.readline(*args, **kwargs))
def load_dump(filename):
with clui.fbm('Loading data dump from %s file' % filename):
with gzip.open(filename, mode='r') as protof:
f = LoadDumpWrapper(protof)
# version!
loaded_data = pickle.load(f)
check_versions(loaded_data)
# loaded data!
loaded_data = pickle.load(f)
# enything else?
try:
other_data = pickle.load(f)
loaded_data.update({'other_data': other_data})
except:
pass
return loaded_data
# loaded_data_nt = namedtuple('LoadedData', loaded_data.keys())
# return loaded_data_nt(**loaded_data)
def check_version_compliance(current, loaded, what):
if current[0] > loaded[0]:
logger.error('Loaded data has %s major version lower then the application.' % what)
if current[0] < loaded[0]:
logger.error('Loaded data has %s major version higher then the application.' % what)
if current[0] != loaded[0]:
logger.error('Possible problems with API compliance.')
if current[1] > loaded[1]:
logger.warning('Loaded data has %s minor version lower then the application.' % what)
if current[1] < loaded[1]:
logger.warning('Loaded data has %s minor version higher then the application.' % what)
if current[1] != loaded[1]:
logger.warning('Possible problems with API compliance.')
def check_versions(version_dict):
assert isinstance(version_dict, (dict, OrderedDict)), "File is corrupted, cannot read version data."
assert 'version' in version_dict, "File is corrupted, cannot read version data."
assert 'aquaduct_version' in version_dict, "File is corrupted, cannot read version data."
check_version_compliance(aquaduct_version(), version_dict['aquaduct_version'], 'Aqua-Duct')
check_version_compliance(version(), version_dict['version'], 'Valve')
################################################################################
# save - load per stage
def save_stage_dump(name, **kwargs):
# check if name is None
if name is not None:
# ok, check if some of kwargs have to be changed
data_to_save = {}
for key, value in kwargs.iteritems():
# SelectionMDA
if isinstance(value, SelectionMDA):
value = CompactSelectionMDA(value)
# options
if 'options' in key:
value = value._asdict()
data_to_save.update({key: value})
# now we are redy to save
save_dump(name, data_to_save)
def load_stage_dump(name, reader=None):
if name is not None:
loaded_data = {}
for key, value in load_dump(name).iteritems():
# CompactSelectionMDA
if isinstance(value, CompactSelectionMDA):
with reader.get() as traj_reader:
value = value.toSelectionMDA(traj_reader)
loaded_data.update({key: value})
return loaded_data
################################################################################
def get_smooth_method(soptions):
assert soptions.method in ['window', 'mss', 'window_mss', 'awin',
'awin_mss', 'dwin', 'dwin_mss',
'savgol'], 'Unknown smoothing method %s.' % soptions.method
opts = {}
if 'recursive' in soptions._asdict():
opts.update({'recursive': int(soptions.recursive)})
def window_opts():
if 'window' in soptions._asdict():
opts.update({'window': int(float(soptions.window))})
function_opts()
def awin_dwin_opts():
if 'window' in soptions._asdict():
opts.update({'window': float(soptions.window)})
function_opts()
def function_opts():
if 'function' in soptions._asdict():
assert soptions.function in ['mean', 'median'], 'Unknown smoothing function %s.' % soptions.function
if soptions.function == 'mean':
opts.update({'function': np.mean})
if soptions.function == 'median':
opts.update({'function': np.median})
def mss_opts():
if 'step' in soptions._asdict():
opts.update({'step': float(soptions.step)})
def savgol_opts():
window_length = 5 # TODO: magic constant (default value)
if 'window' in soptions._asdict():
window_length = int(float(soptions.window))
assert window_length % 2 == 1, 'Window in Savgol method must be positive odd number, %d given instead.' % window_length
opts.update({'window_length': window_length})
polyorder = 2 # TODO: magic constant (default value)
if 'polyorder' in soptions._asdict():
polyorder = int(float(soptions.polyorder))
assert polyorder > 0, 'Polynomial order should be greater then 0, %d given instead.' % polyorder
opts.update({'polyorder': polyorder})
assert polyorder < window_length, 'Polynomial order (%d) should be less then window (%d).' % (
polyorder, window_length)
'''
if 'deriv' in soptions._asdict():
deriv = int(float(soptions.deriv))
assert deriv >= 0 and deriv <= polyorder, 'Order of derrivative should be integer greater or equal 0 and less then or equal to Polynomial order (%d), %d given instead.' % (polyorder,deriv)
opts.update({'deriv': deriv})
if 'delta' in soptions._asdict():
delta = float(soptions.delta)
assert delta >= 0, 'Delta should be greater or equal 0, %f given instead.' % delta
if 'deriv' in soptions._asdict():
if deriv == 0:
logger.warning('Delta %f make no sense if deriv is 0.' % delta)
opts.update({'delta': delta})
mode = 'interp' # TODO: magic constant (default value)
if 'mode' in soptions._asdict():
mode = str(soptions.mode)
assert mode in ['mirror', 'constant', 'nearest', 'wrap', 'interp'], 'Unknown mode %s.' % mode
opts.update({'mode': mode})
if 'cval' in soptions._asdict():
cval = float(soptions.cval)
if mode != 'constant':
logger.warning('Cval make no sense if mode is %s.' % mode)
opts.update({'cval': cval})
'''
if soptions.method == 'window':
window_opts()
smooth = WindowSmooth(**opts)
elif soptions.method == 'awin':
awin_dwin_opts()
smooth = ActiveWindowSmooth(**opts)
elif soptions.method == 'dwin':
awin_dwin_opts()
smooth = DistanceWindowSmooth(**opts)
elif soptions.method == 'mss':
mss_opts()
smooth = MaxStepSmooth(**opts)
elif soptions.method == 'window_mss':
window_opts()
mss_opts()
smooth = WindowOverMaxStepSmooth(**opts)
elif soptions.method == 'awin_mss':
awin_dwin_opts()
mss_opts()
smooth = ActiveWindowOverMaxStepSmooth(**opts)
elif soptions.method == 'dwin_mss':
awin_dwin_opts()
mss_opts()
smooth = DistanceWindowOverMaxStepSmooth(**opts)
elif soptions.method == 'savgol':
savgol_opts()
smooth = SavgolSmooth(**opts)
return smooth
def get_clustering_method(coptions,config):
assert coptions.method in available_clusterization_methods, 'Unknown clusterization method %s.' % coptions.method
opts = {}
def dbscan_opts():
if 'eps' in coptions._asdict():
opts.update({'eps': float(coptions.eps)})
if 'min_samples' in coptions._asdict():
opts.update({'min_samples': int(coptions.min_samples)})
if 'metric' in coptions._asdict():
assert coptions.metric in ['cityblock', 'cosine', 'euclidean',
'manhattan'], "Unknown metric <%s>." % coptions.metric
opts.update({'metric': str(coptions.metric)})
if 'algorithm' in coptions._asdict():
assert coptions.algorithm in ['auto', 'ball_tree', 'kd_tree',
'brute'], "Unknown NN algorithm <%s>." % coptions.algorithm
opts.update({'algorithm': str(coptions.algorithm)})
if 'leaf_size' in coptions._asdict():
opts.update({'leaf_size': int(coptions.leaf_size)})
def affprop_opts():
if 'damping' in coptions._asdict():
opts.update({'damping': float(coptions.damping)})
if 'convergence_iter' in coptions._asdict():
opts.update({'convergence_iter': int(coptions.convergence_iter)})
if 'max_iter' in coptions._asdict():
opts.update({'max_iter': int(coptions.max_iter)})
if 'preference' in coptions._asdict():
opts.update({'preference': float(coptions.preference)})
def kmeans_opts():
if 'n_clusters' in coptions._asdict():
opts.update({'n_clusters': int(coptions.n_clusters)})
if 'max_iter' in coptions._asdict():
opts.update({'max_iter': int(coptions.max_iter)})
if 'n_init' in coptions._asdict():
opts.update({'n_init': int(coptions.n_init)})
if 'init' in coptions._asdict():
assert coptions.init in ['k-means++', 'random'], "Unknown initialization method <%s>." % coptions.init
opts.update({'init': str(coptions.init)})
if 'tol' in coptions._asdict():
opts.update({'tol': float(coptions.tol)})
def meanshift_opts():
if 'cluster_all' in coptions._asdict():
opts.update({'cluster_all': bool(coptions.cluster_all)})
if 'bandwidth' in coptions._asdict():
if coptions.bandwidth in (Auto, None):
opts.update({'bandwidth': coptions.bandwidth})
else:
opts.update({'bandwidth': float(coptions.bandwidth)})
if 'bin_seeding' in coptions._asdict():
opts.update({'bin_seeding': bool(coptions.bin_seeding)})
if 'min_bin_freq' in coptions._asdict():
opts.update({'min_bin_freq': int(coptions.min_bin_freq)})
def birch_opts():
if 'threshold' in coptions._asdict():
opts.update({'threshold': float(coptions.threshold)})
if 'branching_factor' in coptions._asdict():
opts.update({'branching_factor': int(coptions.branching_factor)})
if 'n_clusters' in coptions._asdict():
opts.update({'n_clusters': int(coptions.n_clusters)})
def barber_opts():
abo = config.get_stage_options(2)
if abo.auto_barber:
opts.update({'selection': str(abo.auto_barber)})
if is_number(abo.auto_barber_mincut):
opts.update({'mincut': float(abo.auto_barber_mincut)})
else:
opts.update({'mincut': None})
if is_number(abo.auto_barber_maxcut):
opts.update({'maxcut': float(abo.auto_barber_maxcut)})
else:
opts.update({'maxcut': None})
opts.update({'mincut_level': bool(abo.auto_barber_mincut_level)})
opts.update({'maxcut_level': bool(abo.auto_barber_maxcut_level)})
opts.update({'tovdw': bool(abo.auto_barber_tovdw)})
if 'auto_barber' in coptions._asdict():
opts.update({'selection': str(coptions.auto_barber)})
if 'auto_barber_maxcut' in coptions._asdict():
if is_number(coptions.auto_barber_maxcut):
opts.update({'maxcut': float(coptions.auto_barber_maxcut)})
else:
opts.update({'maxcut': None})
if 'auto_barber_mincut' in coptions._asdict():
if is_number(coptions.auto_barber_mincut):
opts.update({'mincut': float(coptions.auto_barber_mincut)})
else:
opts.update({'mincut': None})
if 'auto_barber_mincut_level' in coptions._asdict():
opts.update({'mincut_level': bool(coptions.auto_barber_mincut_level)})
if 'auto_barber_maxcut_level' in coptions._asdict():
opts.update({'maxcut_level': bool(coptions.auto_barber_maxcut_level)})
if 'auto_barber_tovdw' in coptions._asdict():
opts.update({'tovdw': bool(coptions.auto_barber_tovdw)})
if coptions.method == 'dbscan':
dbscan_opts()
method = DBSCAN
elif coptions.method == 'affprop':
affprop_opts()
method = AffinityPropagation
elif coptions.method == 'kmeans':
kmeans_opts()
method = KMeans
elif coptions.method == 'meanshift':
meanshift_opts()
method = MeanShift
elif coptions.method == 'birch':
birch_opts()
method = Birch
elif coptions.method == 'barber':
barber_opts()
method = BarberCluster
return PerformClustering(method, **opts)
def get_linearize_method(loption):
if loption:
assert isinstance(loption, (str, unicode)), "Wrong Linearize method definition: %r" % loption
possible_formats = [
re.compile('^(recursive|oneway|hobbit)(triangle|vector)[(][+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[)]$'),
re.compile('^(recursive|oneway|hobbit)(triangle|vector)[(][)]$'),
re.compile('^(recursive|oneway|hobbit)(triangle|vector)$')]
assert True in [pf.match(loption.lower()) is not None for pf in
possible_formats], "Wrong Linearize method definition: %s" % loption
# http://stackoverflow.com/questions/12929308/python-regular-expression-that-matches-floating-point-numbers#12929311
way = [w for w in ['recursive', 'oneway', 'hobbit'] if w in loption.lower()][0]
crit = [c for c in ['triangle', 'vector'] if c in loption.lower()][0]
threshold = re.compile('[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?').findall(loption)
if len(threshold):
threshold = float(threshold[0][0])
else:
threshold = None
# get method
if way == 'recursive':
if crit == 'triangle':
met = traces.LinearizeRecursiveTriangle
else:
met = traces.LinearizeRecursiveVector
elif way == 'oneway':
if crit == 'triangle':
met = traces.LinearizeOneWayTriangle
else:
met = traces.LinearizeOneWayVector
elif way == 'hobbit':
if crit == 'triangle':
met = traces.LinearizeHobbitTriangle
else:
met = traces.LinearizeHobbitVector
if threshold is None:
return met()
return met(threshold)
################################################################################
[docs]def valve_begin():
clui.message(greetings_aquaduct()) # nice greetings
clui.message('Aqua-Duct version %s' % aquaduct_version_nice())
# clui.message('Valve driver version %s' % version_nice())
clui.message(sep())
[docs]def valve_end():
clui.message(sep())
clui.message('Let the Valve be always open!')
clui.message('Goodby!')
[docs]def valve_load_config(filename, config):
assert filename is not None, "No config file provided."
assert os.path.isfile(filename), "Config file %s does not exist." % filename
with clui.fbm('Load configuration file'):
config.load_config(filename)
[docs]def valve_read_trajectory(top, traj, frames_window=None):
with clui.fbm('Read trajectory'):
return TrajectoryReader(top, traj, frames_window=frames_window)
# read trajectory
# traj_list = shlex.split(traj)
# return ReadAmberNetCDFviaMDA(top, traj_list)
# reader = ReadDCDviaMDA(topology, trajectory)
def valve_begin_stage(stage, config):
clui.message(sep())
clui.message('Starting Stage %s: %s' % (roman.toRoman(stage + 1), config.stage_names(stage)))
options = config.get_stage_options(stage)
clui.message('Execute mode: %s' % options.execute)
return options
[docs]def valve_exec_stage(stage, config, stage_run, reader=None, no_io=False, run_status=None,
**kwargs):
with clui.tictoc('Stage %s (%s)' % (roman.toRoman(stage + 1), config.stage_names(stage))):
# This function runs stages in a smart way, checks execution logic, and loads/saves dumps if required.
options = valve_begin_stage(stage, config)
run_status.update({stage: False})
# TODO: Consider to create traj_reader object here instead of doing it in stage_run or in load...
# execute?
can_be_loaded = False
if (not no_io) and options.dump:
if os.path.isfile(options.dump) or os.path.islink(options.dump):
can_be_loaded = True
# has to be run?
if options.execute in ['runonce'] and can_be_loaded and stage > 0:
if run_status[stage - 1]:
can_be_loaded = False
if options.execute in ['run'] or (options.execute in ['runonce'] and not can_be_loaded):
result = stage_run(config, options, reader=reader, **kwargs)
run_status.update({stage: True})
if not no_io:
###########
# S A V E #
###########
save_stage_dump(options.dump, **result)
elif options.execute in ['skip'] or (options.execute in ['runonce'] and can_be_loaded):
if not no_io:
###########
# L O A D #
###########
if options.dump:
result = load_stage_dump(options.dump, reader=reader)
else:
raise NotImplementedError('exec mode %s not implemented' % options.execute)
# remove options stuff
if not no_io:
if result is not None:
return dict(((key, val) for key, val in result.iteritems() if 'options' not in key))
################################################################################
# stages run
# traceable_residues
[docs]def stage_I_run(config, options,
reader=None,
**kwargs):
# create pool of workers - mapping function
map_fun = map
if optimal_threads.threads_count > 1:
pool = mp.Pool(optimal_threads.threads_count)
map_fun = pool.map
with reader.get() as traj_reader:
clui.message("Loop over frames - search of residues in object:")
pbar = clui.pbar(traj_reader.number_of_frames)
scope = traj_reader.parse_selection(options.scope)
# scope will be used to derrive center of system
center_of_system = np.array([0., 0., 0.])
# create some containers
res_ids_in_object_over_frames = {}
all_res = None
# the loop over frames
for frame in traj_reader.iterate_over_frames():
# center of system
center_of_system += scope.center_of_mass()
# current res selection
res = traj_reader.parse_selection(options.object)
# find matching residues:
res_new = scope.containing_residues(res, convex_hull=options.scope_convexhull, map_fun=map_fun)
# adds them to all_res
if all_res:
all_res += res_new
all_res.uniquify()
else:
all_res = res_new
# remeber ids of res in object in current frame
if res_new is not None:
res_ids_in_object_over_frames.update({frame: res_new.unique_resids(ikwid=True)})
else:
res_ids_in_object_over_frames.update({frame: []})
pbar.update(frame)
# destroy pool of workers
if optimal_threads.threads_count > 1:
pool.close()
pool.join()
del pool
pbar.finish()
center_of_system /= (frame + 1)
logger.info('Center of system is %0.2f, %0.2f, %0.2f' % tuple(center_of_system))
if all_res is None:
raise ValueError("No traceable residues was found.")
clui.message("Number of residues to trace: %d" % all_res.unique_resids_number())
return {'all_res': all_res,
'res_ids_in_object_over_frames': res_ids_in_object_over_frames,
'center_of_system': center_of_system,
'options': options}
################################################################################
# raw_paths
[docs]def stage_II_run(config, options,
reader=None,
all_res=None,
res_ids_in_object_over_frames=None,
**kwargs):
if options.clear_in_object_info:
clui.message('Clear data on residues in object over frames.')
clui.message('This will be recalculated on demand.')
res_ids_in_object_over_frames = {}
with reader.get() as traj_reader:
with clui.fbm("Init paths container"):
paths = dict(((resid, GenericPaths(resid, min_pf=0, max_pf=traj_reader.number_of_frames-1)) for resid in
all_res.unique_resids(ikwid=True)))
scope = traj_reader.parse_selection(options.scope)
with clui.fbm("Rebuild treceable residues with current trajectory"):
all_res = rebuild_selection(all_res, traj_reader)
clui.message("Trajectory scan:")
pbar = clui.pbar(traj_reader.number_of_frames)
# create pool of workers - mapping function
map_fun = map
if optimal_threads.threads_count > 1:
pool = mp.Pool(optimal_threads.threads_count)
map_fun = pool.map
for frame in traj_reader.iterate_over_frames():
all_res_coords = list(all_res.center_of_mass_of_residues()) # this uses iterate over residues
all_resids = [res.first_resid() for res in all_res.iterate_over_residues()]
# check if is res are in scope
is_res_in_scope = scope.contains_residues(all_res, convex_hull=options.scope_convexhull, map_fun=map_fun)
# loop over coord, is in scope, and resid
for nr, (coord, isscope, resid) in enumerate(zip(all_res_coords, is_res_in_scope, all_resids)):
# the point is that nr is not pointing to correct element in paths
# now, nr is useless because paths is a dictionary, use resids instead
assert paths[resid].id == resid, \
"Internal error. Paths IDs not synced with resids. \
Please send a bug report to the developer(s): %s" % __mail__
if isscope:
paths[resid].add_coord(coord)
# do we have info on res_ids_in_object_over_frames?
if frame not in res_ids_in_object_over_frames:
res = traj_reader.parse_selection(options.object)
# discard res out of scope
res_new = get_res_in_scope(is_res_in_scope, res)
# remeber ids of res in object in current frame
if res_new is not None:
res_ids_in_object_over_frames.update({frame: res_new.unique_resids(ikwid=True)})
else:
res_ids_in_object_over_frames.update({frame: []})
# in scope
if resid not in res_ids_in_object_over_frames[frame]:
paths[resid].add_scope(frame)
else:
# in object
paths[resid].add_object(frame)
pbar.update(frame)
# destroy pool of workers
if optimal_threads.threads_count > 1:
pool.close()
pool.join()
del pool
pbar.finish()
clui.message("Number of paths: %d" % len(paths))
return {'all_res': all_res, 'paths': paths, 'options': options}
################################################################################
class ABSphere(namedtuple('ABSphere', 'center radius')):
pass
# separate_paths
[docs]def stage_III_run(config, options,
paths=None,
reader=None,
**kwargs):
soptions = config.get_smooth_options()
if options.discard_empty_paths:
with clui.fbm("Discard residues with empty paths"):
for key in paths.keys():
if len(paths[key].frames) == 0:
paths.pop(key)
clui.message("Create separate paths:")
pbar = clui.pbar(len(paths))
# yield_single_paths requires a list of paths not a dictionary
spaths = [sp for sp, nr in yield_single_paths(paths.values(), progress=True) if pbar.update(nr + 1) is None]
pbar.finish()
if options.discard_short_paths > 0:
shorter_then = int(options.discard_short_paths)
with clui.fbm("Discard paths shorter then %d" % shorter_then):
spaths = [sp for sp in spaths if sp.size > shorter_then]
if options.auto_barber:
with reader.get() as traj_reader:
wtc = WhereToCut(spaths,traj_reader,
selection=options.auto_barber,
mincut=options.auto_barber_mincut,
mincut_level=options.auto_barber_mincut_level,
maxcut=options.auto_barber_maxcut,
maxcut_level=options.auto_barber_maxcut_level,
tovdw=options.auto_barber_mincut)
# cut thyself!
wtc.cut_thyself()
clui.message("Auto Barber in action:")
pbar = clui.pbar(len(paths))
for p in paths.values():
p.barber_with_spheres(wtc.spheres)
pbar.next()
pbar.finish()
# now, it might be that some of paths are empty
# paths = {k: v for k, v in paths.iteritems() if len(v.coords) > 0}
paths = dict((k, v) for k, v in paths.iteritems() if
len(v.coords) > 0) # more universal as dict comprehension may not work in <2.7
clui.message("Recreate separate paths:")
pbar = clui.pbar(len(paths))
# yield_single_paths requires a list of paths not a dictionary
spaths = [sp for sp, nr in yield_single_paths(paths.values(), progress=True) if pbar.update(nr + 1) is None]
pbar.finish()
if options.discard_short_paths > 0:
shorter_then = int(options.discard_short_paths)
with clui.fbm("Discard (again) paths shorter then %d" % shorter_then):
spaths = [sp for sp in spaths if sp.size > shorter_then]
if options.sort_by_id:
with clui.fbm("Sort separate paths by resid"):
spaths = sorted(spaths, key=lambda sp: (sp.id.id, sp.id.nr))
# apply smoothing?
if options.apply_smoothing or options.apply_soft_smoothing:
smooth = get_smooth_method(soptions)
if options.apply_smoothing:
clui.message('Applying hard smoothing:')
pbar = clui.pbar(len(spaths))
for nr, sp in enumerate(spaths):
sp.apply_smoothing(smooth)
pbar.update(nr + 1)
pbar.finish()
if options.apply_soft_smoothing:
clui.message('Applying soft smoothing:')
pbar = clui.pbar(len(spaths))
for nr, sp in enumerate(spaths):
sp.get_coords(smooth=smooth)
pbar.update(nr + 1)
pbar.finish()
clui.message("Number of paths: %d" % len(paths))
clui.message("Number of spaths: %d" % len(spaths))
return {'paths': paths, 'spaths': spaths, 'options': options, 'soptions': soptions}
################################################################################
def get_skip_size_function(rt=None):
if not isinstance(rt, str): return None
assert re.compile('^[<>=]+[0-9.]+$').match(rt) is not None, "Wrong threshold definition: %s" % rt
op = re.compile('[<>=]+')
op = ''.join(sorted(op.findall(rt)[0]))
vl = re.compile('[0-9.]+')
vl = float(vl.findall(rt)[0])
operator_dict = {'>': operator.gt,
'=>': operator.ge,
'<=': operator.le,
'<': operator.lt}
operator_dict = {'>': operator.ge,
'=>': operator.gt,
'<=': operator.lt,
'<': operator.le}
assert op in operator_dict.keys(), "Unsupported operator %s in threshold %s" % (op, rt)
return lambda size_of_cluster: operator_dict[op](vl, size_of_cluster)
def get_allow_size_function(rt=None):
if not isinstance(rt, str): return None
assert re.compile('^[<>=]+[0-9.]+$').match(rt) is not None, "Wrong threshold definition: %s" % rt
op = re.compile('[<>=]+')
op = ''.join(sorted(op.findall(rt)[0]))
vl = re.compile('[0-9.]+')
vl = float(vl.findall(rt)[0])
operator_dict = {'>': operator.gt,
'=>': operator.ge,
'<=': operator.le,
'<': operator.lt}
assert op in operator_dict.keys(), "Unsupported operator %s in threshold %s" % (op, rt)
return lambda size_of_cluster: operator_dict[op](size_of_cluster, vl)
class SkipSizeFunction(object):
def __init__(self,ths_def):
self.thresholds = []
if isinstance(ths_def,(str,unicode)):
for thd in ths_def.split():
self.thresholds.append(get_allow_size_function(thd))
def __call__(self,size_of_cluster):
for thd in self.thresholds:
if not thd(size_of_cluster):
return True
return False
def potentially_recursive_clusterization(config=None,
clusterization_name=None,
inlets_object=None,
spaths=None,
traj_reader=None,
message='clusterization',
deep=0,
max_level=5):
with clui.fbm("Performing %s, level %d of %d" % (message, deep, max_level), cont=False):
logger.debug('Clustering options section: %s' % clusterization_name)
cluster_options = config.get_cluster_options(section_name=clusterization_name)
clui.message('Clustering options:')
for k, v in cluster_options._asdict().iteritems():
clui.message("%s = %s" % (str(k), str(v)))
# TODO: Print clusterization options in a nice way!
clustering_function = get_clustering_method(cluster_options,config)
# special case of barber!!!
if cluster_options.method == 'barber':
inlets_refs = inlets_object.get_inlets_references()
wtc = WhereToCut([sp for sp in spaths if sp.id in inlets_refs],
traj_reader,
forceempty=True,
**clustering_function.method_kwargs)
radii = [sphe.radius for sphe in wtc.spheres]
inlets_object.add_radii(radii)
# get skip_size function according to recursive_treshold
#skip_size = get_skip_size_function(cluster_options.recursive_threshold)
skip_size = SkipSizeFunction(cluster_options.recursive_threshold)
inlets_object.perform_reclustering(clustering_function, skip_outliers=True, skip_size=skip_size)
clui.message('Number of clusters detected so far: %d' % len(inlets_object.clusters_list))
if cluster_options.recursive_clusterization:
deep += 1
if deep > max_level:
return
return potentially_recursive_clusterization(config=config,
clusterization_name=cluster_options.recursive_clusterization,
inlets_object=inlets_object,
spaths=spaths,
traj_reader=traj_reader,
deep=deep,
max_level=max_level)
# inlets_clusterization
[docs]def stage_IV_run(config, options,
spaths=None,
center_of_system=None,
reader=None,
**kwargs):
coptions = config.get_cluster_options()
rcoptions = config.get_recluster_options()
soptions = config.get_smooth_options()
max_level = int(options.max_level)
assert max_level >= 0
# new style clustering
with clui.fbm("Create inlets"):
# here we can check center of system
inls = Inlets(spaths, center_of_system=center_of_system)
clui.message("Number of inlets: %d" % inls.size)
def noo():
# returns number of outliers
if 0 in inls.clusters_list:
return inls.clusters.count(0)
return 0
if inls.size > 0:
# ***** CLUSTERIZATION *****
with clui.fbm("Performing clusterization", cont=False):
with reader.get() as traj_reader:
potentially_recursive_clusterization(config=config,
clusterization_name=config.cluster_name(),
inlets_object=inls,
spaths=spaths,
traj_reader=traj_reader,
message='clusterization',
max_level=max_level)
# with log.fbm("Performing clusterization"):
# clustering_function = get_clustering_method(coptions)
# inls.perform_clustering(clustering_function)
clui.message('Number of outliers: %d' % noo())
# ***** OUTLIERS DETECTION *****
if options.detect_outliers:
with clui.fbm("Detecting outliers",cont=False):
if options.detect_outliers is not Auto:
threshold = float(options.detect_outliers)
clusters = list(inls.clusters)
no_out_detected = 0
for cluster, center, std in zip(inls.clusters_list,
inls.clusters_centers,
inls.clusters_std):
if cluster == 0:
continue
inls_lim = inls.lim2clusters(cluster)
dmat = cdist(np.matrix(center), inls_lim.coords, metric='euclidean').flatten()
# Auto procedure
if options.detect_outliers is Auto:
threshold = std * 4 # FIXME: magic constant!
# defined threshold procedure
for nr, (d, ids) in enumerate(zip(dmat, inls_lim.inlets_ids)):
# print d, threshold
if d > threshold:
clusters[ids] = 0
no_out_detected += 1
clui.message('Detected %d outliers.' % no_out_detected)
inls.add_outliers_annotations(clusters)
clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
clui.message('Number of outliers: %d' % noo())
# ***** RECLUSTERIZATION *****
if options.recluster_outliers:
with clui.fbm("Performing reclusterization of outliers", cont=False):
with reader.get() as traj_reader:
'''
potentially_recursive_clusterization(config=config,
clusterization_name=config.recluster_name(),
inlets_object=inls,
spaths=spaths,
traj_reader=traj_reader,
message='reclusterization',
max_level=max_level)
'''
clustering_function = get_clustering_method(rcoptions,config)
# perform reclusterization
# inls.recluster_outliers(clustering_function)
inls.recluster_cluster(clustering_function, 0)
clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
clui.message('Number of outliers: %d' % noo())
# ***** SINGLETONS REMOVAL *****
if options.singletons_outliers:
with clui.fbm("Removing clusters of size %d" % int(options.singletons_outliers)):
inls.small_clusters_to_outliers(int(options.singletons_outliers))
clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
clui.message('Number of outliers: %d' % noo())
clui.message('Clustering history:')
clui.message(clui.print_simple_tree(inls.tree,prefix='').rstrip())
with clui.fbm("Calculating cluster types"):
ctypes = inls.spaths2ctypes(spaths)
# now, there is something to do with ctypes!
# we can create master paths!
# but only if user wants this
master_paths = {}
master_paths_smooth = {}
if options.create_master_paths:
with clui.fbm("Creating master paths for cluster types", cont=False):
smooth = get_smooth_method(soptions)
ctypes_generic = [ct.generic for ct in ctypes]
ctypes_generic_list = sorted(list(set(ctypes_generic)))
pbar = clui.pbar(len(spaths) * 2)
for nr, ct in enumerate(ctypes_generic_list):
logger.debug('CType %s (%d)' % (str(ct), nr))
sps = lind(spaths, what2what(ctypes_generic, [ct]))
logger.debug('CType %s (%d), number of spaths %d' % (str(ct), nr, len(sps)))
# print len(sps),ct
ctspc = CTypeSpathsCollection(spaths=sps, ctype=ct, pbar=pbar, threads=optimal_threads.threads_count)
master_paths.update({ct: ctspc.get_master_path(resid=nr)})
master_paths_smooth.update({ct: ctspc.get_master_path(resid=nr, smooth=smooth)})
del ctspc
pbar.finish()
else:
clui.message("No inlets found. Clusterization skipped.")
# make empty results
ctypes = inls.spaths2ctypes(spaths)
master_paths = {}
master_paths_smooth = {}
return {'inls': inls,
'ctypes': ctypes,
'master_paths': master_paths,
'master_paths_smooth': master_paths_smooth}
################################################################################
def make_line(template, line):
return (' '.join(template)) % tuple(line)
def make_header_template(line_template):
header_template = []
col_re = re.compile('[0-9]+')
for l in line_template:
header_template.append('%{0}s'.format(col_re.findall(l)[0]))
return header_template
def nr_header():
return ['Nr'], ['%7d']
def get_header_line_and_line_template(header_line_and_line_template, head_nr=False):
header, line_template = header_line_and_line_template
header_template = make_header_template(line_template)
# head_nr? only to header
if head_nr:
nrh, nrlt = nr_header()
header_template = make_header_template(nrlt + line_template)
header = nrh + header
header_line = make_line(header_template, header)
return header_line, line_template
def spath_id_header():
return ['ID'], ['%9s']
def add_path_id_head(gen):
sph, splt = spath_id_header()
@wraps(gen)
def patched(*args, **kwargs):
add_id = True
if 'add_id' in kwargs:
add_id = kwargs.pop('add_id')
h, lt = gen(*args, **kwargs)
if add_id:
return sph + h, splt + lt
return h, lt
return patched
def add_path_id(gen):
@wraps(gen)
def patched(spath, *args, **kwargs):
add_id = True
if 'add_id' in kwargs:
add_id = kwargs.pop('add_id')
line = gen(spath, *args, **kwargs)
if add_id:
line = [spath.id] + line
return line
return patched
def size_header():
return ['Size'], ['%7d']
def add_size_head(gen):
sph, splt = size_header()
@wraps(gen)
def patched(*args, **kwargs):
h, lt = gen(*args, **kwargs)
return sph + h, splt + lt
return patched
def add_size(gen):
@wraps(gen)
def patched(spaths, add_size=True, *args, **kwargs):
line = gen(spaths, *args, **kwargs)
if add_size:
line = [len(spaths)] + line
return line
return patched
def cluster_id_header():
return ['Cluster'], ['%7d']
def add_cluster_id_head(gen):
sph, splt = cluster_id_header()
@wraps(gen)
def patched(*args, **kwargs):
h, lt = gen(*args, **kwargs)
return sph + h, splt + lt
return patched
def add_cluster_id(gen):
@wraps(gen)
def patched(cluster, something, add_id=True, *args, **kwargs):
line = gen(cluster, something, *args, **kwargs)
if add_id:
line = [int(cluster)] + line
return line
return patched
def ctype_id_header():
return ['CType'], ['%7s']
def add_ctype_id_head(gen):
sph, splt = ctype_id_header()
@wraps(gen)
def patched(*args, **kwargs):
h, lt = gen(*args, **kwargs)
return sph + h, splt + lt
return patched
def add_ctype_id(gen):
@wraps(gen)
def patched(ctype, something, add_id=True, *args, **kwargs):
line = gen(ctype, something, *args, **kwargs)
if add_id:
line = [str(ctype)] + line
return line
return patched
class PrintAnalysis(object):
nr_template = '%7d '
# TODO: Change it in such a way that it cooperates well with debug-file option.
def __init__(self, fileoption):
self.output2stderr = False
if fileoption:
self.filehandle = open(fileoption, 'w')
# self.output2stderr = True
else:
self.filehandle = sys.stdout
def __call__(self, info2print, nr=None):
if nr is not None:
info2print = (self.nr_template % nr) + info2print
if self.output2stderr:
clui.message(info2print)
print >> self.filehandle, info2print
def sep(self):
self(asep())
def thead(self, info2print):
self(clui.thead(info2print))
def under(self, info2print):
self(clui.underline(info2print))
################################################################################
@add_path_id_head
def spath_basic_info_header():
header = 'BeginF InpF ObjF OutF EndF'.split()
line_template = ['%7d'] * len(header)
return header, line_template
@add_path_id
def spath_basic_info(spath):
line = []
line.append(spath.begins)
line.extend(map(len, (spath.path_in, spath.path_object, spath.path_out)))
line.append(spath.ends)
return line
################
@add_path_id_head
def spath_lenght_total_info_header():
header = 'InpL ObjL OutL'.split()
line_template = ['%9.1f'] * len(header)
return header, line_template
@add_path_id
def spath_lenght_total_info(spath):
line = []
for t in traces.midpoints(spath.coords):
if len(t) > 1: # traces.length_step_std requires at least 2 points
line.append(traces.length_step_std(t)[0])
else:
line.append(float('nan'))
return line
################
@add_path_id_head
def spath_steps_info_header():
header = 'InpS InpStdS ObjS ObjStdS OutS OutStdS'.split()
line_template = ['%8.2f', '%8.3f'] * (len(header) / 2)
return header, line_template
@add_path_id
def spath_steps_info(spath):
line = []
for t in traces.midpoints(spath.coords):
if len(t) > 0:
line.extend(traces.length_step_std(t)[1:])
else:
line.extend([float('nan'), float('nan')])
return line
################
@add_path_id_head
def spath_ctype_header():
header, line_template = ctype_id_header()
return header, line_template
@add_path_id
def spath_ctype(spath, ctype=None):
line = [str(ctype)]
return line
################
@add_path_id_head
def spath_full_info_header():
header = []
line_template = []
for h, lt in (spath_basic_info_header(add_id=False),
spath_lenght_total_info_header(add_id=False),
spath_steps_info_header(add_id=False),
spath_ctype_header(add_id=False)):
header += h
line_template += lt
return header, line_template
@add_path_id
def spath_full_info(spath, ctype=None):
line = []
for l in (spath_basic_info(spath, add_id=False),
spath_lenght_total_info(spath, add_id=False),
spath_steps_info(spath, add_id=False),
spath_ctype(spath, ctype=ctype, add_id=False)):
line += l
return line
################################################################################
@add_size_head
def spaths_lenght_total_header():
header = 'Inp InpStd Obj ObjStd Out OutStd'.split()
line_template = ['%8.1f', '%8.2f'] * (len(header) / 2)
return header, line_template
@add_size
def spaths_length_total(spaths):
line = []
d4s = []
for sp in spaths:
d4s.append(spath_lenght_total_info(sp, add_id=False))
d4s = np.array(d4s)
line.extend(np.mean(d4s, 0))
line.extend(np.std(d4s, 0))
return [line[0], line[3], line[1], line[4], line[2], line[5]]
################################################################################
@add_cluster_id_head
def clusters_inlets_header():
header = 'Size SInp IInp IOut SOut'.split()
header = 'Size INCOMING OUTGOING'.split()
line_template = ['%7d'] + ['%8d'] * (len(header) - 1)
return header, line_template
@add_cluster_id
def clusters_inlets(cluster, inlets):
line = [inlets.size]
line.append(inlets.lim2types([InletTypeCodes.surface_incoming]).size)
# line.append(inlets.lim2types([InletTypeCodes.internal_incoming]).size)
# line.append(inlets.lim2types([InletTypeCodes.internal_outgoing]).size)
line.append(inlets.lim2types([InletTypeCodes.surface_outgoing]).size)
return line
@add_ctype_id_head
def ctypes_spaths_info_header():
header, line_template = spaths_lenght_total_header()
return header, line_template
@add_ctype_id
def ctypes_spaths_info(ctype, spaths):
line = []
line += spaths_length_total(spaths)
return line
################################################################################
# analysis
[docs]def stage_V_run(config, options,
spaths=None,
paths=None,
inls=None,
ctypes=None,
**kwargs):
# file handle?
pa = PrintAnalysis(options.save)
if options.save:
clui.message('Using user provided file (%s).' % options.save)
# clui.message(sep())
# clui.message('')
else:
clui.message('Using standard output.')
clui.message(sep())
clui.message('')
############
pa.sep()
pa('Aqua-Duct analysis')
pa(clui.get_str_timestamp())
############
if options.dump_config:
pa.sep()
pa.under('Configuration file name: %s' % config.config_filename)
pa(os.linesep.join(config.dump_config()))
############
pa.sep()
pa("Number of traceable residues: %d" % len(paths))
pa("Number of separate paths: %d" % len(spaths))
############
pa.sep()
pa("Number of inlets: %d" % inls.size)
no_of_clusters = len(inls.clusters_list) - {True: 1, False: 0}[0 in inls.clusters_list] # minus outliers, if any
pa("Number of clusters: %d" % no_of_clusters)
pa("Outliers: %s" % ({True: 'yes', False: 'no'}[0 in inls.clusters_list]))
############
pa.sep()
pa("Clusters summary - inlets")
header_line, line_template = get_header_line_and_line_template(clusters_inlets_header(), head_nr=True)
pa.thead(header_line)
for nr, cl in enumerate(inls.clusters_list):
inls_lim = inls.lim2clusters(cl)
pa(make_line(line_template, clusters_inlets(cl, inls_lim)), nr=nr)
############
pa.sep()
pa("Separate paths clusters types summary - mean lengths of paths")
header_line, line_template = get_header_line_and_line_template(ctypes_spaths_info_header(), head_nr=True)
pa.thead(header_line)
ctypes_generic = [ct.generic for ct in ctypes]
ctypes_generic_list = sorted(list(set(ctypes_generic)))
# sorted by ctype
ctypes_size = []
for nr, ct in enumerate(ctypes_generic_list):
sps = lind(spaths, what2what(ctypes_generic, [ct]))
ctypes_size.append(len(sps))
# pa(make_line(line_template, ctypes_spaths_info(ct, sps)), nr=nr)
'''
# sorted by sizes:
ctypes_generic_list = sorted(ctypes_generic_list, key=lambda ctyp: ctypes_size[ctypes_generic_list.index(ctyp)],
reverse=True)
'''
for nr, ct in enumerate(ctypes_generic_list):
sps = lind(spaths, what2what(ctypes_generic, [ct]))
ctypes_size.append(len(sps))
pa(make_line(line_template, ctypes_spaths_info(ct, sps)), nr=nr)
############
pa.sep()
pa("List of separate paths and properties")
header_line, line_template = get_header_line_and_line_template(spath_full_info_header(), head_nr=True)
pa.thead(header_line)
for nr, (sp, ctype) in enumerate(izip_longest(spaths, ctypes, fillvalue=None)):
if ctype is not None:
ctype = ctype.generic
pa(make_line(line_template, spath_full_info(sp, ctype=ctype)), nr=nr)
################################################################################
# visualize
def plot_spaths_traces(spaths, spp=None, name=None, split=False, states=False, separate=False, smooth=None):
if states or separate:
spaths_iter = spaths
else:
spaths_iter = [spaths]
state = None
name_separate = ''
for nr, sp in enumerate(spaths_iter):
if states:
state = nr + 1
if separate:
name_separate = '_%d' % (nr + 1)
if states or separate:
sp = [sp]
if split:
spp.paths_trace(sp, name=name + '_in' + name_separate, plot_object=False, plot_out=False, state=state,
smooth=smooth)
spp.paths_trace(sp, name=name + '_obj' + name_separate, plot_in=False, plot_out=False, state=state,
smooth=smooth)
spp.paths_trace(sp, name=name + '_out' + name_separate, plot_in=False, plot_object=False, state=state,
smooth=smooth)
else:
spp.paths_trace(sp, name=name + name_separate, state=state, smooth=smooth)
def plot_spaths_inlets(spaths, spp=None, name=None, states=False, separate=False, smooth=None):
if states or separate:
spaths_iter = spaths
else:
spaths_iter = [spaths]
state = None
name_separate = ''
for nr, sp in enumerate(spaths_iter):
if states:
state = nr + 1
if separate:
name_separate = '_%d' % (nr + 1)
if states or separate:
sp = [sp]
spp.paths_inlets(sp, name=name + name_separate, state=state, smooth=smooth)
def is_pymol_connector_session(filename):
session_ext = re.compile('[.][pP][sS][eE]')
if filename:
ext = os.path.splitext(filename)[-1]
if session_ext.match(ext):
return True
return False
def is_pymol_connector_script(filename):
script = re.compile('.*[.]([pP][yY]|[.pP][yY][.][gG][zZ])$')
if filename:
if script.match(filename):
return True
return False
[docs]def stage_VI_run(config, options,
reader=None,
spaths=None,
inls=None,
ctypes=None,
master_paths=None,
master_paths_smooth=None,
**kwargs):
from aquaduct.visual.pymol_connector import ConnectToPymol, SinglePathPlotter
# from aquaduct.visual.pymol_connector import cmd as pymol_cmd
from aquaduct.visual.helpers import ColorMapDistMap
soptions = config.get_smooth_options()
smooth = get_smooth_method(soptions)
# start pymol
with clui.fbm("Starting PyMOL connection", cont=False):
pymol_connector = ConnectToPymol()
if is_pymol_connector_script(options.save):
pymol_connector.init_script(options.save)
else:
pymol_connector.init_pymol()
if options.simply_smooths:
spp = SinglePathPlotter(pymol_connector, linearize=get_linearize_method(options.simply_smooths))
else:
spp = SinglePathPlotter(pymol_connector, linearize=None)
ctypes_generic = [ct.generic for ct in ctypes]
ctypes_generic_list = sorted(list(set(ctypes_generic)))
if options.show_molecule:
with clui.fbm("Molecule"):
with reader.get() as traj_reader:
mda_ppr = mda.core.flags["permissive_pdb_reader"]
mda.core.flags["permissive_pdb_reader"] = False
pdb = TmpDumpWriterOfMDA()
frames_to_show = range2int(options.show_molecule_frames)
pdb.dump_frames(traj_reader, frames=frames_to_show, selection=options.show_molecule)
pymol_connector.load_pdb('molecule', pdb.close())
del pdb
mda.core.flags["permissive_pdb_reader"] = mda_ppr
# it would be nice to plot convexhull
if options.show_chull:
with clui.fbm("Convexhull"):
with reader.get() as traj_reader:
scope = traj_reader.parse_selection(options.show_chull)
frames_to_show = range2int(options.show_chull_frames)
for frame in frames_to_show:
traj_reader.set_real_frame(frame)
chull = scope.get_convexhull_of_atom_positions()
spp.convexhull(chull, state=frame + 1)
if options.show_object:
with clui.fbm("Object shape"):
with reader.get() as traj_reader:
object_shape = traj_reader.parse_selection(options.show_object)
frames_to_show = range2int(options.show_object_frames)
for frame in frames_to_show:
traj_reader.set_real_frame(frame)
chull = object_shape.get_convexhull_of_atom_positions()
spp.convexhull(chull, name='object_shape', color=np.array([255, 153, 0]) / 255.,
state=frame + 1) # orange
if options.inlets_clusters:
with clui.fbm("Clusters"):
# TODO: require stage V for that?
no_of_clusters = len(inls.clusters_list) # total, including outliers
cmap = ColorMapDistMap()
for c in inls.clusters_list:
# coords for current cluster
ics = inls.lim2clusters(c).coords
if c == 0:
c_name = 'out'
else:
c_name = str(int(c))
spp.scatter(ics, color=cmap(c), name="cluster_%s" % c_name)
if options.ctypes_raw:
with clui.fbm("CTypes raw"):
for nr, ct in enumerate(ctypes_generic_list):
clui.message(str(ct), cont=True)
sps = lind(spaths, what2what(ctypes_generic, [ct]))
plot_spaths_traces(sps, name=str(ct) + '_raw', split=False, spp=spp)
if ct in master_paths:
plot_spaths_traces([master_paths[ct]], name=str(ct) + '_raw_master', split=False, spp=spp)
if options.ctypes_smooth:
with clui.fbm("CTypes smooth"):
for nr, ct in enumerate(ctypes_generic_list):
clui.message(str(ct), cont=True)
sps = lind(spaths, what2what(ctypes_generic, [ct]))
plot_spaths_traces(sps, name=str(ct) + '_smooth', split=False, spp=spp, smooth=smooth)
if ct in master_paths_smooth:
if master_paths_smooth[ct] is None: continue
plot_spaths_traces([master_paths_smooth[ct]], name=str(ct) + '_smooth_master', split=False, spp=spp,
smooth=lambda anything: anything)
if ct in master_paths:
if master_paths[ct] is None: continue
plot_spaths_traces([master_paths[ct]], name=str(ct) + '_raw_master_smooth', split=False, spp=spp,
smooth=smooth)
if options.all_paths_raw:
with clui.fbm("All raw paths"):
plot_spaths_traces(spaths, name='all_raw', split=options.all_paths_split, spp=spp)
if options.all_paths_raw_io:
with clui.fbm("All raw paths io"):
plot_spaths_inlets(spaths, name='all_raw_paths_io', spp=spp)
if options.all_paths_smooth:
with clui.fbm("All smooth paths"):
plot_spaths_traces(spaths, name='all_smooth', split=options.all_paths_split, spp=spp, smooth=smooth)
if options.all_paths_smooth_io:
with clui.fbm("All smooth paths io"):
plot_spaths_inlets(spaths, name='all_smooth_paths_io', spp=spp)
with clui.fbm("Paths as states"):
if options.paths_raw:
clui.message("raw", cont=True)
plot_spaths_traces(spaths, name='raw_paths', states=options.paths_states, separate=not options.paths_states,
spp=spp)
if options.paths_smooth:
clui.message("smooth", cont=True)
plot_spaths_traces(spaths, name='smooth_paths', states=options.paths_states,
separate=not options.paths_states, smooth=smooth, spp=spp)
if options.paths_raw_io:
clui.message("raw_io", cont=True)
plot_spaths_inlets(spaths, name='raw_paths_io', states=options.paths_states,
separate=not options.paths_states, spp=spp)
if options.paths_smooth_io:
clui.message("smooth_io", cont=True)
plot_spaths_inlets(spaths, name='smooth_paths_io', states=options.paths_states,
separate=not options.paths_states, smooth=smooth, spp=spp)
if options.show_molecule:
pymol_connector.orient_on('molecule')
if is_pymol_connector_session(options.save):
with clui.fbm("Saving session (%s)" % options.save):
clui.message("") # new line
pbar = clui.pbar(len(spaths))
# FIXME: Loop over states is not required if there is no object with many states.
import time
for state in range(len(spaths)):
pymol_connector.cmd.set_frame(state + 1)
pbar.update(state)
time.sleep(0.1)
pbar.finish()
clui.message("Finalizing session saving...", cont=True) # new line
pymol_connector.cmd.set_frame(1)
pymol_connector.cmd.save(options.save, state=0)
pymol_connector.cmd.quit()
################################################################################
__all__ = '''clui
optimal_threads
aquaduct_version_nice
version_nice
ValveConfig
valve_begin
valve_load_config
valve_read_trajectory
valve_exec_stage
stage_I_run
stage_II_run
stage_III_run
stage_IV_run
stage_V_run
stage_VI_run
valve_end
'''.split()