Source code for aquaduct.apps.valvecore

# -*- coding: utf-8 -*-

# Aqua-Duct, a tool facilitating analysis of the flow of solvent molecules in molecular dynamic simulations
# Copyright (C) 2016-2017  Tomasz Magdziarz, Alicja Płuciennik, Michał Stolarczyk <info@aquaduct.pl>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


from aquaduct import logger, logger_name

################################################################################

import multiprocessing as mp
import ConfigParser
import cPickle as pickle
import StringIO
import copy
import gzip
import os
import re
import operator
import shlex
import sys

import numpy as np
import MDAnalysis as mda
import roman  # TODO: remove this dependency!

from collections import namedtuple, OrderedDict  # TODO: check if OrderedDict is REALLY used
from functools import wraps
from itertools import izip_longest
from keyword import iskeyword
from distutils.version import LooseVersion

from scipy.spatial.distance import cdist
# If scipy is relatively old and numpy is relatively new this triggers warning on oldnumeric module deprecation.
# This warning emerges if MDAnalysis is imported and then scipy. Observed under Ubuntu 14.04.

from aquaduct import greetings as greetings_aquaduct
from aquaduct import version as aquaduct_version
from aquaduct import version_nice as aquaduct_version_nice
from aquaduct.geom import traces
from aquaduct.geom.cluster import PerformClustering, DBSCAN, AffinityPropagation, MeanShift, KMeans, Birch, BarberCluster, get_required_params
from aquaduct.geom.cluster import  AVAILABLE_METHODS as available_clusterization_methods

from aquaduct.geom.convexhull import is_point_within_convexhull
from aquaduct.geom.master import CTypeSpathsCollection
from aquaduct.geom.smooth import WindowSmooth, MaxStepSmooth, WindowOverMaxStepSmooth, ActiveWindowSmooth, \
    ActiveWindowOverMaxStepSmooth, DistanceWindowSmooth, DistanceWindowOverMaxStepSmooth, SavgolSmooth
from aquaduct.traj.dumps import TmpDumpWriterOfMDA
from aquaduct.traj.inlets import Inlets, InletTypeCodes
from aquaduct.traj.paths import GenericPaths, yield_single_paths
from aquaduct.traj.reader import ReadViaMDA,atom2vdw_radius
from aquaduct.traj.selections import CompactSelectionMDA, SelectionMDA
from aquaduct.utils import clui
from aquaduct.utils.helpers import range2int, Auto, what2what, lind, is_number
from aquaduct.utils.multip import optimal_threads
from aquaduct.traj.barber import WhereToCut

def version():
    # TODO: remove it
    return 0, 10, 0


[docs]def version_nice():
    return '.'.join(map(str, version()))


def version_onenumber():
    return ''.join(map(str, version()))


__mail__ = 'info@aquaduct.pl'
__version__ = version_nice()


###############################################################################
# configuration file helpers

class ConfigSpecialNames:
    special_names_dict = {'none': None,
                          'null': None,
                          'true': True,
                          'false': False,
                          'yes': True,
                          'no': False,
                          'auto': Auto}

    def special_name(self, name):
        if isinstance(name, (str, unicode)):
            if name.lower() in self.special_names_dict:
                return self.special_names_dict[name.lower()]
        return name


# TODO: add parser for initial checking of configuration file
[docs]class ValveConfig(object, ConfigSpecialNames):
[docs]    def __init__(self):
        self.config = self.get_default_config()
        self.config_filename = ''

    def __make_options_nt(self, input_options):
        # options = {opt: self.special_name(input_options[opt]) for opt in input_options}
        options = list()
        for opt in input_options:
            if iskeyword(opt):
                logger.warning('Invalid keyword <%s> in config file skipped. Check configuration file.' % opt)
                continue
            if opt.replace('_','').isalnum():
                options.append((opt, self.special_name(input_options[opt])))
            else:
                logger.debug('Keyword <%s> in config file skipped.' % opt)
        options = OrderedDict(options)
        options_nt = namedtuple('Options', options.keys())
        return options_nt(**options)

[docs]    @staticmethod
    def common_config_names():
        # execute - what to do: skip, run
        # load - load previous results form file name
        # save - save results to file name
        return 'execute dump'.split()

[docs]    @staticmethod
    def common_traj_data_config_names():
        # scope - scope definition
        # scope_convexhull - take convex hull of scope, true of false
        # object - object definition
        return 'scope scope_convexhull object'.split()

[docs]    @staticmethod
    def global_name():
        return 'global'

[docs]    @staticmethod
    def cluster_name():
        return 'clusterization'

[docs]    @staticmethod
    def recluster_name():
        return 'reclusterization'

[docs]    @staticmethod
    def recursive_clusterization_name():
        return 'recursive_clusterization'

[docs]    @staticmethod
    def recursive_threshold_name():
        return 'recursive_threshold'

[docs]    @staticmethod
    def smooth_name():
        return 'smooth'

[docs]    def stage_names(self, nr=None):
        if nr is None:
            return [self.stage_names(nr) for nr in range(5)]
        else:
            if nr == 0:
                return 'traceable_residues'
            elif nr == 1:
                return 'raw_paths'
            elif nr == 2:
                return 'separate_paths'
            elif nr == 3:
                return 'inlets_clusterization'
            elif nr == 4:
                return 'analysis'
            elif nr == 5:
                return 'visualize'
        raise NotImplementedError('Stage %r is not implemented.' % nr)

[docs]    def get_common_traj_data(self, stage):
        assert isinstance(stage, int)
        # options = {name: None for name in self.common_traj_data_config_names()}
        options = OrderedDict(((name, None) for name in self.common_traj_data_config_names()))
        for nr in range(stage + 1)[::-1]:
            section = self.stage_names(nr)
            for name in self.common_traj_data_config_names():
                if self.config.has_option(section, name):
                    value = self.special_name(self.config.get(section, name))
                    if (value is not None) and (options[name] is None):
                        options.update({name: value})
        return self.__make_options_nt(options)

[docs]    def get_global_options(self):
        section = self.global_name()
        names = self.config.options(section)
        # options = {name: self.config.get(section, name) for name in names}
        options = OrderedDict(((name, self.config.get(section, name)) for name in names))
        return self.__make_options_nt(options)

[docs]    def get_stage_options(self, stage):
        assert isinstance(stage, int)
        stage_name = self.stage_names(stage)
        names = self.config.options(stage_name)
        # options = {name: self.config.get(stage_name, name) for name in names}
        options = OrderedDict(((name, self.config.get(stage_name, name)) for name in names))
        if stage in [0, 1]:
            options.update(self.get_common_traj_data(stage)._asdict())
        return self.__make_options_nt(options)

[docs]    def get_cluster_options(self, section_name=None):
        if section_name is None:
            section = self.cluster_name()
        else:
            section = section_name
        names = self.config.options(section)
        # options = {name: self.config.get(section, name) for name in names}
        options = OrderedDict(((name, self.config.get(section, name)) for name in names))
        # special recursive_clusterization option
        if self.recursive_clusterization_name() not in options:
            options.update({self.recursive_clusterization_name(): None})
        if self.recursive_threshold_name() not in options:
            options.update({self.recursive_threshold_name(): None})
        return self.__make_options_nt(options)

[docs]    def get_recluster_options(self):
        return self.get_cluster_options(section_name=self.recluster_name())

[docs]    def get_smooth_options(self):
        section = self.smooth_name()
        names = self.config.options(section)
        # options = {name: self.config.get(section, name) for name in names}
        options = OrderedDict(((name, self.config.get(section, name)) for name in names))
        return self.__make_options_nt(options)

[docs]    def get_default_config(self):
        # snr = 0 # stage number

        config = ConfigParser.RawConfigParser()
        #config = RawConfigParserWithComments()

        def common(section):
            for setting in self.common_config_names():
                value = None
                if setting == 'execute':
                    value = 'runonce'
                elif setting == 'dump':
                    value = '%d_%s_data.dump' % (snr + 1, section)
                if value is None:
                    config.set(section, setting)
                else:
                    config.set(section, setting, value=value)

        def common_traj_data(section,commented=False):
            for setting in self.common_traj_data_config_names():
                if commented:
                    setting = '#'+setting
                config.set(section, setting, 'None')

        ################
        # global settings
        section = self.global_name()
        config.add_section(section)
        #config.add_comment(section,'global settings')


        # top - top file name
        # nc - netcdf file name
        config.set(section, 'top')  # topology
        config.set(section, 'trj')  # trajectory

        # config.set(section, 'pbar', 'simple')


        ################
        snr = 0  # stage number
        # stage I
        # find traceable residues
        section = self.stage_names(snr)
        config.add_section(section)

        common(section)
        common_traj_data(section)

        ################
        snr += 1
        # stage II
        # find raw paths
        section = self.stage_names(snr)
        config.add_section(section)

        common(section)
        common_traj_data(section)

        config.set(section, 'clear_in_object_info', 'False')

        ################
        snr += 1
        # stage III
        # create separate frames
        section = self.stage_names(snr)
        config.add_section(section)

        common(section)

        config.set(section, 'auto_barber_tovdw', 'True')
        config.set(section, 'auto_barber_maxcut', '2.8')
        config.set(section, 'auto_barber_mincut', 'None')
        config.set(section, 'auto_barber_maxcut_level', 'True')
        config.set(section, 'auto_barber_mincut_level', 'True')
        config.set(section, 'auto_barber', 'False')
        config.set(section, 'discard_empty_paths', 'True')
        config.set(section, 'sort_by_id', 'True')
        config.set(section, 'apply_smoothing', 'False')
        config.set(section, 'apply_soft_smoothing', 'True')
        config.set(section, 'discard_short_paths', '1')

        ################
        snr += 1
        # stage IV
        # inlets clusterisation
        section = self.stage_names(snr)
        config.add_section(section)

        common(section)

        config.set(section, 'max_level', '0')
        config.set(section, 'recluster_outliers', 'False')
        config.set(section, 'detect_outliers', 'False')
        config.set(section, 'singletons_outliers', 'False')
        config.set(section, 'create_master_paths', 'False')

        ################
        # smooth
        section = self.smooth_name()
        config.add_section(section)
        config.set(section, 'method', 'window')

        ################
        # clusterization
        section = self.cluster_name()
        config.add_section(section)
        config.set(section, 'method', 'barber')
        config.set(section, self.recursive_clusterization_name(), self.cluster_name())
        config.set(section, self.recursive_threshold_name(), 'False')

        ################
        # reclusterization
        section = self.recluster_name()
        config.add_section(section)
        config.set(section, 'method', 'dbscan')
        config.set(section, self.recursive_clusterization_name(), 'False')
        config.set(section, self.recursive_threshold_name(), 'False')

        ################
        snr += 1
        # stage V
        # analysis
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        config.remove_option(section, 'dump')
        config.set(section, 'save', '%d_%s_results.txt' % (snr + 1, section))

        config.set(section, 'dump_config', 'True')

        ################
        snr += 1
        # stage VI
        # visualize
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        config.remove_option(section, 'dump')
        config.set(section, 'save', '%d_%s_results.py' % (snr + 1, section))

        config.set(section, 'simply_smooths', 'RecursiveVector')
        # visualize spaths, all paths in one object
        config.set(section, 'all_paths_raw', 'False')
        config.set(section, 'all_paths_smooth', 'False')
        config.set(section, 'all_paths_split', 'False')  # split by in obj out
        config.set(section, 'all_paths_raw_io', 'False')
        config.set(section, 'all_paths_smooth_io', 'False')

        # visualize spaths, separate objects
        config.set(section, 'paths_raw', 'False')
        config.set(section, 'paths_smooth', 'False')
        config.set(section, 'paths_states', 'False')
        config.set(section, 'paths_raw_io', 'False')
        config.set(section, 'paths_smooth_io', 'False')

        config.set(section, 'ctypes_raw', 'False')
        config.set(section, 'ctypes_smooth', 'False')

        # visualize clusters
        config.set(section, 'inlets_clusters', 'False')

        # show protein
        config.set(section, 'show_molecule', 'None')
        config.set(section, 'show_molecule_frames', '0')
        config.set(section, 'show_chull', 'None')
        config.set(section, 'show_chull_frames', '0')
        config.set(section, 'show_object', 'None')
        config.set(section, 'show_object_frames', '0')

        return config

[docs]    def load_config(self, filename):
        self.config.read(filename)
        self.config_filename = filename

[docs]    def save_config_stream(self, fs):
        self.config.write(fs)

[docs]    def save_config(self, filename):
        with open(filename, 'w') as fs:
            self.save_config_stream(fs)

[docs]    def get_general_comment(self,section):
        if section == self.global_name():
            return ['Global settings.']
        if section == self.cluster_name():
            out = ['Possible clusterization methods:']
            # get default clusterization method
            defmet = self.config.get(section,'method')
            for method in available_clusterization_methods:
                if method == defmet: continue
                out.append('method = %s' %method)
                params = get_required_params(method)
                if params:
                    sss = ''
                    if len(params) > 1:
                        sss = 's'
                    out.append('Required parameter%s for %s method:' % (sss,method))
                    for param in params:
                        out.append('%s = None' % param)
                    #out.append('')
            return out


[docs]    def dump_config(self,dump_template=False):
        skip_list = '''simply_smooths'''.split() # these options are very optional!
        concise = True
        if dump_template:
            concise = False

        output = []

        options = [self.get_global_options()] + \
                  [self.get_stage_options(stage) for stage in range(6)] + \
                  [self.get_cluster_options(), self.get_recluster_options(),
                   self.get_smooth_options()]
        names = [self.global_name()] + \
                [self.stage_names(stage) for stage in range(6)] + \
                [self.cluster_name(), self.recluster_name(),
                 self.smooth_name()]

        def value2str(val):
            if val is Auto:
                val = "Auto"
            else:
                val = str(val)
            return val

        # this loop ensures it is returned in particular order
        for opts, name in zip(options, names):
            output.append('[%s]' % name) # section name
            # general comments
            if dump_template:
                comment = self.get_general_comment(name)
                if comment:
                    output.extend(['# %s' % line for line in comment])
            for key,value in opts._asdict().iteritems(): # loop over options
                if key in skip_list: continue
                # comment scope etc. in stage II if dump_template
                if dump_template and name == self.stage_names(1):
                    if key in self.common_traj_data_config_names():
                        key = '#' + key
                output.append('%s = %s' % (key, value2str(value)))
            if not concise: output.append('')

        # is something missing? another loop over all additional sections
        for miss in self.config.sections():
            if miss in names: continue # skip if it was already dumped in the loop above
            output.append('[%s]' % miss)
            for key in self.config.options(miss): # loop over options in section miss
                if key in skip_list: continue
                value = self.config.get(miss, key)
                output.append('%s = %s' % (key, value2str(value)))
            if not concise: output.append('')
        while not len(output[-1].strip()):
            output.pop()
        return output


################################################################################
# reader helper class

class TrajectoryReader(object):
    def __init__(self, top, trj,frames_window=None):
        assert isinstance(top, (str, unicode)), "Topology file name missing, %s given instead" % str(top)
        assert isinstance(trj, (str, unicode)), "Trajectory file(s) name(s) missing, %s given instead" % str(trj)
        self.top = top
        self.trj = shlex.split(trj)
        self.frames_window = frames_window

    def get(self):
        return ReadViaMDA(self.top, self.trj, window=self.frames_window)

    @property
    def max_frame(self):
        with self.get() as tmp_reader:
            return tmp_reader.number_of_frames - 1  # returns 0-based value


def rebuild_selection(selection, reader):
    return CompactSelectionMDA(selection).toSelectionMDA(reader)


################################################################################
# convex hull helpers
# TODO: Move it to separate module.
# TODO: Following functions are or will be deprecated, remove them as soon as possible.

def CHullCheck(point):
    return CHullCheck.chull.point_within(point)


def CHullCheck_init(args):
    CHullCheck.chull = copy.deepcopy(args[0])


def CHullCheck_pool(chull, threads=optimal_threads.threads_count):
    return mp.Pool(threads, CHullCheck_init, [(chull,)])


def CHullCheck_exec(chull, points, threads=optimal_threads.threads_count):
    pool = CHullCheck_pool(chull, threads=threads)
    out = pool.map(CHullCheck, points)
    pool.close()
    pool.join()
    del pool
    return out


################################################################################
# in scope helpers

def check_res_in_scope(options, scope, res, res_coords):
    if options.scope_convexhull:
        # TODO: Remove it! This is deprecated code. It, probably, never runs.
        if len(res_coords) == 0:
            return []
        # find convex hull of protein
        chull = scope.get_convexhull_of_atom_positions()
        is_res_in_scope = CHullCheck_exec(chull, res_coords, threads=optimal_threads.threads_count)
    else:
        if res.unique_resids_number() == 0:
            return []
        res_in_scope_uids = scope.unique_resids(ikwid=True)
        # res_in_scope_uids = traj_reader.parse_selection(options.scope).unique_resids(ikwid=True)
        is_res_in_scope = [r.unique_resids(ikwid=True) in res_in_scope_uids for r in res.iterate_over_residues()]
    return is_res_in_scope


def get_res_in_scope(is_res_in_scope, res):
    res_new = None
    for iris, r in zip(is_res_in_scope, res.iterate_over_residues()):
        if iris:
            if res_new is None:
                res_new = r
            else:
                res_new += r
    return res_new


################################################################################
# separators - logging

def sep():
    return clui.gsep(sep='-', times=48)


def asep():
    return clui.gsep(sep='=', times=72)


################################################################################
# save - load helpers

def save_dump(filename, data_to_save, **kwargs):
    with clui.fbm('Saving data dump in %s file' % filename):
        with gzip.open(filename, mode='w', compresslevel=9) as f:
            # first version:
            pickle.dump({'version': version(),
                         'aquaduct_version': aquaduct_version()}, f)
            # then data to save:
            pickle.dump(data_to_save, f)
            # then other kwargs
            pickle.dump(kwargs, f)


class LoadDumpWrapper(object):
    """This is wrapper for pickled data that provides compatibility
    with earlier versions of Aqua-Duct.

    Conversions in use:

    1) replace 'aquaduct.' by 'aquaduct.'

    """

    def __init__(self, filehandle):
        self.fh = filehandle

    def convert(self, s):
        new_s = s
        new_s = new_s.replace('aqueduct.', 'aquaduct.')
        new_s = new_s.replace('aqueduct_version', 'aquaduct_version')
        return new_s

    def read(self, *args, **kwargs):
        return self.convert(self.fh.read(*args, **kwargs))

    def readline(self, *args, **kwargs):
        return self.convert(self.fh.readline(*args, **kwargs))


def load_dump(filename):
    with clui.fbm('Loading data dump from %s file' % filename):
        with gzip.open(filename, mode='r') as protof:
            f = LoadDumpWrapper(protof)
            # version!
            loaded_data = pickle.load(f)
            check_versions(loaded_data)

            # loaded data!
            loaded_data = pickle.load(f)
            # enything else?
            try:
                other_data = pickle.load(f)
                loaded_data.update({'other_data': other_data})
            except:
                pass
        return loaded_data
        # loaded_data_nt = namedtuple('LoadedData', loaded_data.keys())
        # return loaded_data_nt(**loaded_data)


def check_version_compliance(current, loaded, what):
    if current[0] > loaded[0]:
        logger.error('Loaded data has %s major version lower then the application.' % what)
    if current[0] < loaded[0]:
        logger.error('Loaded data has %s major version higher then the application.' % what)
    if current[0] != loaded[0]:
        logger.error('Possible problems with API compliance.')
    if current[1] > loaded[1]:
        logger.warning('Loaded data has %s minor version lower then the application.' % what)
    if current[1] < loaded[1]:
        logger.warning('Loaded data has %s minor version higher then the application.' % what)
    if current[1] != loaded[1]:
        logger.warning('Possible problems with API compliance.')


def check_versions(version_dict):
    assert isinstance(version_dict, (dict, OrderedDict)), "File is corrupted, cannot read version data."
    assert 'version' in version_dict, "File is corrupted, cannot read version data."
    assert 'aquaduct_version' in version_dict, "File is corrupted, cannot read version data."
    check_version_compliance(aquaduct_version(), version_dict['aquaduct_version'], 'Aqua-Duct')
    check_version_compliance(version(), version_dict['version'], 'Valve')


################################################################################
# save - load per stage

def save_stage_dump(name, **kwargs):
    # check if name is None
    if name is not None:
        # ok, check if some of kwargs have to be changed
        data_to_save = {}
        for key, value in kwargs.iteritems():
            # SelectionMDA
            if isinstance(value, SelectionMDA):
                value = CompactSelectionMDA(value)
            # options
            if 'options' in key:
                value = value._asdict()
            data_to_save.update({key: value})
            # now we are redy to save
        save_dump(name, data_to_save)


def load_stage_dump(name, reader=None):
    if name is not None:
        loaded_data = {}
        for key, value in load_dump(name).iteritems():
            # CompactSelectionMDA
            if isinstance(value, CompactSelectionMDA):
                with reader.get() as traj_reader:
                    value = value.toSelectionMDA(traj_reader)
            loaded_data.update({key: value})
        return loaded_data


################################################################################

def get_smooth_method(soptions):
    assert soptions.method in ['window', 'mss', 'window_mss', 'awin',
                               'awin_mss', 'dwin', 'dwin_mss',
                               'savgol'], 'Unknown smoothing method %s.' % soptions.method

    opts = {}
    if 'recursive' in soptions._asdict():
        opts.update({'recursive': int(soptions.recursive)})

    def window_opts():
        if 'window' in soptions._asdict():
            opts.update({'window': int(float(soptions.window))})
        function_opts()

    def awin_dwin_opts():
        if 'window' in soptions._asdict():
            opts.update({'window': float(soptions.window)})
        function_opts()

    def function_opts():
        if 'function' in soptions._asdict():
            assert soptions.function in ['mean', 'median'], 'Unknown smoothing function %s.' % soptions.function
            if soptions.function == 'mean':
                opts.update({'function': np.mean})
            if soptions.function == 'median':
                opts.update({'function': np.median})

    def mss_opts():
        if 'step' in soptions._asdict():
            opts.update({'step': float(soptions.step)})

    def savgol_opts():
        window_length = 5  # TODO: magic constant (default value)
        if 'window' in soptions._asdict():
            window_length = int(float(soptions.window))
            assert window_length % 2 == 1, 'Window in Savgol method must be positive odd number, %d given instead.' % window_length
            opts.update({'window_length': window_length})
        polyorder = 2  # TODO: magic constant (default value)
        if 'polyorder' in soptions._asdict():
            polyorder = int(float(soptions.polyorder))
            assert polyorder > 0, 'Polynomial order should be greater then 0, %d given instead.' % polyorder
            opts.update({'polyorder': polyorder})
        assert polyorder < window_length, 'Polynomial order (%d) should be less then window (%d).' % (
            polyorder, window_length)
        '''
        if 'deriv' in soptions._asdict():
            deriv = int(float(soptions.deriv))
            assert deriv >= 0 and deriv <= polyorder, 'Order of derrivative should be integer greater or equal 0 and less then or equal to Polynomial order (%d), %d given instead.' % (polyorder,deriv)
            opts.update({'deriv': deriv})
        if 'delta' in soptions._asdict():
            delta = float(soptions.delta)
            assert delta >= 0, 'Delta should be greater or equal 0, %f given instead.' % delta
            if 'deriv' in soptions._asdict():
                if deriv == 0:
                    logger.warning('Delta %f make no sense if deriv is 0.' % delta)
            opts.update({'delta': delta})
        mode = 'interp' # TODO: magic constant (default value)
        if 'mode' in soptions._asdict():
            mode = str(soptions.mode)
            assert mode in ['mirror', 'constant', 'nearest', 'wrap', 'interp'], 'Unknown mode %s.' % mode
            opts.update({'mode': mode})
        if 'cval' in soptions._asdict():
            cval = float(soptions.cval)
            if mode != 'constant':
                logger.warning('Cval make no sense if mode is %s.' % mode)
            opts.update({'cval': cval})
        '''

    if soptions.method == 'window':
        window_opts()
        smooth = WindowSmooth(**opts)
    elif soptions.method == 'awin':
        awin_dwin_opts()
        smooth = ActiveWindowSmooth(**opts)
    elif soptions.method == 'dwin':
        awin_dwin_opts()
        smooth = DistanceWindowSmooth(**opts)
    elif soptions.method == 'mss':
        mss_opts()
        smooth = MaxStepSmooth(**opts)
    elif soptions.method == 'window_mss':
        window_opts()
        mss_opts()
        smooth = WindowOverMaxStepSmooth(**opts)
    elif soptions.method == 'awin_mss':
        awin_dwin_opts()
        mss_opts()
        smooth = ActiveWindowOverMaxStepSmooth(**opts)
    elif soptions.method == 'dwin_mss':
        awin_dwin_opts()
        mss_opts()
        smooth = DistanceWindowOverMaxStepSmooth(**opts)
    elif soptions.method == 'savgol':
        savgol_opts()
        smooth = SavgolSmooth(**opts)

    return smooth


def get_clustering_method(coptions,config):
    assert coptions.method in available_clusterization_methods, 'Unknown clusterization method %s.' % coptions.method

    opts = {}

    def dbscan_opts():
        if 'eps' in coptions._asdict():
            opts.update({'eps': float(coptions.eps)})
        if 'min_samples' in coptions._asdict():
            opts.update({'min_samples': int(coptions.min_samples)})
        if 'metric' in coptions._asdict():
            assert coptions.metric in ['cityblock', 'cosine', 'euclidean',
                                       'manhattan'], "Unknown metric <%s>." % coptions.metric
            opts.update({'metric': str(coptions.metric)})
        if 'algorithm' in coptions._asdict():
            assert coptions.algorithm in ['auto', 'ball_tree', 'kd_tree',
                                          'brute'], "Unknown NN algorithm <%s>." % coptions.algorithm
            opts.update({'algorithm': str(coptions.algorithm)})
        if 'leaf_size' in coptions._asdict():
            opts.update({'leaf_size': int(coptions.leaf_size)})

    def affprop_opts():
        if 'damping' in coptions._asdict():
            opts.update({'damping': float(coptions.damping)})
        if 'convergence_iter' in coptions._asdict():
            opts.update({'convergence_iter': int(coptions.convergence_iter)})
        if 'max_iter' in coptions._asdict():
            opts.update({'max_iter': int(coptions.max_iter)})
        if 'preference' in coptions._asdict():
            opts.update({'preference': float(coptions.preference)})

    def kmeans_opts():
        if 'n_clusters' in coptions._asdict():
            opts.update({'n_clusters': int(coptions.n_clusters)})
        if 'max_iter' in coptions._asdict():
            opts.update({'max_iter': int(coptions.max_iter)})
        if 'n_init' in coptions._asdict():
            opts.update({'n_init': int(coptions.n_init)})
        if 'init' in coptions._asdict():
            assert coptions.init in ['k-means++', 'random'], "Unknown initialization method <%s>." % coptions.init
            opts.update({'init': str(coptions.init)})
        if 'tol' in coptions._asdict():
            opts.update({'tol': float(coptions.tol)})

    def meanshift_opts():
        if 'cluster_all' in coptions._asdict():
            opts.update({'cluster_all': bool(coptions.cluster_all)})
        if 'bandwidth' in coptions._asdict():
            if coptions.bandwidth in (Auto, None):
                opts.update({'bandwidth': coptions.bandwidth})
            else:
                opts.update({'bandwidth': float(coptions.bandwidth)})
        if 'bin_seeding' in coptions._asdict():
            opts.update({'bin_seeding': bool(coptions.bin_seeding)})
        if 'min_bin_freq' in coptions._asdict():
            opts.update({'min_bin_freq': int(coptions.min_bin_freq)})

    def birch_opts():
        if 'threshold' in coptions._asdict():
            opts.update({'threshold': float(coptions.threshold)})
        if 'branching_factor' in coptions._asdict():
            opts.update({'branching_factor': int(coptions.branching_factor)})
        if 'n_clusters' in coptions._asdict():
            opts.update({'n_clusters': int(coptions.n_clusters)})

    def barber_opts():
        abo = config.get_stage_options(2)
        if abo.auto_barber:
            opts.update({'selection': str(abo.auto_barber)})
            if is_number(abo.auto_barber_mincut):
                opts.update({'mincut': float(abo.auto_barber_mincut)})
            else:
                opts.update({'mincut': None})
            if is_number(abo.auto_barber_maxcut):
                opts.update({'maxcut': float(abo.auto_barber_maxcut)})
            else:
                opts.update({'maxcut': None})
            opts.update({'mincut_level': bool(abo.auto_barber_mincut_level)})
            opts.update({'maxcut_level': bool(abo.auto_barber_maxcut_level)})
            opts.update({'tovdw': bool(abo.auto_barber_tovdw)})
        if 'auto_barber' in coptions._asdict():
            opts.update({'selection': str(coptions.auto_barber)})
        if 'auto_barber_maxcut' in coptions._asdict():
            if is_number(coptions.auto_barber_maxcut):
                opts.update({'maxcut': float(coptions.auto_barber_maxcut)})
            else:
                opts.update({'maxcut': None})
        if 'auto_barber_mincut' in coptions._asdict():
            if is_number(coptions.auto_barber_mincut):
                opts.update({'mincut': float(coptions.auto_barber_mincut)})
            else:
                opts.update({'mincut': None})
        if 'auto_barber_mincut_level' in coptions._asdict():
            opts.update({'mincut_level': bool(coptions.auto_barber_mincut_level)})
        if 'auto_barber_maxcut_level' in coptions._asdict():
            opts.update({'maxcut_level': bool(coptions.auto_barber_maxcut_level)})
        if 'auto_barber_tovdw' in coptions._asdict():
            opts.update({'tovdw': bool(coptions.auto_barber_tovdw)})

    if coptions.method == 'dbscan':
        dbscan_opts()
        method = DBSCAN
    elif coptions.method == 'affprop':
        affprop_opts()
        method = AffinityPropagation
    elif coptions.method == 'kmeans':
        kmeans_opts()
        method = KMeans
    elif coptions.method == 'meanshift':
        meanshift_opts()
        method = MeanShift
    elif coptions.method == 'birch':
        birch_opts()
        method = Birch
    elif coptions.method == 'barber':
        barber_opts()
        method = BarberCluster

    return PerformClustering(method, **opts)


def get_linearize_method(loption):
    if loption:
        assert isinstance(loption, (str, unicode)), "Wrong Linearize method definition: %r" % loption
        possible_formats = [
            re.compile('^(recursive|oneway|hobbit)(triangle|vector)[(][+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[)]$'),
            re.compile('^(recursive|oneway|hobbit)(triangle|vector)[(][)]$'),
            re.compile('^(recursive|oneway|hobbit)(triangle|vector)$')]
        assert True in [pf.match(loption.lower()) is not None for pf in
                        possible_formats], "Wrong Linearize method definition: %s" % loption
        # http://stackoverflow.com/questions/12929308/python-regular-expression-that-matches-floating-point-numbers#12929311
        way = [w for w in ['recursive', 'oneway', 'hobbit'] if w in loption.lower()][0]
        crit = [c for c in ['triangle', 'vector'] if c in loption.lower()][0]
        threshold = re.compile('[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?').findall(loption)
        if len(threshold):
            threshold = float(threshold[0][0])
        else:
            threshold = None
        # get method
        if way == 'recursive':
            if crit == 'triangle':
                met = traces.LinearizeRecursiveTriangle
            else:
                met = traces.LinearizeRecursiveVector
        elif way == 'oneway':
            if crit == 'triangle':
                met = traces.LinearizeOneWayTriangle
            else:
                met = traces.LinearizeOneWayVector
        elif way == 'hobbit':
            if crit == 'triangle':
                met = traces.LinearizeHobbitTriangle
            else:
                met = traces.LinearizeHobbitVector
        if threshold is None:
            return met()
        return met(threshold)


################################################################################

[docs]def valve_begin():
    clui.message(greetings_aquaduct())  # nice greetings
    clui.message('Aqua-Duct version %s' % aquaduct_version_nice())
    # clui.message('Valve driver version %s' % version_nice())
    clui.message(sep())


[docs]def valve_end():
    clui.message(sep())
    clui.message('Let the Valve be always open!')
    clui.message('Goodby!')


[docs]def valve_load_config(filename, config):
    assert filename is not None, "No config file provided."
    assert os.path.isfile(filename), "Config file %s does not exist." % filename
    with clui.fbm('Load configuration file'):
        config.load_config(filename)


[docs]def valve_read_trajectory(top, traj, frames_window=None):
    with clui.fbm('Read trajectory'):
        return TrajectoryReader(top, traj, frames_window=frames_window)
        # read trajectory
        # traj_list = shlex.split(traj)
        # return ReadAmberNetCDFviaMDA(top, traj_list)
        # reader = ReadDCDviaMDA(topology, trajectory)


def valve_begin_stage(stage, config):
    clui.message(sep())
    clui.message('Starting Stage %s: %s' % (roman.toRoman(stage + 1), config.stage_names(stage)))
    options = config.get_stage_options(stage)
    clui.message('Execute mode: %s' % options.execute)
    return options


[docs]def valve_exec_stage(stage, config, stage_run, reader=None, no_io=False, run_status=None,
                     **kwargs):
    with clui.tictoc('Stage %s (%s)' % (roman.toRoman(stage + 1), config.stage_names(stage))):

        # This function runs stages in a smart way, checks execution logic, and loads/saves dumps if required.
        options = valve_begin_stage(stage, config)

        run_status.update({stage: False})

        # TODO: Consider to create traj_reader object here instead of doing it in stage_run or in load...
        # execute?
        can_be_loaded = False
        if (not no_io) and options.dump:
            if os.path.isfile(options.dump) or os.path.islink(options.dump):
                can_be_loaded = True
        # has to be run?
        if options.execute in ['runonce'] and can_be_loaded and stage > 0:
            if run_status[stage - 1]:
                can_be_loaded = False

        if options.execute in ['run'] or (options.execute in ['runonce'] and not can_be_loaded):
            result = stage_run(config, options, reader=reader, **kwargs)
            run_status.update({stage: True})
            if not no_io:
                ###########
                # S A V E #
                ###########
                save_stage_dump(options.dump, **result)
        elif options.execute in ['skip'] or (options.execute in ['runonce'] and can_be_loaded):
            if not no_io:
                ###########
                # L O A D #
                ###########
                if options.dump:
                    result = load_stage_dump(options.dump, reader=reader)
        else:
            raise NotImplementedError('exec mode %s not implemented' % options.execute)
        # remove options stuff
        if not no_io:
            if result is not None:
                return dict(((key, val) for key, val in result.iteritems() if 'options' not in key))


################################################################################
# stages run

# traceable_residues
[docs]def stage_I_run(config, options,
                reader=None,
                **kwargs):

    # create pool of workers - mapping function
    map_fun = map
    if optimal_threads.threads_count > 1:
        pool = mp.Pool(optimal_threads.threads_count)
        map_fun = pool.map

    with reader.get() as traj_reader:

        clui.message("Loop over frames - search of residues in object:")
        pbar = clui.pbar(traj_reader.number_of_frames)

        scope = traj_reader.parse_selection(options.scope)
        # scope will be used to derrive center of system
        center_of_system = np.array([0., 0., 0.])

        # create some containers
        res_ids_in_object_over_frames = {}
        all_res = None

        # the loop over frames
        for frame in traj_reader.iterate_over_frames():
            # center of system
            center_of_system += scope.center_of_mass()
            # current res selection
            res = traj_reader.parse_selection(options.object)
            # find matching residues:
            res_new = scope.containing_residues(res, convex_hull=options.scope_convexhull, map_fun=map_fun)
            # adds them to all_res
            if all_res:
                all_res += res_new
                all_res.uniquify()
            else:
                all_res = res_new
            # remeber ids of res in object in current frame
            if res_new is not None:
                res_ids_in_object_over_frames.update({frame: res_new.unique_resids(ikwid=True)})
            else:
                res_ids_in_object_over_frames.update({frame: []})
            pbar.update(frame)

    # destroy pool of workers
    if optimal_threads.threads_count > 1:
        pool.close()
        pool.join()
        del pool

    pbar.finish()
    center_of_system /= (frame + 1)
    logger.info('Center of system is %0.2f, %0.2f, %0.2f' % tuple(center_of_system))

    if all_res is None:
        raise ValueError("No traceable residues was found.")

    clui.message("Number of residues to trace: %d" % all_res.unique_resids_number())

    return {'all_res': all_res,
            'res_ids_in_object_over_frames': res_ids_in_object_over_frames,
            'center_of_system': center_of_system,
            'options': options}


################################################################################

# raw_paths
[docs]def stage_II_run(config, options,
                 reader=None,
                 all_res=None,
                 res_ids_in_object_over_frames=None,
                 **kwargs):
    if options.clear_in_object_info:
        clui.message('Clear data on residues in object over frames.')
        clui.message('This will be recalculated on demand.')
        res_ids_in_object_over_frames = {}

    with reader.get() as traj_reader:

        with clui.fbm("Init paths container"):
            paths = dict(((resid, GenericPaths(resid, min_pf=0, max_pf=traj_reader.number_of_frames-1)) for resid in
                          all_res.unique_resids(ikwid=True)))

        scope = traj_reader.parse_selection(options.scope)

        with clui.fbm("Rebuild treceable residues with current trajectory"):
            all_res = rebuild_selection(all_res, traj_reader)

        clui.message("Trajectory scan:")
        pbar = clui.pbar(traj_reader.number_of_frames)

        # create pool of workers - mapping function
        map_fun = map
        if optimal_threads.threads_count > 1:
            pool = mp.Pool(optimal_threads.threads_count)
            map_fun = pool.map

        for frame in traj_reader.iterate_over_frames():

            all_res_coords = list(all_res.center_of_mass_of_residues())  # this uses iterate over residues
            all_resids = [res.first_resid() for res in all_res.iterate_over_residues()]
            # check if is res are in scope
            is_res_in_scope = scope.contains_residues(all_res, convex_hull=options.scope_convexhull, map_fun=map_fun)

            # loop over coord, is  in scope, and resid
            for nr, (coord, isscope, resid) in enumerate(zip(all_res_coords, is_res_in_scope, all_resids)):
                # the point is that nr is not pointing to correct element in paths
                # now, nr is useless because paths is a dictionary, use resids instead
                assert paths[resid].id == resid, \
                    "Internal error. Paths IDs not synced with resids. \
                     Please send a bug report to the developer(s): %s" % __mail__
                if isscope:
                    paths[resid].add_coord(coord)

                    # do we have info on res_ids_in_object_over_frames?
                    if frame not in res_ids_in_object_over_frames:
                        res = traj_reader.parse_selection(options.object)
                        # discard res out of scope
                        res_new = get_res_in_scope(is_res_in_scope, res)
                        # remeber ids of res in object in current frame
                        if res_new is not None:
                            res_ids_in_object_over_frames.update({frame: res_new.unique_resids(ikwid=True)})
                        else:
                            res_ids_in_object_over_frames.update({frame: []})

                    # in scope
                    if resid not in res_ids_in_object_over_frames[frame]:
                        paths[resid].add_scope(frame)
                    else:
                        # in object
                        paths[resid].add_object(frame)

            pbar.update(frame)

    # destroy pool of workers

    if optimal_threads.threads_count > 1:
        pool.close()
        pool.join()
        del pool

    pbar.finish()

    clui.message("Number of paths: %d" % len(paths))

    return {'all_res': all_res, 'paths': paths, 'options': options}


################################################################################

class ABSphere(namedtuple('ABSphere', 'center radius')):
    pass


# separate_paths
[docs]def stage_III_run(config, options,
                  paths=None,
                  reader=None,
                  **kwargs):
    soptions = config.get_smooth_options()

    if options.discard_empty_paths:
        with clui.fbm("Discard residues with empty paths"):
            for key in paths.keys():
                if len(paths[key].frames) == 0:
                    paths.pop(key)

    clui.message("Create separate paths:")
    pbar = clui.pbar(len(paths))
    # yield_single_paths requires a list of paths not a dictionary
    spaths = [sp for sp, nr in yield_single_paths(paths.values(), progress=True) if pbar.update(nr + 1) is None]
    pbar.finish()

    if options.discard_short_paths > 0:
        shorter_then = int(options.discard_short_paths)
        with clui.fbm("Discard paths shorter then %d" % shorter_then):
            spaths = [sp for sp in spaths if sp.size > shorter_then]

    if options.auto_barber:
        with reader.get() as traj_reader:
            wtc = WhereToCut(spaths,traj_reader,
                             selection=options.auto_barber,
                             mincut=options.auto_barber_mincut,
                             mincut_level=options.auto_barber_mincut_level,
                             maxcut=options.auto_barber_maxcut,
                             maxcut_level=options.auto_barber_maxcut_level,
                             tovdw=options.auto_barber_mincut)
            # cut thyself!
            wtc.cut_thyself()

        clui.message("Auto Barber in action:")
        pbar = clui.pbar(len(paths))
        for p in paths.values():
            p.barber_with_spheres(wtc.spheres)
            pbar.next()
        pbar.finish()
        # now, it might be that some of paths are empty
        # paths = {k: v for k, v in paths.iteritems() if len(v.coords) > 0}
        paths = dict((k, v) for k, v in paths.iteritems() if
                     len(v.coords) > 0)  # more universal as dict comprehension may not work in <2.7
        clui.message("Recreate separate paths:")
        pbar = clui.pbar(len(paths))
        # yield_single_paths requires a list of paths not a dictionary
        spaths = [sp for sp, nr in yield_single_paths(paths.values(), progress=True) if pbar.update(nr + 1) is None]
        pbar.finish()

        if options.discard_short_paths > 0:
            shorter_then = int(options.discard_short_paths)
            with clui.fbm("Discard (again) paths shorter then %d" % shorter_then):
                spaths = [sp for sp in spaths if sp.size > shorter_then]

    if options.sort_by_id:
        with clui.fbm("Sort separate paths by resid"):
            spaths = sorted(spaths, key=lambda sp: (sp.id.id, sp.id.nr))
    # apply smoothing?
    if options.apply_smoothing or options.apply_soft_smoothing:
        smooth = get_smooth_method(soptions)
    if options.apply_smoothing:
        clui.message('Applying hard smoothing:')
        pbar = clui.pbar(len(spaths))
        for nr, sp in enumerate(spaths):
            sp.apply_smoothing(smooth)
            pbar.update(nr + 1)
        pbar.finish()
    if options.apply_soft_smoothing:
        clui.message('Applying soft smoothing:')
        pbar = clui.pbar(len(spaths))
        for nr, sp in enumerate(spaths):
            sp.get_coords(smooth=smooth)
            pbar.update(nr + 1)
        pbar.finish()

    clui.message("Number of paths: %d" % len(paths))
    clui.message("Number of spaths: %d" % len(spaths))

    return {'paths': paths, 'spaths': spaths, 'options': options, 'soptions': soptions}


################################################################################

def get_skip_size_function(rt=None):
    if not isinstance(rt, str): return None
    assert re.compile('^[<>=]+[0-9.]+$').match(rt) is not None, "Wrong threshold definition: %s" % rt
    op = re.compile('[<>=]+')
    op = ''.join(sorted(op.findall(rt)[0]))
    vl = re.compile('[0-9.]+')
    vl = float(vl.findall(rt)[0])
    operator_dict = {'>': operator.gt,
                     '=>': operator.ge,
                     '<=': operator.le,
                     '<': operator.lt}
    operator_dict = {'>': operator.ge,
                     '=>': operator.gt,
                     '<=': operator.lt,
                     '<': operator.le}
    assert op in operator_dict.keys(), "Unsupported operator %s in threshold %s" % (op, rt)
    return lambda size_of_cluster: operator_dict[op](vl, size_of_cluster)

def get_allow_size_function(rt=None):
    if not isinstance(rt, str): return None
    assert re.compile('^[<>=]+[0-9.]+$').match(rt) is not None, "Wrong threshold definition: %s" % rt
    op = re.compile('[<>=]+')
    op = ''.join(sorted(op.findall(rt)[0]))
    vl = re.compile('[0-9.]+')
    vl = float(vl.findall(rt)[0])
    operator_dict = {'>': operator.gt,
                     '=>': operator.ge,
                     '<=': operator.le,
                     '<': operator.lt}
    assert op in operator_dict.keys(), "Unsupported operator %s in threshold %s" % (op, rt)
    return lambda size_of_cluster: operator_dict[op](size_of_cluster, vl)


class SkipSizeFunction(object):

    def __init__(self,ths_def):

        self.thresholds = []
        if isinstance(ths_def,(str,unicode)):
            for thd in ths_def.split():
                self.thresholds.append(get_allow_size_function(thd))

    def __call__(self,size_of_cluster):
        for thd in self.thresholds:
            if not thd(size_of_cluster):
                return True
        return False

def potentially_recursive_clusterization(config=None,
                                         clusterization_name=None,
                                         inlets_object=None,
                                         spaths=None,
                                         traj_reader=None,
                                         message='clusterization',
                                         deep=0,
                                         max_level=5):
    with clui.fbm("Performing %s, level %d of %d" % (message, deep, max_level), cont=False):
        logger.debug('Clustering options section: %s' % clusterization_name)
        cluster_options = config.get_cluster_options(section_name=clusterization_name)
        clui.message('Clustering options:')
        for k, v in cluster_options._asdict().iteritems():
            clui.message("%s = %s" % (str(k), str(v)))
        # TODO: Print clusterization options in a nice way!
        clustering_function = get_clustering_method(cluster_options,config)
        # special case of barber!!!
        if cluster_options.method == 'barber':
            inlets_refs = inlets_object.get_inlets_references()
            wtc = WhereToCut([sp for sp in spaths if sp.id in inlets_refs],
                             traj_reader,
                             forceempty=True,
                             **clustering_function.method_kwargs)
            radii = [sphe.radius for sphe in wtc.spheres]
            inlets_object.add_radii(radii)
        # get skip_size function according to recursive_treshold
        #skip_size = get_skip_size_function(cluster_options.recursive_threshold)
        skip_size = SkipSizeFunction(cluster_options.recursive_threshold)
        inlets_object.perform_reclustering(clustering_function, skip_outliers=True, skip_size=skip_size)
    clui.message('Number of clusters detected so far: %d' % len(inlets_object.clusters_list))

    if cluster_options.recursive_clusterization:
        deep += 1
        if deep > max_level:
            return
        return potentially_recursive_clusterization(config=config,
                                                    clusterization_name=cluster_options.recursive_clusterization,
                                                    inlets_object=inlets_object,
                                                    spaths=spaths,
                                                    traj_reader=traj_reader,
                                                    deep=deep,
                                                    max_level=max_level)


# inlets_clusterization
[docs]def stage_IV_run(config, options,
                 spaths=None,
                 center_of_system=None,
                 reader=None,
                 **kwargs):
    coptions = config.get_cluster_options()
    rcoptions = config.get_recluster_options()
    soptions = config.get_smooth_options()

    max_level = int(options.max_level)
    assert max_level >= 0

    # new style clustering
    with clui.fbm("Create inlets"):
        # here we can check center of system
        inls = Inlets(spaths, center_of_system=center_of_system)
    clui.message("Number of inlets: %d" % inls.size)

    def noo():
        # returns number of outliers
        if 0 in inls.clusters_list:
            return inls.clusters.count(0)
        return 0

    if inls.size > 0:
        # ***** CLUSTERIZATION *****
        with clui.fbm("Performing clusterization", cont=False):
            with reader.get() as traj_reader:
                potentially_recursive_clusterization(config=config,
                                                     clusterization_name=config.cluster_name(),
                                                     inlets_object=inls,
                                                     spaths=spaths,
                                                     traj_reader=traj_reader,
                                                     message='clusterization',
                                                     max_level=max_level)
        # with log.fbm("Performing clusterization"):
        #    clustering_function = get_clustering_method(coptions)
        #    inls.perform_clustering(clustering_function)
        clui.message('Number of outliers: %d' % noo())
        # ***** OUTLIERS DETECTION *****
        if options.detect_outliers:
            with clui.fbm("Detecting outliers",cont=False):
                if options.detect_outliers is not Auto:
                    threshold = float(options.detect_outliers)
                clusters = list(inls.clusters)
                no_out_detected = 0
                for cluster, center, std in zip(inls.clusters_list,
                                                inls.clusters_centers,
                                                inls.clusters_std):
                    if cluster == 0:
                        continue
                    inls_lim = inls.lim2clusters(cluster)
                    dmat = cdist(np.matrix(center), inls_lim.coords, metric='euclidean').flatten()
                    # Auto procedure
                    if options.detect_outliers is Auto:
                        threshold = std * 4  # FIXME: magic constant!
                    # defined threshold procedure
                    for nr, (d, ids) in enumerate(zip(dmat, inls_lim.inlets_ids)):
                        # print d, threshold
                        if d > threshold:
                            clusters[ids] = 0
                            no_out_detected += 1
                clui.message('Detected %d outliers.' % no_out_detected)
                inls.add_outliers_annotations(clusters)
            clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
            clui.message('Number of outliers: %d' % noo())
        # ***** RECLUSTERIZATION *****
        if options.recluster_outliers:
            with clui.fbm("Performing reclusterization of outliers", cont=False):
                with reader.get() as traj_reader:
                    '''
                    potentially_recursive_clusterization(config=config,
                                                     clusterization_name=config.recluster_name(),
                                                     inlets_object=inls,
                                                     spaths=spaths,
                                                     traj_reader=traj_reader,
                                                     message='reclusterization',
                                                     max_level=max_level)
                    '''
                    clustering_function = get_clustering_method(rcoptions,config)
                    # perform reclusterization
                    # inls.recluster_outliers(clustering_function)
                    inls.recluster_cluster(clustering_function, 0)
            clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
            clui.message('Number of outliers: %d' % noo())
        # ***** SINGLETONS REMOVAL *****
        if options.singletons_outliers:
            with clui.fbm("Removing clusters of size %d" % int(options.singletons_outliers)):
                inls.small_clusters_to_outliers(int(options.singletons_outliers))
            clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
            clui.message('Number of outliers: %d' % noo())

        clui.message('Clustering history:')
        clui.message(clui.print_simple_tree(inls.tree,prefix='').rstrip())

        with clui.fbm("Calculating cluster types"):
            ctypes = inls.spaths2ctypes(spaths)

        # now, there is something to do with ctypes!
        # we can create master paths!
        # but only if user wants this
        master_paths = {}
        master_paths_smooth = {}
        if options.create_master_paths:
            with clui.fbm("Creating master paths for cluster types", cont=False):
                smooth = get_smooth_method(soptions)
                ctypes_generic = [ct.generic for ct in ctypes]
                ctypes_generic_list = sorted(list(set(ctypes_generic)))

                pbar = clui.pbar(len(spaths) * 2)
                for nr, ct in enumerate(ctypes_generic_list):
                    logger.debug('CType %s (%d)' % (str(ct), nr))
                    sps = lind(spaths, what2what(ctypes_generic, [ct]))
                    logger.debug('CType %s (%d), number of spaths %d' % (str(ct), nr, len(sps)))
                    # print len(sps),ct
                    ctspc = CTypeSpathsCollection(spaths=sps, ctype=ct, pbar=pbar, threads=optimal_threads.threads_count)
                    master_paths.update({ct: ctspc.get_master_path(resid=nr)})
                    master_paths_smooth.update({ct: ctspc.get_master_path(resid=nr, smooth=smooth)})
                    del ctspc
                pbar.finish()

    else:
        clui.message("No inlets found. Clusterization skipped.")
        # make empty results
        ctypes = inls.spaths2ctypes(spaths)
        master_paths = {}
        master_paths_smooth = {}

    return {'inls': inls,
            'ctypes': ctypes,
            'master_paths': master_paths,
            'master_paths_smooth': master_paths_smooth}


################################################################################

def make_line(template, line):
    return (' '.join(template)) % tuple(line)


def make_header_template(line_template):
    header_template = []
    col_re = re.compile('[0-9]+')
    for l in line_template:
        header_template.append('%{0}s'.format(col_re.findall(l)[0]))
    return header_template


def nr_header():
    return ['Nr'], ['%7d']


def get_header_line_and_line_template(header_line_and_line_template, head_nr=False):
    header, line_template = header_line_and_line_template

    header_template = make_header_template(line_template)
    # head_nr? only to header
    if head_nr:
        nrh, nrlt = nr_header()
        header_template = make_header_template(nrlt + line_template)
        header = nrh + header
    header_line = make_line(header_template, header)

    return header_line, line_template


def spath_id_header():
    return ['ID'], ['%9s']


def add_path_id_head(gen):
    sph, splt = spath_id_header()

    @wraps(gen)
    def patched(*args, **kwargs):
        add_id = True
        if 'add_id' in kwargs:
            add_id = kwargs.pop('add_id')
        h, lt = gen(*args, **kwargs)
        if add_id:
            return sph + h, splt + lt
        return h, lt

    return patched


def add_path_id(gen):
    @wraps(gen)
    def patched(spath, *args, **kwargs):
        add_id = True
        if 'add_id' in kwargs:
            add_id = kwargs.pop('add_id')
        line = gen(spath, *args, **kwargs)
        if add_id:
            line = [spath.id] + line
        return line

    return patched


def size_header():
    return ['Size'], ['%7d']


def add_size_head(gen):
    sph, splt = size_header()

    @wraps(gen)
    def patched(*args, **kwargs):
        h, lt = gen(*args, **kwargs)
        return sph + h, splt + lt

    return patched


def add_size(gen):
    @wraps(gen)
    def patched(spaths, add_size=True, *args, **kwargs):
        line = gen(spaths, *args, **kwargs)
        if add_size:
            line = [len(spaths)] + line
        return line

    return patched


def cluster_id_header():
    return ['Cluster'], ['%7d']


def add_cluster_id_head(gen):
    sph, splt = cluster_id_header()

    @wraps(gen)
    def patched(*args, **kwargs):
        h, lt = gen(*args, **kwargs)
        return sph + h, splt + lt

    return patched


def add_cluster_id(gen):
    @wraps(gen)
    def patched(cluster, something, add_id=True, *args, **kwargs):
        line = gen(cluster, something, *args, **kwargs)
        if add_id:
            line = [int(cluster)] + line
        return line

    return patched


def ctype_id_header():
    return ['CType'], ['%7s']


def add_ctype_id_head(gen):
    sph, splt = ctype_id_header()

    @wraps(gen)
    def patched(*args, **kwargs):
        h, lt = gen(*args, **kwargs)
        return sph + h, splt + lt

    return patched


def add_ctype_id(gen):
    @wraps(gen)
    def patched(ctype, something, add_id=True, *args, **kwargs):
        line = gen(ctype, something, *args, **kwargs)
        if add_id:
            line = [str(ctype)] + line
        return line

    return patched

class PrintAnalysis(object):
    nr_template = '%7d '

    # TODO: Change it in such a way that it cooperates well with debug-file option.
    def __init__(self, fileoption):
        self.output2stderr = False
        if fileoption:
            self.filehandle = open(fileoption, 'w')
            # self.output2stderr = True
        else:
            self.filehandle = sys.stdout

    def __call__(self, info2print, nr=None):
        if nr is not None:
            info2print = (self.nr_template % nr) + info2print
        if self.output2stderr:
            clui.message(info2print)
        print >> self.filehandle, info2print

    def sep(self):
        self(asep())

    def thead(self, info2print):
        self(clui.thead(info2print))

    def under(self, info2print):
        self(clui.underline(info2print))


################################################################################

@add_path_id_head
def spath_basic_info_header():
    header = 'BeginF InpF ObjF OutF EndF'.split()
    line_template = ['%7d'] * len(header)
    return header, line_template


@add_path_id
def spath_basic_info(spath):
    line = []
    line.append(spath.begins)
    line.extend(map(len, (spath.path_in, spath.path_object, spath.path_out)))
    line.append(spath.ends)
    return line


################

@add_path_id_head
def spath_lenght_total_info_header():
    header = 'InpL ObjL OutL'.split()
    line_template = ['%9.1f'] * len(header)
    return header, line_template


@add_path_id
def spath_lenght_total_info(spath):
    line = []
    for t in traces.midpoints(spath.coords):
        if len(t) > 1:  # traces.length_step_std requires at least 2 points
            line.append(traces.length_step_std(t)[0])
        else:
            line.append(float('nan'))
    return line


################

@add_path_id_head
def spath_steps_info_header():
    header = 'InpS InpStdS ObjS ObjStdS OutS OutStdS'.split()
    line_template = ['%8.2f', '%8.3f'] * (len(header) / 2)
    return header, line_template


@add_path_id
def spath_steps_info(spath):
    line = []
    for t in traces.midpoints(spath.coords):
        if len(t) > 0:
            line.extend(traces.length_step_std(t)[1:])
        else:
            line.extend([float('nan'), float('nan')])
    return line


################

@add_path_id_head
def spath_ctype_header():
    header, line_template = ctype_id_header()
    return header, line_template


@add_path_id
def spath_ctype(spath, ctype=None):
    line = [str(ctype)]
    return line


################

@add_path_id_head
def spath_full_info_header():
    header = []
    line_template = []
    for h, lt in (spath_basic_info_header(add_id=False),
                  spath_lenght_total_info_header(add_id=False),
                  spath_steps_info_header(add_id=False),
                  spath_ctype_header(add_id=False)):
        header += h
        line_template += lt
    return header, line_template


@add_path_id
def spath_full_info(spath, ctype=None):
    line = []
    for l in (spath_basic_info(spath, add_id=False),
              spath_lenght_total_info(spath, add_id=False),
              spath_steps_info(spath, add_id=False),
              spath_ctype(spath, ctype=ctype, add_id=False)):
        line += l
    return line


################################################################################

@add_size_head
def spaths_lenght_total_header():
    header = 'Inp InpStd Obj ObjStd Out OutStd'.split()
    line_template = ['%8.1f', '%8.2f'] * (len(header) / 2)
    return header, line_template


@add_size
def spaths_length_total(spaths):
    line = []
    d4s = []
    for sp in spaths:
        d4s.append(spath_lenght_total_info(sp, add_id=False))
    d4s = np.array(d4s)
    line.extend(np.mean(d4s, 0))
    line.extend(np.std(d4s, 0))
    return [line[0], line[3], line[1], line[4], line[2], line[5]]


################################################################################


@add_cluster_id_head
def clusters_inlets_header():
    header = 'Size SInp IInp IOut SOut'.split()
    header = 'Size INCOMING OUTGOING'.split()
    line_template = ['%7d'] + ['%8d'] * (len(header) - 1)
    return header, line_template


@add_cluster_id
def clusters_inlets(cluster, inlets):
    line = [inlets.size]
    line.append(inlets.lim2types([InletTypeCodes.surface_incoming]).size)
    # line.append(inlets.lim2types([InletTypeCodes.internal_incoming]).size)
    # line.append(inlets.lim2types([InletTypeCodes.internal_outgoing]).size)
    line.append(inlets.lim2types([InletTypeCodes.surface_outgoing]).size)
    return line


@add_ctype_id_head
def ctypes_spaths_info_header():
    header, line_template = spaths_lenght_total_header()
    return header, line_template


@add_ctype_id
def ctypes_spaths_info(ctype, spaths):
    line = []
    line += spaths_length_total(spaths)
    return line


################################################################################


# analysis
[docs]def stage_V_run(config, options,
                spaths=None,
                paths=None,
                inls=None,
                ctypes=None,
                **kwargs):
    # file handle?
    pa = PrintAnalysis(options.save)
    if options.save:
        clui.message('Using user provided file (%s).' % options.save)
        # clui.message(sep())
        # clui.message('')
    else:
        clui.message('Using standard output.')
        clui.message(sep())
        clui.message('')

    ############
    pa.sep()
    pa('Aqua-Duct analysis')
    pa(clui.get_str_timestamp())

    ############
    if options.dump_config:
        pa.sep()
        pa.under('Configuration file name: %s' % config.config_filename)
        pa(os.linesep.join(config.dump_config()))

    ############
    pa.sep()
    pa("Number of traceable residues: %d" % len(paths))
    pa("Number of separate paths: %d" % len(spaths))

    ############
    pa.sep()
    pa("Number of inlets: %d" % inls.size)
    no_of_clusters = len(inls.clusters_list) - {True: 1, False: 0}[0 in inls.clusters_list]  # minus outliers, if any
    pa("Number of clusters: %d" % no_of_clusters)
    pa("Outliers: %s" % ({True: 'yes', False: 'no'}[0 in inls.clusters_list]))

    ############
    pa.sep()
    pa("Clusters summary - inlets")
    header_line, line_template = get_header_line_and_line_template(clusters_inlets_header(), head_nr=True)
    pa.thead(header_line)

    for nr, cl in enumerate(inls.clusters_list):
        inls_lim = inls.lim2clusters(cl)
        pa(make_line(line_template, clusters_inlets(cl, inls_lim)), nr=nr)

    ############
    pa.sep()
    pa("Separate paths clusters types summary - mean lengths of paths")
    header_line, line_template = get_header_line_and_line_template(ctypes_spaths_info_header(), head_nr=True)
    pa.thead(header_line)

    ctypes_generic = [ct.generic for ct in ctypes]
    ctypes_generic_list = sorted(list(set(ctypes_generic)))

    # sorted by ctype
    ctypes_size = []
    for nr, ct in enumerate(ctypes_generic_list):
        sps = lind(spaths, what2what(ctypes_generic, [ct]))
        ctypes_size.append(len(sps))
        # pa(make_line(line_template, ctypes_spaths_info(ct, sps)), nr=nr)

    '''
    # sorted by sizes:
    ctypes_generic_list = sorted(ctypes_generic_list, key=lambda ctyp: ctypes_size[ctypes_generic_list.index(ctyp)],
                                 reverse=True)
    '''

    for nr, ct in enumerate(ctypes_generic_list):
        sps = lind(spaths, what2what(ctypes_generic, [ct]))
        ctypes_size.append(len(sps))
        pa(make_line(line_template, ctypes_spaths_info(ct, sps)), nr=nr)

    ############
    pa.sep()
    pa("List of separate paths and properties")
    header_line, line_template = get_header_line_and_line_template(spath_full_info_header(), head_nr=True)
    pa.thead(header_line)
    for nr, (sp, ctype) in enumerate(izip_longest(spaths, ctypes, fillvalue=None)):
        if ctype is not None:
            ctype = ctype.generic
        pa(make_line(line_template, spath_full_info(sp, ctype=ctype)), nr=nr)


################################################################################

# visualize


def plot_spaths_traces(spaths, spp=None, name=None, split=False, states=False, separate=False, smooth=None):
    if states or separate:
        spaths_iter = spaths
    else:
        spaths_iter = [spaths]
    state = None
    name_separate = ''
    for nr, sp in enumerate(spaths_iter):
        if states:
            state = nr + 1
        if separate:
            name_separate = '_%d' % (nr + 1)
        if states or separate:
            sp = [sp]
        if split:
            spp.paths_trace(sp, name=name + '_in' + name_separate, plot_object=False, plot_out=False, state=state,
                            smooth=smooth)
            spp.paths_trace(sp, name=name + '_obj' + name_separate, plot_in=False, plot_out=False, state=state,
                            smooth=smooth)
            spp.paths_trace(sp, name=name + '_out' + name_separate, plot_in=False, plot_object=False, state=state,
                            smooth=smooth)
        else:
            spp.paths_trace(sp, name=name + name_separate, state=state, smooth=smooth)


def plot_spaths_inlets(spaths, spp=None, name=None, states=False, separate=False, smooth=None):
    if states or separate:
        spaths_iter = spaths
    else:
        spaths_iter = [spaths]
    state = None
    name_separate = ''
    for nr, sp in enumerate(spaths_iter):
        if states:
            state = nr + 1
        if separate:
            name_separate = '_%d' % (nr + 1)
        if states or separate:
            sp = [sp]
        spp.paths_inlets(sp, name=name + name_separate, state=state, smooth=smooth)


def is_pymol_connector_session(filename):
    session_ext = re.compile('[.][pP][sS][eE]')
    if filename:
        ext = os.path.splitext(filename)[-1]
        if session_ext.match(ext):
            return True
    return False


def is_pymol_connector_script(filename):
    script = re.compile('.*[.]([pP][yY]|[.pP][yY][.][gG][zZ])$')
    if filename:
        if script.match(filename):
            return True
    return False


[docs]def stage_VI_run(config, options,
                 reader=None,
                 spaths=None,
                 inls=None,
                 ctypes=None,
                 master_paths=None,
                 master_paths_smooth=None,
                 **kwargs):
    from aquaduct.visual.pymol_connector import ConnectToPymol, SinglePathPlotter
    # from aquaduct.visual.pymol_connector import cmd as pymol_cmd
    from aquaduct.visual.helpers import ColorMapDistMap

    soptions = config.get_smooth_options()
    smooth = get_smooth_method(soptions)

    # start pymol
    with clui.fbm("Starting PyMOL connection", cont=False):
        pymol_connector = ConnectToPymol()
        if is_pymol_connector_script(options.save):
            pymol_connector.init_script(options.save)
        else:
            pymol_connector.init_pymol()

        if options.simply_smooths:
            spp = SinglePathPlotter(pymol_connector, linearize=get_linearize_method(options.simply_smooths))
        else:
            spp = SinglePathPlotter(pymol_connector, linearize=None)

    ctypes_generic = [ct.generic for ct in ctypes]
    ctypes_generic_list = sorted(list(set(ctypes_generic)))

    if options.show_molecule:
        with clui.fbm("Molecule"):
            with reader.get() as traj_reader:
                mda_ppr = mda.core.flags["permissive_pdb_reader"]
                mda.core.flags["permissive_pdb_reader"] = False
                pdb = TmpDumpWriterOfMDA()
                frames_to_show = range2int(options.show_molecule_frames)
                pdb.dump_frames(traj_reader, frames=frames_to_show, selection=options.show_molecule)
                pymol_connector.load_pdb('molecule', pdb.close())
                del pdb
                mda.core.flags["permissive_pdb_reader"] = mda_ppr
                # it would be nice to plot convexhull
    if options.show_chull:
        with clui.fbm("Convexhull"):
            with reader.get() as traj_reader:
                scope = traj_reader.parse_selection(options.show_chull)
                frames_to_show = range2int(options.show_chull_frames)
                for frame in frames_to_show:
                    traj_reader.set_real_frame(frame)
                    chull = scope.get_convexhull_of_atom_positions()
                    spp.convexhull(chull, state=frame + 1)

    if options.show_object:
        with clui.fbm("Object shape"):
            with reader.get() as traj_reader:
                object_shape = traj_reader.parse_selection(options.show_object)
                frames_to_show = range2int(options.show_object_frames)
                for frame in frames_to_show:
                    traj_reader.set_real_frame(frame)
                    chull = object_shape.get_convexhull_of_atom_positions()
                    spp.convexhull(chull, name='object_shape', color=np.array([255, 153, 0]) / 255.,
                                   state=frame + 1)  # orange

    if options.inlets_clusters:
        with clui.fbm("Clusters"):
            # TODO: require stage V for that?
            no_of_clusters = len(inls.clusters_list)  # total, including outliers
            cmap = ColorMapDistMap()
            for c in inls.clusters_list:
                # coords for current cluster
                ics = inls.lim2clusters(c).coords
                if c == 0:
                    c_name = 'out'
                else:
                    c_name = str(int(c))
                spp.scatter(ics, color=cmap(c), name="cluster_%s" % c_name)

    if options.ctypes_raw:
        with clui.fbm("CTypes raw"):
            for nr, ct in enumerate(ctypes_generic_list):
                clui.message(str(ct), cont=True)
                sps = lind(spaths, what2what(ctypes_generic, [ct]))
                plot_spaths_traces(sps, name=str(ct) + '_raw', split=False, spp=spp)
                if ct in master_paths:
                    plot_spaths_traces([master_paths[ct]], name=str(ct) + '_raw_master', split=False, spp=spp)

    if options.ctypes_smooth:
        with clui.fbm("CTypes smooth"):
            for nr, ct in enumerate(ctypes_generic_list):
                clui.message(str(ct), cont=True)
                sps = lind(spaths, what2what(ctypes_generic, [ct]))
                plot_spaths_traces(sps, name=str(ct) + '_smooth', split=False, spp=spp, smooth=smooth)
                if ct in master_paths_smooth:
                    if master_paths_smooth[ct] is None: continue
                    plot_spaths_traces([master_paths_smooth[ct]], name=str(ct) + '_smooth_master', split=False, spp=spp,
                                       smooth=lambda anything: anything)
                if ct in master_paths:
                    if master_paths[ct] is None: continue
                    plot_spaths_traces([master_paths[ct]], name=str(ct) + '_raw_master_smooth', split=False, spp=spp,
                                       smooth=smooth)

    if options.all_paths_raw:
        with clui.fbm("All raw paths"):
            plot_spaths_traces(spaths, name='all_raw', split=options.all_paths_split, spp=spp)
    if options.all_paths_raw_io:
        with clui.fbm("All raw paths io"):
            plot_spaths_inlets(spaths, name='all_raw_paths_io', spp=spp)

    if options.all_paths_smooth:
        with clui.fbm("All smooth paths"):
            plot_spaths_traces(spaths, name='all_smooth', split=options.all_paths_split, spp=spp, smooth=smooth)
    if options.all_paths_smooth_io:
        with clui.fbm("All smooth paths io"):
            plot_spaths_inlets(spaths, name='all_smooth_paths_io', spp=spp)

    with clui.fbm("Paths as states"):
        if options.paths_raw:
            clui.message("raw", cont=True)
            plot_spaths_traces(spaths, name='raw_paths', states=options.paths_states, separate=not options.paths_states,
                               spp=spp)
        if options.paths_smooth:
            clui.message("smooth", cont=True)
            plot_spaths_traces(spaths, name='smooth_paths', states=options.paths_states,
                               separate=not options.paths_states, smooth=smooth, spp=spp)
        if options.paths_raw_io:
            clui.message("raw_io", cont=True)
            plot_spaths_inlets(spaths, name='raw_paths_io', states=options.paths_states,
                               separate=not options.paths_states, spp=spp)
        if options.paths_smooth_io:
            clui.message("smooth_io", cont=True)
            plot_spaths_inlets(spaths, name='smooth_paths_io', states=options.paths_states,
                               separate=not options.paths_states, smooth=smooth, spp=spp)

    if options.show_molecule:
        pymol_connector.orient_on('molecule')

    if is_pymol_connector_session(options.save):
        with clui.fbm("Saving session (%s)" % options.save):
            clui.message("")  # new line
            pbar = clui.pbar(len(spaths))
            # FIXME: Loop over states is not required if there is no object with many states.
            import time
            for state in range(len(spaths)):
                pymol_connector.cmd.set_frame(state + 1)
                pbar.update(state)
                time.sleep(0.1)
            pbar.finish()
            clui.message("Finalizing session saving...", cont=True)  # new line
            pymol_connector.cmd.set_frame(1)
            pymol_connector.cmd.save(options.save, state=0)
            pymol_connector.cmd.quit()


################################################################################


__all__ = '''clui
optimal_threads
aquaduct_version_nice
version_nice
ValveConfig
valve_begin
valve_load_config
valve_read_trajectory
valve_exec_stage
stage_I_run
stage_II_run
stage_III_run
stage_IV_run
stage_V_run
stage_VI_run
valve_end
'''.split()