Source code for pytadbit.boundary_aligner.aligner

"""
02 Dec 2012


"""
from pytadbit.boundary_aligner.globally     import needleman_wunsch
from pytadbit.boundary_aligner.reciprocally import reciprocal


def consensusize(ali1, ali2, passed):
    """
    :param ali1: first aligned sequence
    :param ali1: second aligned sequence
    :param passed: in case first aligned sequence is already a consensus, it
       might be weighted
    :returns: consensus sequence corresponding to ali1 and ali2
    """
    consensus = []
    for pos in xrange(len(ali1)):
        if ali1[pos] != ali2[pos]:
            try:
                bound = (ali1[pos] * passed + ali2[pos]) / (1 + passed)
            except TypeError:
                bound = ali1[pos] if type(ali1[pos]) is not str else ali2[pos]
        else:
            bound = ali1[pos]
        consensus.append(bound)
    return consensus


[docs]def align(sequences, method='reciprocal', **kwargs): """ Align Topologically Associating Domains. Supports multiple alignment by building a consensus TAD and aligning each TAD to it. Note: as long as we are using multiple alignments in an iterative way, the order of sequences we be relevant. Here experiments are sorted according to the value of the first boundary found in order to try to reduce this problem. :param reciprocal method: method used to align """ if method == 'global': aligner = needleman_wunsch elif method == 'reciprocal': aligner = reciprocal else: raise NotImplementedError(('Only "global" and "reciprocal" are ' + 'implemented right now.\n')) if len(sequences) > 2: dico = {} reference = None for j, (i, seq) in enumerate(sorted(enumerate(sequences), key=lambda x: x[1])): reference = reference or seq dico[j] = {'sort':i, 'seq' :seq} aligneds = [] scores = 0 for other in xrange(1, len(sequences)): [align1, align2], score = aligner(reference, dico[other]['seq'], **kwargs) scores += score if len(reference) != len(align1): for pos in xrange(len(align1)): try: if align1[pos] != '-': continue for ali in aligneds: ali.insert(pos, '-') except IndexError: for ali in aligneds: ali.append('-') if not aligneds: aligneds.append(align1) aligneds.append(align2) reference = consensusize(align1, align2, other) sort_alis = [[] for _ in xrange(len(dico))] for seq in xrange(len(dico)): sort_alis[dico[seq]['sort']] = aligneds[seq][:] return sort_alis, scores return aligner(sequences[0], sequences[1], **kwargs)

Table Of Contents