Source code for pytadbit.boundary_aligner.aligner

"""
02 Dec 2012


"""
from pytadbit.boundary_aligner.globally     import needleman_wunsch
from pytadbit.boundary_aligner.reciprocally import reciprocal


def consensusize(ali1, ali2, passed):
    """
    :param ali1: first aligned sequence
    :param ali1: second aligned sequence
    :param passed: in case first aligned sequence is already a consensus, it
       might be weighted
    :returns: consensus sequence corresponding to ali1 and ali2
    """
    consensus = []
    for pos in xrange(len(ali1)):
        if ali1[pos] != ali2[pos]:
            try:
                bound = (ali1[pos] * passed + ali2[pos]) / (1 + passed)
            except TypeError:
                bound = ali1[pos] if type(ali1[pos]) is not str else ali2[pos]
        else:
            bound = ali1[pos]
        consensus.append(bound)
    return consensus


[docs]def align(sequences, method='reciprocal', **kwargs):
    """
    Align Topologically Associating Domains. Supports multiple alignment by
    building a consensus TAD and aligning each TAD to it.
    Note: as long as we are using multiple alignments in an iterative way,
    the order of sequences we be relevant. Here experiments are sorted according
    to the value of the first boundary found in order to try to reduce this
    problem.

    :param reciprocal method: method used to align
    """
    if method == 'global':
        aligner = needleman_wunsch
    elif method == 'reciprocal':
        aligner = reciprocal
    else:
        raise NotImplementedError(('Only "global" and "reciprocal" are ' +
                                   'implemented right now.\n'))
    if len(sequences) > 2:
        dico = {}
        reference = None
        for j, (i, seq) in enumerate(sorted(enumerate(sequences),
                                            key=lambda x: x[1])):
            reference = reference or seq
            dico[j] = {'sort':i,
                       'seq' :seq}
        aligneds = []
        scores = 0
        for other in xrange(1, len(sequences)):
            [align1, align2], score = aligner(reference, dico[other]['seq'],
                                              **kwargs)
            scores += score
            if len(reference) != len(align1):
                for pos in xrange(len(align1)):
                    try:
                        if align1[pos] != '-':
                            continue
                        for ali in aligneds:
                            ali.insert(pos, '-')
                    except IndexError:
                        for ali in aligneds:
                            ali.append('-')
            if not aligneds:
                aligneds.append(align1)
            aligneds.append(align2)
            reference = consensusize(align1, align2, other)
            
        sort_alis = [[] for _ in xrange(len(dico))]
        for seq in xrange(len(dico)):
            sort_alis[dico[seq]['sort']] = aligneds[seq][:]
        return sort_alis, scores
    return aligner(sequences[0], sequences[1], **kwargs)
Navigation

Source code for pytadbit.boundary_aligner.aligner

Table Of Contents

Quick search

Navigation