Source code for glypy.structure.glycan

'''
Represent a sugar graph with pseudo-directed edges.

'''

import operator
import logging
import itertools
from functools import partial
from collections import deque, defaultdict, Callable

from glypy.utils import identity, chrinc, uid
from glypy.composition import Composition

from .base import SaccharideCollection
from .monosaccharide import Monosaccharide, graph_clone, toggle as residue_toggle
from .crossring_fragments import crossring_fragments, CrossRingPair
from .fragment import Subtree

logger = logging.getLogger("Glycan")

_fragment_direction = {
    "A": -1,
    "B": -1,
    "C": -1,
    "X": 1,
    "Y": 1,
    "Z": 1
}

MAIN_BRANCH_SYM = '-'


[docs]def fragment_to_substructure(fragment, tree):
    """Extract the substructure of `tree` which is contained in `fragment`


    >>> from glypy import glycans as glycan_factory
    >>> from glypy.structure import glycan
    >>> n_linked_core = glycan_factory["N-Linked Core"]
    >>> frag = n_linked_core.fragments().next()
    >>> frag
    <Fragment  mass=221.089937203 kind=Y included_nodes=set([1]) link_ids={1: ('', 'Y')} name=Y1 crossring_cleavages={} score=0.0 composition=Composition({'H': 15, 'C': 8, 'O': 6, 'N': 1})>
    >>> glycan.fragment_to_substructure(frag, n_linked_core)
    RES
    1b:b-dglc-HEX-1:5
    2s:n-acetyl
    LIN
    1:1d(2+1)2n
    <BLANKLINE>
    >>>

    Parameters
    ----------
    fragment: Fragment
        The :class:`Fragment` to extract substructure for.
    tree: Glycan
        The |Glycan| to extract substructure from.

    Returns
    -------
    Glycan:
        The |Glycan| substructure defined by the nodes contained in `fragment` as
        found in `tree`
    """
    break_targets = fragment.link_ids
    crossring_targets = fragment.crossring_cleavages

    # All operations will be done on a copy of the tree of interest
    tree = tree.clone()
    crossring_targets_nodes = []
    break_targets_nodes = []
    # A point of reference known to be inside the fragment tree
    anchor = None
    for pos, link in tree.iterlinks():
        # If the current link's child was cross-ring cleaved,
        # then we must apply that cleavage and find an anchor
        if link.child.id in crossring_targets:
            ion_type = crossring_targets[link.child.id]
            c1, c2 = map(int, ion_type[0].split(","))
            target = tree.get(link.child.id)
            crossring_targets_nodes.append((target, c1, c2))
        # If this link was cleaved, break it and find an anchor
        if link.id in break_targets:
            break_targets_nodes.append(link)

    for target, c1, c2 in crossring_targets_nodes:
        a_frag, x_frag = crossring_fragments(
            target, c1, c2, attach=True, copy=False)
        next(residue_toggle(target))

        if crossring_targets[target.id][1] == "A":
            anchor = a_frag
        else:
            anchor = x_frag

    for link in break_targets_nodes:
        parent, child = link.break_link(refund=True)
        if parent.id in fragment.included_nodes:
            anchor = parent
        elif child.id in fragment.included_nodes:
            anchor = child

    # Build a new tree from the anchor
    substructure = Glycan(root=anchor, index_method=None).reroot()

    return substructure


[docs]class Glycan(SaccharideCollection):
    '''
    Represents a full graph of connected |Monosaccharide| objects and their connecting bonds.

    Attributes
    ----------
    root: |Monosaccharide|
        The first monosaccharide unit of the glycan, and the reducing end if present.
    index: |list|
        A list of the |Monosaccharide| instances in `self` in the order they are encountered
        by traversal by `traversal_methods[index_method]`
    link_index: |list|
        A list of the |Link| connecting the |Monosaccharide| instances in `self` in the order they
        are encountered by traversal by `traversal_methods[index_method]`
    reducing_end: |ReducedEnd| or |None|
        The reducing end on :attr:`root`.
    branch_lengths: |dict|
        A dictionary mapping branch symbols to their lengths
    '''

    _serializers = {}

    @classmethod
    def subtree_from(cls, tree, node, visited=None):
        if isinstance(node, int):
            node = tree[node]
        visited = {
            node.id for p,
            node in node.parents()} if visited is None else visited
        subtree = cls(root=node, index_method=None).clone(
            index_method=None, visited=visited)
        return subtree

    def fragment_to_substructure(self, fragment):
        return fragment_to_substructure(fragment, self)

    traversal_methods = {}

[docs]    def __init__(self, root=None, index_method='dfs'):
        '''
        Constructs a new Glycan from the collection of connected |Monosaccharide| objects
        rooted at `root`.

        If index_method is not |None|, the graph is indexed by the default search method
        given by `traversal_methods[index_method]`
        '''
        if root is None:
            root = Monosaccharide()
        self.root = root
        self.index = []
        self.link_index = []
        self.branch_lengths = {}
        if index_method is not None:
            self.reindex(index_method)

[docs]    def reindex(self, method='dfs'):
        '''
        Traverse the graph using the function specified by `method`. The order of
        traversal defines the new :attr:`id` value for each |Monosaccharide|
        and |Link|.

        The order of traversal also defines the ordering of the |Monosaccharide|
        in :attr:`index` and |Link| in :attr:`link_index`.

        '''
        self.deindex()
        traversal = self._get_traversal_method(method)
        index = []
        visited = set()
        i = 1
        for node in traversal():
            addr = id(node)
            if addr in visited:
                continue
            visited.add(addr)
            index.append(node)

        for node in index:
            if node.id < 0:
                node.id = i
                i += 1

        link_index = []
        for pos, link in self.iterlinks(method=method):
            link_index.append(link)

        i = 1
        for link in link_index:
            link.id = i
            i += 1

        self.index = index
        self.link_index = link_index

        self.label_branches()

        return self

    def _build_link_index(self, method='dfs'):
        link_index = []
        for pos, link in self.iterlinks(method=method):
            link_index.append(link)
        self.link_index = link_index

    def _build_node_index(self, method='dfs'):
        index = []
        for node in self.iternodes(method=method):
            index.append(node)
        self.index = index

[docs]    def deindex(self):
        '''
        When combining two Glycan structures, very often their component ids will
        overlap, making it impossible to differentiate between a cycle and the new
        graph. This function mangles all of the node and link ids so that they are
        distinct from the pre-existing nodes.
        '''
        if self.index is not None and len(self.index) > 0:
            base = uid()
            for node in self.index:
                node.id += base
                node.id *= -1
            for link in self.link_index:
                link.id += base
                link.id *= -1
        return self

[docs]    def reroot(self, index_method='dfs'):
        '''
        Set :attr:`root` to the node with the lowest :attr:`id`
        '''
        self.root = sorted(iter(self), key=operator.attrgetter('id'))[0]
        if index_method is not None:
            self.reindex(index_method)
        return self

[docs]    def __getitem__(self, ix):
        '''
        Alias for :attr:`index.__getitem__`
        '''
        if self.index is not None:
            return self.index[ix]
        else:
            raise IndexError(
                "Tried to access the index of an unindexed Glycan.")

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __root__(self):
        return self.root

    def __tree__(self):
        return self

    def _derivatized(self, substituent, id_base):
        pass

    def _strip_derivatization(self):
        pass

    def get(self, ix):
        if self.index:
            iterable = self.index
        else:
            iterable = self
        for node in iterable:
            if node.id == ix:
                return node
        raise IndexError(
            "Could not find a node with the given id {}".format(ix))

    def get_link(self, ix):
        for pos, link in self.iterlinks():
            if link.id == ix or link.label == ix:
                return link
        raise IndexError(
            "Could not find a link with the given id or label {}".format(ix))

    @property
    def reducing_end(self):
        '''
        An alias for :attr:`Monosaccharide.reducing_end` for :attr:`root`
        '''
        return self.root.reducing_end

[docs]    def set_reducing_end(self, value):
        '''
        Sets the reducing end type, and configures the root residue appropriately.

        If the reducing_end is not |None|, then the following state changes are made to the root:

        .. code-block:: python

            self.root.ring_start = 0
            self.root.ring_end = 0
            self.root.anomer = "uncyclized"

        Else, the correct state is unknown:

        .. code-block:: python

            self.root.ring_start = None
            self.root.ring_end = None
            self.root.anomer = None

        '''
        self.root.reducing_end = value
        if self.reducing_end is not None:
            self.root.ring_start = 0
            self.root.ring_end = 0
            self.root.anomer = "uncyclized"
        else:
            self.root.ring_start = None
            self.root.ring_end = None
            self.root.anomer = None

    @reducing_end.setter
    def reducing_end(self, value):
        self.set_reducing_end(value)

[docs]    def depth_first_traversal(self, from_node=None, apply_fn=identity, visited=None):
        '''
        Make a depth-first traversal of the glycan graph. Children are explored in descending bond-order.

        This is the default traversal method for all |Glycan| objects. :meth:`~.dfs` is an alias of this method.
        Both names can be used to specify this strategy to :meth:`~._get_traversal_method`.

        Parameters
        ----------
        from_node: None or Monosaccharide
            If `from_node` is |None|, then traversal starts from the root node. Otherwise it begins
            from the given node.
        apply_fn: function
            A function applied to each node on arrival. If this function returns a non-None value,
            the result is yielded from the generator, otherwise it is ignored. Defaults to :func:`.identity`
        visited: set or None
            A :class:`set` of node ID values to ignore. If |None|, defaults to the empty `set`

        Yields
        ------
        Return Value of `apply_fn`, by default |Monosaccharide|

        See also
        --------
        Glycan.breadth_first_traversal
        '''
        node_stack = deque([self.root if from_node is None else from_node])
        visited = set() if visited is None else visited
        while len(node_stack) > 0:
            node = node_stack.pop()
            visited.add(node.id)
            if apply_fn is identity:
                yield node
            else:
                res = apply_fn(node)
                if res is not None:
                    yield res

            for link in node.links.values():
                terminal = link.parent
                if terminal.id not in visited:
                    node_stack.append(terminal)
                terminal = link.child
                if terminal.id not in visited:
                    node_stack.append(terminal)

    # Convenience aliases and the set up the traversal_methods entry
    dfs = depth_first_traversal
    traversal_methods['dfs'] = "dfs"
    traversal_methods['depth_first_traversal'] = "dfs"

[docs]    def breadth_first_traversal(self, from_node=None, apply_fn=identity, visited=None):
        '''
        Make a breadth-first traversal of the glycan graph. Children are explored in descending bond-order.

        :meth:`~.bfs` is an alias of this method.
        Both names can be used to specify this strategy to :meth:`~._get_traversal_method`.

        Parameters
        ----------
        from_node: None or Monosaccharide
            If `from_node` is |None|, then traversal starts from the root node. Otherwise it begins
            from the given node.
        apply_fn: function
            A function applied to each node on arrival. If this function returns a non-None value,
            the result is yielded from the generator, otherwise it is ignored. Defaults to :func:`.identity`
        visited: set or None
            A :class:`set` of node ID values to ignore. If |None|, defaults to the empty `set`

        Yields
        ------
        Return Value of `apply_fn`, by default |Monosaccharide|

        See also
        --------
        Glycan.depth_first_traversal
        '''
        node_queue = deque([self.root if from_node is None else from_node])
        visited = set() if visited is None else visited
        while len(node_queue) > 0:
            node = node_queue.popleft()
            visited.add(node.id)

            if apply_fn is identity:
                yield node
            else:
                res = apply_fn(node)
                if res is not None:
                    yield res
            # node_queue.extend(terminal for link in node.links.values()
            #                   for terminal in link if terminal.id not in visited)
            for link in node.links.values():
                terminal = link.parent
                if terminal.id not in visited:
                    node_queue.append(terminal)
                terminal = link.child
                if terminal.id not in visited:
                    node_queue.append(terminal)

    # Convenience aliases and the set up the traversal_methods entry
    bfs = breadth_first_traversal
    traversal_methods['bfs'] = "bfs"
    traversal_methods['breadth_first_traversal'] = "bfs"

    def indexed_traversal(self, from_node=None, apply_fn=identity, visited=None):
        if not self.index:
            self._build_node_index()
        if from_node is None and apply_fn is identity:
            for node in self.index:
                yield node
        else:
            i = 0
            n = len(self.index)
            if from_node is not None:
                while i < n:
                    node = self.index[i]
                    if node == from_node:
                        break
                    i += 1
            while i < n:
                node = self.index[i]
                if apply_fn is identity:
                    yield node
                else:
                    value = apply_fn(node)
                    if value is not None:
                        yield value

                i += 1

    traversal_methods['index'] = "indexed_traversal"

    def _get_traversal_method(self, method):
        if method == 'dfs':
            return self.dfs
        elif method == 'bfs':
            return self.bfs
        elif isinstance(method, Callable):
            return partial(method, self)
        traversal = self.traversal_methods.get(method, None)
        if traversal is None:
            raise AttributeError("Unknown traversal method: {}".format(method))
        traversal = getattr(self, traversal)
        return traversal

    def __iter__(self):
        return self.dfs()

[docs]    def iternodes(self, from_node=None, apply_fn=identity, method='dfs', visited=None):
        '''
        Generic iterator over nodes. :meth:`Glycan.__iter__` is an alias of this method

        Parameters
        ----------
        from_node: None or Monosaccharide
            If `from_node` is |None|, then traversal starts from the root node. Otherwise it begins
            from the given node.
        apply_fn: function
            A function applied to each node on arrival. If this function returns a non-None value,
            the result is yielded from the generator, otherwise it is ignored. Defaults to :func:`.identity`
        method: str or `function`
            Traversal method to use. See :meth:`._get_traversal_method`
        visited: set or None
            A :class:`set` of node ID values to ignore. If |None|, defaults to the empty `set`

        Yields
        ------
        Return Value of `apply_fn`, by default Monosaccharide

        See also
        --------
        depth_first_traversal
        breadth_first_traversal
        _get_traversal_method
        '''
        traversal = self._get_traversal_method(method)
        return traversal(
            from_node=from_node, apply_fn=apply_fn, visited=visited)

[docs]    def iterlinks(self, apply_fn=identity, substituents=False, method='dfs', visited=None):
        '''
        Iterates over all |Link| objects in |Glycan|.

        Parameters
        ----------
        substituents: bool
            If `substituents` is |True|, then include the |Link| objects in
            :attr:`substituent_links` on each |Monosaccharide|
        method: str or function
            The traversal method controlling the order of the nodes visited
        visited: None or set
            The collection of id values to ignore when traversing

        Yields
        ------
        Link
        '''
        traversal = self._get_traversal_method(method)
        links_visited = set()

        def links(obj):
            if substituents:
                for pos, link in obj.substituent_links.items():
                    res = apply_fn((pos, link))
                    if res:
                        yield res
            for pos, link in obj.links.items():
                if link.id in links_visited:
                    continue
                links_visited.add(link.id)
                res = apply_fn((pos, link))
                if res:
                    yield res

        return itertools.chain.from_iterable(
            traversal(apply_fn=links, visited=visited))

[docs]    def leaves(self, bidirectional=False, method='dfs', visited=None):
        '''
        Iterates over all |Monosaccharide| objects in |Glycan|, yielding only those
        that have no child nodes.

        Parameters
        ----------
        bidirectional: bool
            If `bidirectional` is |True|, then only |Monosaccharide| objects
            with only one entry in :attr:`links`.
        method: str or function
            The traversal method controlling the order of the nodes visited
        visited: None or set
            The collection of id values to ignore when traversing

        Yields
        ------
        |Monosaccharide|
        '''
        traversal = self._get_traversal_method(method)
        if bidirectional:
            def is_leaf(obj):
                if len(obj.links) == 1:
                    yield obj
        else:
            def is_leaf(obj):
                if len(list(obj.children())) == 0:
                    yield obj

        return itertools.chain.from_iterable(
            traversal(apply_fn=is_leaf, visited=visited))

[docs]    def label_branches(self):
        '''
        Labels each branch point with an alphabetical symbol. Also computes and stores
        each branch's length and stores it in :attr:`branch_lengths`. Sets :attr:`branch_lengths`
        of `self` and :attr:`Link.label` for each link attached to `self`.
        '''
        last_branch_label = MAIN_BRANCH_SYM
        self.branch_lengths = defaultdict(int)
        branch_parent_map = {}

        def parent_link_symbol(node):
            try:
                label = node.links[node.parents()[0][0]][0].label
                if label is None:
                    return MAIN_BRANCH_SYM
                else:
                    return label[0]
            except IndexError:
                return MAIN_BRANCH_SYM

        for node in self.dfs():
            links = []
            for link in node.links.values():
                if link.is_child(node):
                    continue
                links.append(link)

            if len(links) == 1:
                label_key = parent_link_symbol(node)
                self.branch_lengths[label_key] += 1
                label = "{}{}".format(
                    label_key, self.branch_lengths[label_key])
                links[0].label = label
            else:
                last_label_key = label_key = parent_link_symbol(node)
                count = self.branch_lengths[last_label_key]
                for link in links:
                    last_branch_label = chrinc(
                        last_branch_label) if last_branch_label != MAIN_BRANCH_SYM else 'a'
                    new_label_key = last_branch_label
                    branch_parent_map[new_label_key] = last_label_key
                    self.branch_lengths[new_label_key] = count + 1
                    label = "{}{}".format(
                        new_label_key, self.branch_lengths[new_label_key])
                    link.label = label

        # Update parent branch lengths
        longest = self.branch_lengths[MAIN_BRANCH_SYM]
        for branch in sorted(list(self.branch_lengths.keys()), reverse=True):
            if branch == '-':
                continue
            length = self.branch_lengths[branch]
            longest = max(longest, length)
            parent = branch_parent_map[branch]
            self.branch_lengths[parent] = max(length, self.branch_lengths[parent])
        self.branch_parent_map = branch_parent_map
        self.branch_lengths["-"] = longest

[docs]    def count_branches(self):
        '''
        Count the number of branches in the Glycan tree

        Returns
        -------
        int
        '''
        count = 0
        for node in self:
            if len(node.links) > 2:
                count += 2 if count == 0 else 1
        return count

[docs]    def order(self):
        '''
        The number of nodes in the graph. :meth:`__len__` is an alias of this

        Returns
        -------
        int
        '''
        count = 0
        for node in self.dfs():
            count += 1
        return count

    __len__ = order

    @classmethod
    def register_serializer(cls, name, method):
        cls._serializers[name] = method

    def serialize(self, name='glycoct'):
        return self._serializers[name](self)

    __repr__ = serialize

[docs]    def mass(self, average=False, charge=0, mass_data=None, method='dfs'):
        '''
        Calculates the total mass of the intact graph by querying each
        node for its mass.

        Parameters
        ----------
        average: bool
            Whether or not to use the average isotopic composition when calculating masses.
            When ``average == False``, masses are calculated using monoisotopic mass.
        charge: int
            If charge is non-zero, m/z is calculated, where m is the theoretical mass, and z is `charge`
        mass_data: dict
            If mass_data is None, standard NIST mass and isotopic abundance data are used. Otherwise the
            contents of mass_data are assumed to contain elemental mass and isotopic abundance information.


        Returns
        -------
        float

        See also
        --------
        :func:`glypy.composition.composition.calculate_mass`
        '''
        if charge == 0:
            return sum(
                node.mass(average=average, charge=0, mass_data=mass_data) for node in self.iternodes(method=method))
        else:
            return self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)

[docs]    def total_composition(self, method='dfs'):
        '''
        Computes the sum of the composition of all |Monosaccharide| objects in ``self``

        Returns
        -------
        :class:`~glypy.composition.Composition`
        '''

        return sum((node.total_composition() for node in self.iternodes(method=method)), Composition())

[docs]    def clone(self, index_method='dfs', visited=None, cls=None):
        '''
        Create a copy of `self`, indexed using `index_method`, a *traversal method*  or |None|.

        Returns
        -------
        :class:`~glypy.structure.glycan.Glycan`
        '''
        if cls is None:
            cls = self.__class__
        clone_root = graph_clone(self.root, visited=visited)
        duplicate = cls(root=clone_root, index_method=index_method)

        return duplicate

[docs]    def __eq__(self, other):
        '''
        Two glycans are considered equal if they are identically ordered nodes.

        Parameters
        ----------
        self, other: :class:`~glypy.structure.glycan.Glycan`

        Returns
        -------
        bool

        See also
        --------
        :meth:`glypy.structure.Monosaccharide.exact_ordering_equality`
        '''
        if other is None:
            return False
        elif not isinstance(other, Glycan):
            return False
        return self.root.exact_ordering_equality(other.root)

    exact_ordering_equality = __eq__

[docs]    def topological_equality(self, other):
        '''
        Two glycans are considered equal if they are topologically equal.

        Parameters
        ----------
        self: :class:`Glycan`
        other: :class:`Glycan`

        Returns
        -------
        bool

        See also
        --------
        :meth:`glypy.structure.Monosaccharide.topological_equality`
        '''
        return self.root.topological_equality(other.root)

    def __ne__(self, other):
        return not self == other

[docs]    def substructures(self, max_cleavages=1, min_cleavages=1, inplace=False):
        '''
        Generate disjoint subtrees from this glycan by examining by removing one or
        more monosaccharide-monosaccharide bond.


        Parameters
        ----------
        max_cleavages: |int|
            The maximum number of bonds to break per substructure
        min_cleavages: |int|
            The minimum number of bonds to break per substructure
        min_size: |int|
            The minimum number of monosaccharides per substructure

        See also
        --------
        :func:`glypy.composition.composition.calculate_mass`
        '''
        structure = self
        if not inplace:
            structure = self.clone()
        for frag in structure.break_links_subtrees(
                max_cleavages):
            yield frag

[docs]    def name_fragment(self, fragment):
        '''
        Attempt to assign a full name to a fragment based on the branch and position relative to
        the reducing end along side A/B/C/X/Y/Z, according to :title-reference:`Domon and Costello`
        '''

        break_targets = fragment.link_ids
        crossring_targets = fragment.crossring_cleavages

        # Accumulator for name components
        name_parts = []
        # Collect cross-ring fragment names
        for crossring_id in crossring_targets:
            # Seek the link that holds the fragmented residue
            for link in self.link_index:
                if link.child.id == crossring_id:
                    ion_type = crossring_targets[crossring_id]
                    label = link.label
                    if _fragment_direction[ion_type[1]] > 0:
                        name = "{}{}".format(
                            ''.join(map(str, ion_type)), label.replace(MAIN_BRANCH_SYM, ""))
                        name_parts.append(name)
                    else:
                        label_key = label[0]
                        distance = int(label[1:])
                        inverted_distance = self.branch_lengths[
                            label_key] - (distance - 1)
                        name = "{}{}{}".format(
                            ''.join(map(str, ion_type)), label_key.replace(MAIN_BRANCH_SYM, ""), inverted_distance)
                        name_parts.append(name)

        # Collect glycocidic fragment names
        for break_id, ion_type in break_targets.items():
            ion_type = ion_type[1]
            if _fragment_direction[ion_type] > 0:
                link = self.link_index[break_id - 1]
                label = link.label
                name = "{}{}".format(
                    ion_type,
                    label.replace(
                        MAIN_BRANCH_SYM,
                        ""))
                name_parts.append(name)
            else:
                link = self.link_index[break_id - 1]
                label = link.label
                label_key = label[0]
                distance = int(label[1:])
                inverted_distance = self.branch_lengths[
                    label_key] - (distance - 1)
                name = "{}{}{}".format(
                    ion_type, label_key.replace(MAIN_BRANCH_SYM, ""), inverted_distance)
                name_parts.append(name)

        return '-'.join(sorted(name_parts))

[docs]    def break_links_subtrees(self, n_links):
        """Iteratively generate all subtrees from glycosidic bond cleavages, creating all
        :math:`2{L \choose n}` subtrees.

        Parameters
        ----------
        n_links : int
            Number of links to break simultaneously

        Yields
        ------
        Subtree
        """
        if len(self.link_index) == 0:
            self._build_link_index()
        links = list(self.link_index)
        for breaks in itertools.combinations(links, n_links):

            subtrees = []
            for link in breaks:
                parent, child = link.break_link(refund=True)
                parent_tree = Glycan(parent, index_method=None)
                child_tree = Glycan(child, index_method=None)
                subtrees.append(parent_tree)
                subtrees.append(child_tree)

            unique_subtrees = []
            for subtree in subtrees:
                ids = {n.id for n in subtree}
                for uids, unique in unique_subtrees:
                    if ids == uids:
                        break
                else:
                    unique_subtrees.append((ids, subtree))

            for ids, subtree in unique_subtrees:
                subtree = subtree.clone(
                    index_method=None).reroot(
                    index_method=None)
                include_nodes = {n.id for n in subtree}

                link_ids = [link.id for link in breaks
                            if link.parent.id in include_nodes or
                            link.child.id in include_nodes]

                parent_break_ids = {link.id: link.parent.id for link in breaks
                                    if link.parent.id in include_nodes}

                child_break_ids = {link.id: link.child.id for link in breaks
                                   if link.child.id in include_nodes}

                yield Subtree(subtree, include_nodes, link_ids, parent_break_ids, child_break_ids)

            for link in breaks:
                link.apply()

[docs]    def crossring_subtrees(self, n_links):
        """Generate all combinations of cross ring fragments and
        glycosidic cleavages, cleaving between 1 and `n_links`
        monosaccharides paired with `n_links` - 1 to 0 glycosidic cleavages.

        Parameters
        ----------
        n_links : int
            Total number of breaks to create, between cross ring cleavages and
            complemenatary glycosidic cleavages.

        Yields
        ------
        Subtree
        """
        if len(self.link_index) == 0:
            self._build_link_index()

        links = list(self.link_index)
        # origin_mass = self.mass()
        # Localize globals
        _str = str
        # Break at least one ring
        for i in range(1, n_links + 1):
            # Generate all combinations of i rings to break
            for link_combination in itertools.combinations(links, i):
                # Creates a list of lists of CrossRingPairs, each inner list for a
                # single Monosaccharide
                crossring_combinations = [
                    CrossRingPair.from_link(link) for link in link_combination]
                # Combinations are splatted to unwrap the outer container so that
                # the inner lists are multiplexed.
                for breaks in itertools.product(*crossring_combinations):
                    subtrees = []
                    for ring in breaks:
                        parent, child = ring.break_link()
                        parent_tree = Glycan(parent, index_method=None)
                        parent_tree._build_node_index()
                        child_tree = Glycan(child, index_method=None)
                        child_tree._build_node_index()
                        subtrees.append(parent_tree)
                        subtrees.append(child_tree)

                    unique_subtrees = []
                    for tree in subtrees:
                        ids = {n.id for n in tree.index}
                        for uids, unique in unique_subtrees:
                            if ids == uids:
                                break
                        else:
                            unique_subtrees.append((ids, tree))

                    # If tis iteration hasn't broken all n rings, there are some
                    # breaks left over to be generated in glycosidic cleavages.
                    # Generate all possible glycosidic cleavage subtrees of the
                    # generated cross ring cleavage subtrees.

                    if n_links - i > 0:
                        for ids, subtree in unique_subtrees:
                            subtree._build_link_index()
                            partitions = subtree.break_links_subtrees(n_links - i)
                            for part in partitions:
                                included_crossring = {}
                                for crossring in breaks:
                                    if crossring.id in part.include_nodes:
                                        xring_residue = part.tree.get(
                                            crossring.id)
                                        try:
                                            included_crossring[xring_residue.id] = (','.join(
                                                (_str(xring_residue.cleave_1), _str(xring_residue.cleave_2))),
                                                xring_residue.kind)
                                        except AttributeError:
                                            pass
                                            # Not a CrossRingFragment in this instance

                                part.crossring_cleavages = included_crossring
                                yield part
                    else:
                        for ids, subtree in unique_subtrees:
                            subtree = subtree.reroot(
                                index_method=None).clone(
                                index_method=None)

                            included_crossring = {}
                            include_nodes = {n.id for n in subtree.indexed_traversal()}
                            for crossring in breaks:
                                if crossring.id in include_nodes:
                                    xring_residue = subtree.get(crossring.id)
                                    try:
                                        included_crossring[xring_residue.id] = (','.join(
                                            (_str(xring_residue.cleave_1), _str(xring_residue.cleave_2))),
                                            xring_residue.kind)
                                    except AttributeError:
                                        pass
                                        # Not a CrossRingFragment in this instance
                            yield Subtree(subtree, include_nodes, {}, {}, {}, crossring_cleavages=included_crossring)
                    # Re-join the ring, retracting all links from the crossring objects
                    for ring in breaks:
                        ring.apply()
                    # Clean up any lingering links trapped in the closure created by adjacent
                    # crossring pairs that were made visible by `ring.apply()`
                    while(any(ring.is_attached() for ring in breaks)):
                        for ring in breaks:
                            ring.release()
                    for ring in breaks:
                        ring.release()
                    # assert round(self.mass(), 4) == round(origin_mass, 4)

[docs]    def fragments(self, kind="BY", max_cleavages=1, average=False, charge=0, mass_data=None,
                  traversal_method='dfs'):
        '''
        Generate carbohydrate backbone fragments from this glycan by examining the disjoint subtrees
        created by removing one or more monosaccharide-monosaccharide bond.


        Parameters
        ----------
        kind: `sequence`
            Any `iterable` or `sequence` of characters corresponding to A/B/C/X/Y/Z
            as published by :title-reference:`Domon and Costello`
        max_cleavages: |int|
            The maximum number of bonds to break per fragment
        average: bool, optional, defaults to `False`
            Whether or not to use the average isotopic composition when calculating masses.
            When ``average == False``, masses are calculated using monoisotopic mass.
        charge: int, optional, defaults to 0
            If charge is non-zero, m/z is calculated, where m is the theoretical mass, and z is `charge`
        mass_data: dict, optional, defaults to `None`
            If mass_data is |None|, standard NIST mass and isotopic abundance data are used. Otherwise the
            contents of `mass_data` are assumed to contain elemental mass and isotopic abundance information.

        Yields
        ------
        :class:`Fragment`

        See also
        --------
        :func:`glypy.composition.composition.calculate_mass`
        :meth:`subtrees`
        :meth:`crossring_subtrees`
        :meth:`.Subtree.to_fragments`
        '''
        seen = set()
        source = self.clone()
        for i in range(1, max_cleavages + 1):
            gen = source.break_links_subtrees(i)
            if len(set("AX") & set(kind)) > 0:
                gen = itertools.chain(
                    gen,
                    source.crossring_subtrees(i))
            for subtree in gen:
                for fragment in subtree.to_fragments(kind, average=average,
                                                     charge=charge, mass_data=mass_data,
                                                     traversal_method=traversal_method):
                    fragment.name = self.name_fragment(fragment)
                    if fragment.name in seen:
                        continue
                    else:
                        seen.add(fragment.name)
                    yield fragment

[docs]    def subtrees(self, max_cleavages=1, include_crossring=False):
        '''
        Generate subtrees from this tree by breaking `max_cleavages` bonds or rings.

        Parameters
        ----------
        max_cleavages: int
            The maximum number of bonds to break per fragment
        include_crossring: bool
            Whether to include cross ring cleavages

        Yields
        ------
        Subtree
        '''
        source = self.clone()
        for i in range(1, max_cleavages + 1):
            gen = source.break_links_subtrees(i)
            if include_crossring:
                gen = itertools.chain(
                    gen,
                    source.crossring_subtrees(i))
            for subtree in gen:
                yield subtree


class NamedGlycan(Glycan):
    def __init__(self, name=None, *args, **kwargs):
        self.name = name
        super(NamedGlycan, self).__init__(*args, **kwargs)

    def clone(self, index_method='dfs', cls=None):
        if cls is None:
            cls = NamedGlycan
        inst = super(NamedGlycan, self).clone(index_method=index_method, cls=cls)
        inst.name = self.name
        return inst

    def __repr__(self):
        rep = super(NamedGlycan, self).__repr__()
        return "%s\n%s" % (self.name, rep)