Source code for phylonetwork.classes

from networkx import DiGraph, is_directed_acyclic_graph, dfs_successors 
from networkx import single_source_shortest_path_length,all_pairs_shortest_path_length,dijkstra_path_length

import numpy,pyparsing,copy

from .eNewick import eNewickParser
from .utils import total_cmp
import permutations
from .memoize import memoize_method
def memoize_method(f): return f # use it to document memoized methods (sphinx bug?)
from .exceptions import MalformedNewickException

[docs]class PhyloNetwork(DiGraph): """ Main class for phylogenetic networks and trees (with or without nested taxa). You can create a PhyloNetwork with its eNewick representation:: >>> network = PhyloNetwork(eNewick="((1,2),(3,4)5);") >>> network.taxa() ... ['1','2','3','4','5'] >>> network.leaves() ... ['_3', '_4', '_6', '_7'] >>> network.label('_6') ... '3' If your eNewick string is malformed, you'll receive a MalformedNewickException:: >>> network = PhyloNetwork(eNewick="(1)") ... Traceback (most recent call last): ... (...) ... PhyloNetwork.classes.MalformedNewickException: Malformed eNewick string You can also start with an existing networkx graph:: >>> graph = networkx.DiGraph() >>> graph.add_nodes_from(range(5)) >>> graph.add_edges_from([(0,1), (0,2), (1,3), (1,4)]) >>> network = PhyloNetwork(data = graph) >>> network.leaves() ... [2, 3, 4] >>> network.taxa() ... [] """ def __init__(self, data=None, name='', eNewick=None, ignore_prefix=None, id_offset=0): # initialization here DiGraph.__init__(self,data) self.name=name self._labels={} self._lastlabel=id_offset self.cache = {} if eNewick != None: self._from_eNewick(eNewick, ignore_prefix=ignore_prefix) @memoize_method
[docs] def is_phylogenetic_network(self): """ Returns True if the network is a Phylogenetic Network. False otherwise. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2),(3,4)5);") >>> network.is_phylogenetic_network() ... True >>> graph = networkx.DiGraph() >>> graph.add_nodes_from(range(4)) >>> graph.add_edges_from([(0,1), (0,2), (1,2), (2,3), (3,1)]) >>> PhyloNetwork(data=graph).is_phylogenetic_network() ... False """ if not is_directed_acyclic_graph(self): return False return True
[docs] def set_label(self, node, label): """ Set a new label to a node. EXAMPLE:: >>> graph = networkx.DiGraph() >>> graph.add_nodes_from(range(5)) >>> graph.add_edges_from([(0,1), (0,2), (1,3), (1,4)]) >>> network = PhyloNetwork(data = graph) >>> network.leaves() ... [2, 3, 4] >>> network.taxa() ... [] >>> network.set_label(2, 'Label 1') >>> network.set_label(3, 'Label 2') >>> network.set_label(4, 'Label 3') >>> network.taxa() ... ['Label 1', 'Label 2', 'Label 3'] >>> network.set_label(2, 'New label') >>> network.taxa() ... ['Label 2', 'Label 3', 'New label'] """ if node in self: self._labels[node] = label self.cache = {}
@memoize_method
[docs] def taxa(self): """ Returns the taxa (set of labels) of self. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2),(3,4)5);") >>> network.taxa() ... ['1', '2', '3', '4', '5'] >>> network.leaves() ... ['_3', '_4', '_6', '_7'] >>> network.label('_6') ... '3' >>> network.nodes() ... ['_7', '_6', '_5', '_4', '_3', '_2', '_1'] """ taxa = list(set(self._labels.values())) taxa.sort() return taxa
[docs] def label(self,node): """ Returns the label of node, or None if not labelled. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2),(3,4)5);") >>> network.taxa() ... ['1', '2', '3', '4', '5'] >>> network.leaves() ... ['_3', '_4', '_6', '_7'] >>> network.label('_6') ... '3' >>> network.nodes() ... ['_7', '_6', '_5', '_4', '_3', '_2', '_1'] >>> network.label('_1') ... None """ return self._labels.get(node)
@memoize_method
[docs] def node_by_taxa(self, taxa): """ Returns the node labelled by taxa or None if no node is labelled by taxa. Important: If more than one node is labelled with taxa, only the first one will be displayed. In order to get all nodes with a fixed label, use nodes_by_taxa. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2),(3,4)1);") >>> network.taxa() ... ['1', '2', '3', '4'] >>> network.nodes() ... ['_7', '_6', '_5', '_4', '_3', '_2', '_1'] >>> network.node_by_taxa('3') ... '_6' >>> network.node_by_taxa('non-existing taxa') ... None >>> network.node_by_taxa('1') ... '_5' """ for node in self.labelled_nodes(): if self.label(node) == taxa: return node return None
@memoize_method
[docs] def nodes_by_taxa(self,taxa): """ Returns all nodes labelled with taxa. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2),(3,4)1);") >>> network.taxa() ... ['1', '2', '3', '4'] >>> network.nodes() ... ['_7', '_6', '_5', '_4', '_3', '_2', '_1'] >>> network.node_by_taxa('3') ... '_6' >>> network.nodes_by_taxa('3') ... ['_6'] >>> network.node_by_taxa('1') ... '_5' >>> network.nodes_by_taxa('1') ... ['_5', '_3'] >>> network.nodes_by_taxa('non-existing taxa') ... set([]) """ tmp = [] for node in self.labelled_nodes(): if self.label(node) == taxa: tmp.append(node) return set(tmp)
[docs] def is_tree_node(self,u): """ Returns True if u is a tree node, False otherwise. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((3),(1,2));") >>> network.nodes() ... ['_6', '_5', '_4', '_3', '_2', '_1'] >>> map(network.is_tree_node, network.nodes()) ... [True, True, True, True, True, True] >>> network.is_tree_node('non-existing node') ... False >>> network = PhyloNetwork(eNewick="((4,5#1)2,(#1,6)3)1;") >>> network.nodes() ... ['_5', '_4', '_3', '_2', '_1', '#1'] >>> map(network.is_tree_node, network.nodes()) ... [True, True, True, True, True, False] """ return self.in_degree(u)<=1
[docs] def is_hybrid_node(self,u): """ Returns True if u is not a tree node, False otherwise. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((3),(1,2));") >>> network.nodes() ... ['_6', '_5', '_4', '_3', '_2', '_1'] >>> map(network.is_hybrid_node, network.nodes()) ... [False, False, False, False, False, False] >>> network.is_hybrid_node('non-existing node') ... False >>> network = PhyloNetwork(eNewick="((4,5#1)2,(#1,6)3)1;") >>> network.nodes() ... ['_5', '_4', '_3', '_2', '_1', '#1'] >>> map(network.is_hybrid_node, network.nodes()) ... [False, False, False, False, False, True] """ return self.in_degree(u)>1
[docs] def is_leaf(self,u): """ Returns True if u is a leaf, False otherwise. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((3),(1,2));") >>> network.nodes() ... ['_6', '_5', '_4', '_3', '_2', '_1'] >>> network.leaves() ... ['_3', '_5', '_6'] >>> network.is_leaf('_1') ... False >>> network.is_leaf('_6') ... True >>> network.is_leaf('non-existing node') ... False """ return self.out_degree(u)==0
[docs] def is_root(self,u): """ Returns True if u is a root, False otherwise. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((3),(1,2));") >>> network.nodes() ... ['_6', '_5', '_4', '_3', '_2', '_1'] >>> network.leaves() ... ['_3', '_5', '_6'] >>> network.is_root('_3') ... False >>> network.is_root('_1') ... True """ return self.in_degree(u)==0
[docs] def is_elementary_node(self,u): """ Returns True if u is an elementary node, False otherwise. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1)E,2);") >>> network.nodes() ... ['_4', '_3', '_2', '_1'] >>> map(network.is_elementary_node, network.nodes()) ... [False, False, True, False] >>> network.label('_2') ... 'E' >>> network.elementary_nodes() ... '_2' """ return ((self.in_degree(u)<=1) and (self.out_degree(u)==1))
[docs] def is_labelled(self,u): """ Returns True if u is a labelled node, False otherwise. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((3),(1,2))4;") >>> network.nodes() ... ['_6', '_5', '_4', '_3', '_2', '_1'] >>> map(network.is_labelled, network.nodes()) ... [True, True, False, True, False, True] >>> network.label('_1') ... '4' """ return u in self._labels
@memoize_method
[docs] def leaves(self): """ Returns the set of leaves of self. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((3),(1,(5,6)2))4;") >>> network.nodes() ... ['_8', '_7', '_6', '_5', '_4', '_3', '_2', '_1'] >>> network.leaves() ... ['_3', '_5', '_7', '_8'] >>> map(network.is_leaf, network.leaves()) ... [True, True, True, True] """ leaves = filter(self.is_leaf, self.nodes()) leaves.sort() return leaves
@memoize_method
[docs] def roots(self): """ Returns the set of roots of self. EXAMPLE:: >>> network = PhyloNetwork(eNewick="(1,2,3)ROOT;") >>> network.nodes() ... ['_4', '_3', '_2', '_1'] >>> network.roots() ... ['_1'] >>> network.label('_1') ... 'ROOT' EXAMPLE:: >>> graph = networkx.DiGraph() >>> graph.add_nodes_from(range(5)) >>> graph.add_edges_from([(0,1), (2,3), (1,4), (3,4)]) >>> network = PhyloNetwork(graph) >>> network.nodes() ... [0, 1, 2, 3, 4] >>> network.roots() ... [0, 2] >>> network.is_phylogenetic_network() ... True """ roots = filter(self.is_root, self.nodes()) roots.sort() return roots
@memoize_method
[docs] def labelled_nodes(self): """ Returns the set of labelled nodes. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((A,B,C),1);") >>> network.nodes() ... ['_6', '_5', '_4', '_3', '_2', '_1'] >>> network.labelled_nodes() ... ['_6', '_5', '_4', '_3'] >>> map(network.label, network.labelled_nodes()) ... ['1', 'C', 'B', 'A'] >>> network.unlabelled_nodes() ... ['_2', '_1'] >>> map(network.label, network.unlabelled_nodes()) ... [None, None] """ return self._labels.keys()
@memoize_method
[docs] def unlabelled_nodes(self): """ Returns the set of unlabelled nodes. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((A,B,C),1);") >>> network.nodes() ... ['_6', '_5', '_4', '_3', '_2', '_1'] >>> network.labelled_nodes() ... ['_6', '_5', '_4', '_3'] >>> map(network.label, network.labelled_nodes()) ... ['1', 'C', 'B', 'A'] >>> network.unlabelled_nodes() ... ['_2', '_1'] >>> map(network.label, network.unlabelled_nodes()) ... [None, None] """ return list(set(self.nodes())-set(self.labelled_nodes()))
@memoize_method
[docs] def interior_nodes(self): """ Returns the set of non-leaf nodes. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2),(3,4));") >>> network.nodes() ... ['_7', '_6', '_5', '_4', '_3', '_2', '_1'] >>> network.interior_nodes() ... ['_5', '_2', '_1'] >>> map(network.is_leaf, network.interior_nodes()) ... [False, False, False] """ return list(set(self.nodes())-set(self.leaves()))
@memoize_method
[docs] def elementary_nodes(self): """ Return the set of elementary nodes. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1)E,2);") >>> network.nodes() ... ['_4', '_3', '_2', '_1'] >>> map(network.is_elementary_node, network.nodes()) ... [False, False, True, False] >>> network.label('_2') ... 'E' >>> network.elementary_nodes() ... '_2' """ return filter(self.is_elementary_node, self.nodes())
@memoize_method
[docs] def depth(self,u): """ Returns the depth of u. If the node u is not from the phylogenetic network, then returns None. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((((LEAF#1))),#1);") >>> network.nodes() ... ['#1', '_4', '_3', '_2', '_1'] >>> map(network.depth, network.nodes()) ... [1, 3, 2, 1, 0] >>> network.depth('non-existing node') ... None """ if not u in self: return None return min([dijkstra_path_length(self,root,u) for root in self.roots()])
@memoize_method
[docs] def height(self,u): """ Returns the height of u. If the node u is not from the phylogenetic network, then returns None. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((((LEAF#1))),#1);") >>> network.nodes() ... ['#1', '_4', '_3', '_2', '_1'] >>> map(network.height, network.nodes()) ... [0, 1, 2, 3, 4] >>> network.height('non-existing node') ... None """ if not u in self: return None if self.is_leaf(u): return 0 else: return max(map(self.height, self.successors(u)))+1
@memoize_method
[docs] def mu(self,u): """ Returns a tuple containing the number of paths from u to all labelled nodes of the phylogenetic network. Returns None if the node u is not in the phylogenetic network. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((((LEAF1, LEAF2#1)), #1)INT,#1);") >>> network.taxa() ... ['INT', 'LEAF1', LEAF2] >>> network.roots() ... '_1' >>> network.mu('_1') ... array([1, 1, 3]) >>> network.successors('_1') ... ['#1', '_2'] >>> network.mu('#1') # This is LEAF2 ... array([0, 0, 1]) >>> network.mu('_2') # We lost the path root -> LEAF2 ... array([1, 1, 2]) """ if u not in self: return None if self.is_leaf(u): mu = numpy.zeros(len(self.taxa()),int) else: mu = sum(map(self.mu,self.successors(u))) if self.is_labelled(u): pos=self.taxa().index(self.label(u)) mu[pos] += 1 return mu
@memoize_method
[docs] def mu_string(self): """ Returns a string representing the mu value of all nodes, with the same order as sorted_nodes() EXAMPLE:: >>> network = PhyloNetwork(eNewick="((((LEAF1, LEAF2#1)), #1)INT,#1);") >>> network.taxa() ... ['INT', 'LEAF1', LEAF2] >>> network.sorted_nodes() ... ['#1', '_5', '_4', '_3', '_2', '_1'] >>> network.mu_string() ... '[0 0 1]-[0 1 0]-[0 1 1]-[0 1 1]-[1 1 2]-[1 1 3]' >>> network.mu('#1') ... array([0, 0, 1]) >>> network.mu('_1') ... array([1, 1, 3]) """ return '-'.join([str(self.mu(u)) for u in self.sorted_nodes()])
@memoize_method
[docs] def sorted_nodes(self): """ Returns the set of nodes sorted with the total order over their mu value. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((((LEAF1, LEAF2#1)), #1)INT,#1);") >>> network.taxa() ... ['INT', 'LEAF1', LEAF2] >>> network.sorted_nodes() ... ['#1', '_5', '_4', '_3', '_2', '_1'] >>> network.mu_string() ... '[0 0 1]-[0 1 0]-[0 1 1]-[0 1 1]-[1 1 2]-[1 1 3]' >>> network.nodes() ... ['_5', '_4', '_3', '_2', '_1', '#1'] """ nodes = self.nodes()[:] nodes.sort(cmp=lambda u,v:total_cmp(self.mu(u),self.mu(v))) return nodes
def _generate_new_id(self): """ For private use, it generates a new identification for every node when generating the phylogenetic network. """ try: self._lastlabel += 1 except: self._lastlabel = 1 return '_%d' % (self._lastlabel) #getlabel = _generate_new_id # DEPRECATED def _walk(self,parsed,ignore_prefix=None): if isinstance(parsed, pyparsing.ParseResults): if 'tag' in parsed: internal_label='#'+str(parsed['tag']) else: internal_label=self._generate_new_id() if 'length' in parsed: pass self.add_node(internal_label) if 'label' in parsed: self._labels[internal_label]=parsed['label'] for child in parsed: child_label=self._walk(child,ignore_prefix=ignore_prefix) if child_label: self.add_edge(internal_label,child_label) return internal_label def _from_eNewick(self,string,ignore_prefix=None): try: parsed=eNewickParser(string)[0] except pyparsing.ParseException: raise MalformedNewickException("Malformed eNewick string") self._walk(parsed,ignore_prefix=ignore_prefix) self.cache = {} def _eNewick_node(self,u,visited): if self.is_leaf(u): #return self._labels[u] return self._labels.get(u,'') if u in visited: return u visited.append(u) children=map(lambda x:self._eNewick_node(x,visited),self.successors(u)) internal=','.join(children) mylabel=self.label(u) or '' if self.is_hybrid_node(u): mylabel+=u return '('+internal+')'+mylabel def __str__(self): return self.eNewick() def __repr__(self): return "Phylogenetic Network with taxa [" + ",".join(map(str,self.taxa())) + "]."
[docs] def eNewick(self): """ Returns the eNewick representation of the network. """ visited=[] string = '' for root in self.roots(): string += self._eNewick_node(root,visited)+';' return string
@memoize_method
[docs] def descendant_nodes(self,u): """ Returns a set with all the descendents of u. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((,(3,4)#1)2,#1)1;") >>> network.nodes() ... ['_5', '_4', '_3', '_2', '_1', '#1'] >>> network.node_by_taxa('2') ... '_2' >>> network.descendant_nodes('_2') ... ['_5', '_4', '#1' '_3', '_2'] >>> network.strict_descendant_nodes('_2') ... ['_3', '_2'] >>> network.descendant_taxa('_2') ... ['4', '3', '2'] """ return sum(dfs_successors(self,u).values(),[]) + [u] #return dfs_successors(self,u)
@memoize_method
[docs] def descendant_taxa(self,u): """ Returns a set with all the labelled descendents of u. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((,(3,4)#1)2,#1)1;") >>> network.nodes() ... ['_5', '_4', '_3', '_2', '_1', '#1'] >>> network.node_by_taxa('2') ... '_2' >>> network.descendant_taxa('_2') ... ['4', '3', '2'] >>> network.strict_descendant_taxa('_2') ... ['2'] >>> network.descendant_nodes('_2') ... ['_5', '_4', '#1', '_3', '_2'] """ return [self.label(desc) for desc in self.descendant_nodes(u) if self.is_labelled(desc)]
@memoize_method
[docs] def strict_descendant_nodes(self,u): """ Returns a set with all the strict descendents of u. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((,(3,4)#1)2,#1)1;") >>> network.nodes() ... ['_5', '_4', '_3', '_2', '_1', '#1'] >>> network.node_by_taxa('2') ... '_2' >>> network.descendant_nodes('_2') ... ['_5', '_4', '#1', '_3', '_2'] >>> network.strict_descendant_nodes('_2') ... ['_3', '_2'] """ if self.is_root(u): return self.descendant_nodes(u) pruned = copy.deepcopy(self) pruned.cache = {} pruned.remove_node(u) desc_pruned = [] for root in self.roots(): desc_pruned.extend(pruned.descendant_nodes(root)) #dfs_successors(pruned, root) return [desc for desc in self.descendant_nodes(u) if not desc in desc_pruned]
@memoize_method
[docs] def strict_descendant_taxa(self,u): """ Returns a set with all the strict labelled descendents of u. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((,(3,4)#1)2,#1)1;") >>> network.nodes() ... ['_5', '_4', '_3', '_2', '_1', '#1'] >>> network.node_by_taxa('2') ... '_2' >>> network.descendant_taxa('_2') ... ['4', '3', '2'] >>> network.strict_descendant_taxa('_2') ... ['2'] """ return [self.label(desc) for desc in self.strict_descendant_nodes(u) if self.is_labelled(desc)]
@memoize_method
[docs] def ancestors(self,taxon): """ Returns a set with all nodes that have a fixed descendant taxa. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((3,4)1,(5,6,7)2);") >>> network.ancestors('3') ... ['_3', '_2', '_1'] >>> '3' in network.descendant_taxa('_2') ... True """ return [u for u in self.sorted_nodes() if taxon in self.descendant_taxa(u)]
@memoize_method
[docs] def strict_ancestors(self,taxon): """ Returns a set with all nodes that have a fixed strict descendant taxa. EXAMPLE:: >>> network = PhyloNetwork(eNewick=((,(3,4)#1)2,#1)1;) >>> network.node_by_taxa('2') ... '_2' >>> '_2' in network.ancestors('3') ... True >>> '_2' in network.strict_ancestors('3') ... False """ return [u for u in self.sorted_nodes() if taxon in self.strict_descendant_taxa(u)]
@memoize_method
[docs] def CSA(self,tax1,tax2): """ Returns a set with the common strict ancestors of taxa1 and taxa2. EXAMPLE:: >>> network = PhyloNetwork(eNewick=((,(3,4)#1)2,#1)1;) >>> network.CSA('3', '4') ... ['#1', '_1'] >>> network.LCSA('3', '4') ... '#1' EXAMPLE:: >>> network = PhyloNetwork(eNewick="(((1)), 2);") >>> network.CSA('1', '2') ... ['_2', '_1'] >>> network.LCSA('1', '2') ... '_2' """ return [u for u in self.ancestors(tax1) if (u in self.ancestors(tax2)) and ((u in self.strict_ancestors(tax1)) or (u in self.strict_ancestors(tax2)))]
@memoize_method
[docs] def LCSA(self,tax1,tax2): """ Returns a minimum of CSA(taxa1, taxa2) respect the height of nodes. EXAMPLE:: >>> network = PhyloNetwork(eNewick=((,(3,4)#1)2,#1)1;) >>> network.CSA('3', '4') ... ['#1', '_1'] >>> network.LCSA('3', '4') ... '#1' EXAMPLE:: >>> network = PhyloNetwork(eNewick="(((1)), 2);") >>> network.CSA('1', '2') ... ['_2', '_1'] >>> network.LCSA('1', '2') ... '_2' """ csa=self.CSA(tax1,tax2) #print self,tax1,tax2,csa csa.sort(lambda x,y:cmp(self.height(x),self.height(y))) return csa[0]
@memoize_method
[docs] def nodal_matrix(self): """ Returns a matrix containing the nodal 'distance' between all labelled nodes. EXAMPLES:: >>> network = PhyloNetwork(eNewick="(((1,2), 3), 4);") >>> network.nodal_matrix() ... array([[0, 1, 2, 3], ... [1, 0, 2, 3], ... [1, 1, 0, 2], ... [1, 1, 1, 0]) """ n=len(self.taxa()) matrix=numpy.zeros((n,n),int) dicdist=all_pairs_shortest_path_length(self) for i in range(n): ti=self.taxa()[i] for j in range(i,n): tj=self.taxa()[j] lcsa=self.LCSA(ti,tj) matrix[i,j]=dicdist[lcsa][self.node_by_taxa(ti)] matrix[j,i]=dicdist[lcsa][self.node_by_taxa(tj)] return matrix
[docs] def nodal_area(self): """ Returns the sum of all elements of the nodal matrix. EXAMPLES:: >>> network = PhyloNetwork(eNewick="(((1,2), 3), 4);") >>> network.nodal_matrix() ... array([[0, 1, 2, 3], ... [1, 0, 2, 3], ... [1, 1, 0, 2], ... [1, 1, 1, 0]) >>> network.nodal_area() ... 19 """ mat=self.nodal_matrix() #mat=mat+mat.transpose() return sum(abs(mat.flatten()))
@memoize_method
[docs] def cophenetic_matrix(self): """ Returns a matrix with the cophenetic coeficient of labelled nodes. EXAMPLE:: >>> network = PhyloNetwork(eNewick="(((1,2), 3), 4);") >>> network.cophenetic_matrix() ... array([[3, 2, 1, 0], ... [0, 3, 1, 0], ... [0, 0, 2, 0], ... [0, 0, 0, 1]) """ n=len(self.taxa()) matrix=numpy.zeros((n,n),int) for i in range(n): ti=self.taxa()[i] for j in range(i,n): tj=self.taxa()[j] lcsa=self.LCSA(ti,tj) matrix[i,j]=self.depth(lcsa) return matrix
[docs] def common_taxa(self,net2): """ Returns common taxa between self and net2. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2), 3)4;") >>> network2 = PhyloNewtwork(eNewick="(1,(4,5)2);") >>> network.common_taxa(network2) ... ['1', '2', '4'] >>> network.common_taxa_leaves(network2) ... ['1'] """ common=[] taxa1=self.taxa() taxa2=net2.taxa() for taxon in taxa1: if taxon in taxa2: common.append(taxon) return common
[docs] def common_taxa_leaves(self,net2): """ Returns common taxa between self and net2 that are leaves on both networks. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2), 3)4;") >>> network2 = PhyloNewtwork(eNewick="(1,(4,5)2);") >>> network.common_taxa(network2) ... ['1', '2', '4'] >>> network.common_taxa_leaves(network2) ... ['1'] """ common=[] taxa1=filter(lambda l:self.is_leaf(self.node_by_taxa(l)),self.taxa()) taxa2=filter(lambda l:net2.is_leaf(net2.node_by_taxa(l)),net2.taxa()) for taxon in taxa1: if taxon in taxa2: common.append(taxon) return common
[docs] def topological_restriction(self,subtaxa): """ Returns a minimal subnetwork of self such that it containts a fixed subtaxa. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2), (3,4));") >>> network.taxa() ... ['1', '2', '3', '4'] >>> network.roots() ... ['_1'] >>> subnetwork = network.topological_restriction(['1', '2']) >>> subnetwork.taxa() ... ['1', '2'] >>> subnetwork.nodes() # '_1' should not be ... ['_4', '_3', '_2'] >>> subnetwork.roots() ... ['_2'] >>> subnetwork = network.topological_restriction(['1', '3']) >>> '_1' in subnetwork.roots() ... True """ restricted=copy.deepcopy(self) for taxon in restricted.taxa(): if not taxon in subtaxa: u=restricted.node_by_taxa(taxon) del restricted._labels[u] restricted.cache = {} while True: leaves_to_delete=[u for u in restricted.nodes() if \ restricted.is_leaf(u) and \ not u in restricted._labels ] if not leaves_to_delete: break else: restricted.remove_nodes_from(leaves_to_delete) for u in restricted.nodes(): if restricted.is_elementary_node(u) and \ not restricted.is_labelled(u): for parent in restricted.predecessors(u): for child in restricted.successors(u): restricted.add_edge(parent,child) restricted.remove_node(u) restricted.cache = {} return restricted
@memoize_method
[docs] def has_nested_taxa(self): """ Returns True is an internal node is labelled. False otherwise. """ for node in self.labelled_nodes(): if not self.is_leaf(node): return True return False
def matching_representation(self): try: return self._matching_representation except: self._matching_representation={} for u in self.nodes(): self._matching_representation[u]=0 for u in self.leaves(): pos=self.taxa().index(self.label(u)) self._matching_representation[u]=pos+1 h=1 i=len(self.leaves())+1 thislevel=filter(lambda u:self.height(u)==h,self.nodes()) while thislevel: minims={} for u in thislevel: minims[u]=min([self._matching_representation[v] for \ v in self.successors(u)]) thislevel.sort(lambda x,y: minims[x]-minims[y]) for k in range(len(thislevel)): self._matching_representation[thislevel[k]]=i i += 1 h += 1 thislevel=filter(lambda u:self.height(u)==h,self.nodes()) return self._matching_representation def matching_permutation(self): try: return self._matching_permutation except: permutation={} representation=self.matching_representation() for u in self.nodes(): children=self.successors(u) children.sort(cmp=lambda u,v:representation[u]-representation[v]) for i in range(len(children)): permutation[representation[children[i]]]=representation[children[(i+1) % len(children)]] self._matching_permutation=permutations.Permutation(permutation) return self._matching_permutation
[docs] def cluster(self,u): """ Returns the cluster of u in the network, None if the node is not in the network. """ if not u in self: return None cl=[] dictio=single_source_shortest_path_length(self,u) for node in dictio.keys(): if self.label(node): cl.append(self.label(node)) cl.sort() cl=tuple(cl) return cl
[docs] def cluster_representation(self): """ Returns the cluster of all nodes in the network. """ cls=map(self.cluster,self.nodes()) cls.sort() return cls
[docs] def nested_label(self,node): """ Returns a string representation of descendants of u. Very useful to identify where is a node located in the network. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((2,3), (4,(5,6)))1;") >>> network.nodes() ... ['_9', '_8', '_7', '_6', '_5', '_4', '_3', '_2', '_1'] >>> network.nested_label('_7') # Where is node '_7' in the network? ... '{5,6}' # '_7' is the parent of leaves '5' and '6'. >>> network.nested_label('_6') ... '4' # '_6' is the leaf '4' >>> network.nested_label('_5') ... '{4,{5,6}}' # '_5' is the parent of leaf '4' and node '_7' """ try: return self._nested_label[node] except: if not hasattr(self,'_nested_label'): self._nested_label = {} if self.is_leaf(node): return self.label(node) else: desc_labels = [self.nested_label(u) for u in self.successors(node)] desc_labels.sort() if desc_labels is None: # If a leaf doesn't have a label self._nested_label[node] = "{}" return self.nested_label[node] self._nested_label[node] = '{'+','.join(desc_labels)+'}' return self._nested_label[node]
[docs] def nested_label_representation(self): """ Returns the nested label of all nodes in the network. EXAMPLE:: >>> network = PhyloNetwork(eNewick="((1,2), (3,4));") >>> network.nodes() ... ['_7', '_6', '_5', '_4', '_3', '_2', '_1'] >>> network.nested_label_representation() ... set(['{3,4}', '1', '3', '2', '4', '{1,2}', '{{1,2},{3,4}}']) """ nls=map(self.nested_label,self.nodes()) return set(nls)