from .classes import PhyloNetwork
from .operations import push_and_hang,hold_and_hang,push_and_label,hold_and_label
from .utils import random_weighted
from .memoize import memoize_function
def memoize_function(f): return f # use it to document memoized functions (sphinx bug?)
#import numpy
import random
# Sequential generator
[docs]def all_trees(taxa,binary=False,nested_taxa=True):
"""
Returns a generator for trees with given taxa.
If nested_taxa is True, then trees with internal nodes labeled
will also be produced.
If binary is True, then only binary trees will be generated; otherwise trees will
have internal nodes with arbitrary out-degree.
EXAMPLE::
>>> generator = all_trees(['A', 'B', 'C'], binary=True)
>>> generator.next().eNewick()
... '((C,B),A);'
>>> generator.next().eNewick()
... '(C,(B,A));'
>>> tmp = 2
>>> for tree in generator: tmp = tmp+1
>>> tmp
... 21
EXAMPLE::
>>> for tree in all_trees(['A', 'B', 'C', binary=True, nested_taxa=False):
>>> print tree.eNewick()
... ((C,B),A);
... (C,(B,A));
... ((C,A),B);
EXAMPLE::
>>> for tree in all_trees(['A', 'B', 'C', binary=False, nested_taxa=False):
>>> print tree.eNewick()
... ((C,B),A);
... (C,(B,A));
... ((C,A),B);
... (A,B,C);
"""
n=len(taxa)
if n==1:
yield PhyloNetwork(eNewick=('%s;' % taxa[0]))
return
taxon=taxa[-1]
parent_taxa=taxa[0:-1]
parent_generator=all_trees(parent_taxa,
binary=binary,nested_taxa=nested_taxa)
for parent in parent_generator:
for u in parent.nodes():
newtree=push_and_hang(parent,u,taxon)
yield newtree
# if not binary:
for u in parent.nodes():
if parent.is_leaf(u):
if nested_taxa:
newtree=hold_and_hang(parent,u,taxon)
yield newtree
else:
if (not binary) or (parent.out_degree(u)==1):
newtree=hold_and_hang(parent,u,taxon)
yield newtree
if nested_taxa:
for u in parent.nodes():
newtree=push_and_label(parent,u,taxon)
yield newtree
for u in parent.nodes():
newtree=hold_and_label(parent,u,taxon)
if newtree:
yield newtree
# Number of and random trees: binary without nested taxa
@memoize_function
[docs]def number_of_trees_bin_nont_partial(n,l,N):
"""
Gives the number of phylogenetic trees on n taxa with l leaves and N nodes.
Assume binary trees without nested taxa.
"""
if (l != n) or (N != 2*n-1) or (n < 0):
return 0
if n == 1:
return 1
return (N-2)*number_of_trees_bin_nont_partial(n-1,l-1,N-2)
@memoize_function
[docs]def number_of_trees_bin_nont_global(n):
"""
Gives the number of phylogenetic trees on n taxa.
Assume binary trees and without nested taxa.
"""
return number_of_trees_bin_nont_partial(n,n,2*n-1)
[docs]def random_tree_bin_nont_global(taxa, id_offset=0):
"""
Returns a random binary tree without nested taxa.
EXAMPLE::
>>> network = random_tree_bin_nont_global(['A', 'B', 'C', 'D'])
>>> network.nested_label(network.roots()[0])
... '{D,{C,{A,B}}}' # random
>>> network = random_tree_bin_nont_global(['A', 'B', 'C', 'D'])
>>> network.nested_label(network.roots()[0])
... '{A,{C,{B,D}}}' # random
"""
n = len(taxa)
if n == 1:
return PhyloNetwork(eNewick=('%s;' % taxa[0]),id_offset=id_offset)
parent = random_tree_bin_nont_global(taxa[0:-1],id_offset)
u = random.choice(parent.nodes())
newtree = push_and_hang(parent,u,taxa[-1])
return newtree
# Number of and random trees: not necessarily binary without nested taxa
@memoize_function
[docs]def number_of_trees_nobin_nont_partial(n,l,N):
"""
Gives the number of phylogenetic trees on n taxa with l leaves and N nodes.
Assume not necessarily binary trees and without nested taxa.
"""
if (l != n) or (N < n) or (n < 0) or (N >= 2*n):
return 0
if n==1:
return 1
return (N-2)*number_of_trees_nobin_nont_partial(n-1,l-1,N-2) + \
(N-n)*number_of_trees_nobin_nont_partial(n-1,l-1,N-1)
@memoize_function
[docs]def number_of_trees_nobin_nont_global(n):
"""
Gives the number of phylogenetic trees on n taxa.
Assume not necessarily binary trees and without nested taxa.
"""
if n == 1:
return 1
return sum([number_of_trees_nobin_nont_partial(n,n,N) for N in range(n+1,2*n)])
def random_tree_nobin_nont_partial(taxa,l,N,id_offset=0):
n = len(taxa)
if (l != n) or (N < n) or (n < 0) or (N >= 2*n):
return None
if n==1:
return PhyloNetwork(eNewick=('%s;' % taxa[0]),id_offset=id_offset)
choices = {
(push_and_hang,l-1,N-2,'nodes') : (N-2)*number_of_trees_nobin_nont_partial(n-1,l-1,N-2),
(hold_and_hang,l-1,N-1,'interior_nodes') : (N-n)*number_of_trees_nobin_nont_partial(n-1,l-1,N-1)
}
(operation, lp, Np, candidates_method) = random_weighted(choices)
parent = random_tree_nobin_nont_partial(taxa[0:-1],lp,Np,id_offset)
candidates = getattr(parent,candidates_method)()
u = random.choice(candidates)
newtree = operation(parent,u,taxa[-1])
return newtree
[docs]def random_tree_nobin_nont_global(taxa,id_offset=0):
"""
Returns a random tree without nested taxa.
EXAMPLE::
>>> network = random_tree_nobin_nont_global(['A', 'B', 'C', 'D'])
>>> network.nested_label(network.roots()[0])
... '{A,B,{C,D}}' # random, not binary
>>> network = random_tree_nobin_nont_global(['A', 'B', 'C', 'D'])
>>> network.nested_label(network.roots()[0])
... '{D,{A,{B,C}}}' # random
"""
n = len(taxa)
if n == 1:
return PhyloNetwork(eNewick=('%s;' % taxa[0]),id_offset=id_offset)
choices = {
N : number_of_trees_nobin_nont_partial(n,n,N) for N in range(n+1,2*n)
}
N = random_weighted(choices)
l = n
return random_tree_nobin_nont_partial(taxa,l,N,id_offset)
# Number of and random trees: binary with nested taxa
@memoize_function
[docs]def number_of_trees_bin_nt_partial(n,l,N,e):
"""
Gives the number of phylogenetic trees on n taxa with l leaves, N nodes, e of them being elementary.
Assume binary trees with nested taxa.
"""
#print "n=%d, l=%d, N=%d, e=%d" % (n,l,N,e)
if (l <= 0) or (l > n) or (n < 0) or (N < n) or (l+e > N) or (l+e > n) or (e < 0):
#h "n=%d, l=%d, N=%d, e=%d, --> %d" % (n,l,N,e,0)
return 0
if n==1:
#print "n=%d, l=%d, N=%d, e=%d, --> %d" % (n,l,N,e,1)
if (l == 1) and (N == 1) and (e == 0):
return 1
else:
return 0
count = (
(N-2)*number_of_trees_bin_nt_partial(n-1,l-1,N-2,e) + # P&H
(N-1)*number_of_trees_bin_nt_partial(n-1,l,N-1,e-1) + # P&L
(N-n+1)*number_of_trees_bin_nt_partial(n-1,l,N,e) + # H&L
(l)*number_of_trees_bin_nt_partial(n-1,l,N-1,e-1) + # H&H leaf
(e+1)*number_of_trees_bin_nt_partial(n-1,l-1,N-1,e+1) # H&H elem
)
#print "n=%d, l=%d, N=%d, e=%d, --> %d" % (n,l,N,e,count)
return count
@memoize_function
def number_of_trees_bin_nt_global(n):
if n==1:
return 1
return sum([number_of_trees_bin_nt_partial(n,l,N,e) for l in range(1,n+1)
for N in range(n,2*n)
for e in range(0,n)])
def random_tree_bin_nt_partial(taxa,l,N,e,id_offset):
n = len(taxa)
if (l <= 0) or (l > n) or (n < 0) or (N < n) or (l+e > N) or (l+e > n) or (e < 0):
return None
if n==1:
if (l == 1) and (N == 1) and (e == 0):
return PhyloNetwork(eNewick=('%s;' % taxa[0]),id_offset=id_offset)
else:
return None
choices = {
(push_and_hang,l-1,N-2,e,'nodes') : (N-2)*number_of_trees_bin_nt_partial(n-1,l-1,N-2,e),
(push_and_label,l,N-1,e-1,'nodes') : (N-1)*number_of_trees_bin_nt_partial(n-1,l,N-1,e-1),
(hold_and_label,l,N,e,'unlabelled_nodes') : (N-n+1)*number_of_trees_bin_nt_partial(n-1,l,N,e),
(hold_and_hang,l,N-1,e-1,'leaves') : (l)*number_of_trees_bin_nt_partial(n-1,l,N-1,e-1),
(hold_and_hang,l-1,N-1,e+1,'elementary_nodes') : (e+1)*number_of_trees_bin_nt_partial(n-1,l-1,N-1,e+1)
}
(operation, lp, Np, ep, candidates_method) = random_weighted(choices)
parent = random_tree_bin_nt_partial(taxa[0:-1],lp,Np,ep,id_offset)
candidates = getattr(parent,candidates_method)()
u = random.choice(candidates)
newtree = operation(parent,u,taxa[-1])
return newtree
[docs]def random_tree_bin_nt_global(taxa,id_offset=0):
"""
Returns a random binary tree with nested taxa.
EXAMPLE::
>>> network = random_tree_bin_nt_global(['A', 'B', 'C', 'D'])
>>> network.eNewick()
... '((D,A),B)C;' # random
>>> network = random_tree_bin_nt_global(['A', 'B', 'C', 'D'])
>>> network.eNewick()
... '((D,A),(B)C);' # random
"""
n = len(taxa)
if n == 1:
return PhyloNetwork(eNewick=('%s;' % taxa[0]),id_offset=id_offset)
choices = {
(l,N,e) : number_of_trees_bin_nt_partial(n,l,N,e) for l in range(1,n+1)
for N in range(n,2*n)
for e in range(0,n)
}
(l,N,e) = random_weighted(choices)
return random_tree_bin_nt_partial(taxa,l,N,e,id_offset)
# Number of and random trees: not necessarily binary with nested taxa
@memoize_function
[docs]def number_of_trees_nobin_nt_partial(n,l,N):
"""
Gives the number of phylogenetic trees on n taxa with l leaves, N nodes, e of them being elementary.
Assume binary trees with nested taxa.
"""
#print "n=%d, l=%d, N=%d, e=%d" % (n,l,N,e)
if (l <= 0) or (l > n) or (n < 0) or (N < n):
#print "n=%d, l=%d, N=%d, e=%d, --> %d" % (n,l,N,e,0)
return 0
if n==1:
#print "n=%d, l=%d, N=%d, --> %d" % (n,l,N,1)
if (l == 1) and (N == 1):
return 1
else:
return 0
count = (
(N-2)*number_of_trees_nobin_nt_partial(n-1,l-1,N-2) + # P&H
(N-l)*number_of_trees_nobin_nt_partial(n-1,l-1,N-1) + # H&H interior
l*number_of_trees_nobin_nt_partial(n-1,l,N-1) + # H&H leaf
(N-1)*number_of_trees_nobin_nt_partial(n-1,l,N-1) + # P&L
(N-n+1)*number_of_trees_nobin_nt_partial(n-1,l,N) # H&L
)
#print "n=%d, l=%d, N=%d, --> %d" % (n,l,N,count)
return count
@memoize_function
def number_of_trees_nobin_nt_global(n):
if n==1:
return 1
return sum([number_of_trees_nobin_nt_partial(n,l,N) for l in range(1,n+1)
for N in range(n,2*n)])
def random_tree_nobin_nt_partial(taxa,l,N,id_offset):
n = len(taxa)
if (l <= 0) or (l > n) or (n < 0) or (N < n):
return None
if n==1:
if (l == 1) and (N == 1):
return PhyloNetwork(eNewick=('%s;' % taxa[0]),id_offset=id_offset)
else:
return None
choices = {
(push_and_hang,l-1,N-2,'nodes') : (N-2)*number_of_trees_nobin_nt_partial(n-1,l-1,N-2),
(hold_and_hang,l-1,N-1,'interior_nodes') : (N-l)*number_of_trees_nobin_nt_partial(n-1,l-1,N-1),
(hold_and_hang,l,N-1,'leaves') : (l)*number_of_trees_nobin_nt_partial(n-1,l,N-1),
(push_and_label,l,N-1,'nodes') : (N-1)*number_of_trees_nobin_nt_partial(n-1,l,N-1),
(hold_and_label,l,N,'unlabelled_nodes') : (N-n+1)*number_of_trees_nobin_nt_partial(n-1,l,N)
}
#print choices
(operation, lp, Np, candidates_method) = random_weighted(choices)
parent = random_tree_nobin_nt_partial(taxa[0:-1],lp,Np,id_offset)
#print parent
candidates = getattr(parent,candidates_method)()
u = random.choice(candidates)
newtree = operation(parent,u,taxa[-1])
return newtree
[docs]def random_tree_nobin_nt_global(taxa,id_offset):
"""
Returns a random tree with nested taxa.
EXAMPLE::
>>> network = random_tree_nobin_nt_global(['A', 'B', 'C', 'D'])
>>> network.eNewick()
... '((D,A),C,B);' # random, non-binary
>>> network = random_tree_nobin_nt_global(['A', 'B', 'C', 'D'])
>>> network.eNewick()
... '(((D,C),B),A);' # random
"""
n = len(taxa)
if n == 1:
return PhyloNetwork(eNewick=('%s;' % taxa[0]),id_offset=id_offset)
choices = {
(l,N) : number_of_trees_nobin_nt_partial(n,l,N) for l in range(1,n+1)
for N in range(n,2*n)
}
#print choices
(l,N) = random_weighted(choices)
#print (l,N)
return random_tree_nobin_nt_partial(taxa,l,N,id_offset)
[docs]def random_tree_generator(taxa,binary=False,nested_taxa=True,id_offset=0):
"""
Returns generator of random trees.
If nested_taxa = True, then trees with internal labels will also be produced.
If binary = True, then only binary trees will be produced.
EXAMPLE::
>>> generator = random_tree_generator(['a', 'b', 'c', 'd', 'e', 'f'], binary=True)
>>> for i in range(3):
>>> print generator.next().eNewick()
... '((d,b),(c,((e)f)a));'
... '(c,(((a,e),d),f)b);'
... '(((c,f),((b)e)d))a;'
"""
if binary:
if nested_taxa:
f = random_tree_bin_nt_global
else:
f = random_tree_bin_nont_global
else:
if nested_taxa:
f = random_tree_nobin_nt_global
else:
f = random_tree_nobin_nont_global
while True:
yield f(taxa,id_offset)
if __name__ == "__main__":
tg = all_trees(['1','2','3'])
while tg:
print tg.next()