1 """
2 wrapper to some of the bedops (http://code.google.com/p/bedops/) utilities
3 """
4
5
6 import subprocess
7 import re
8 from itertools import ifilter
9 from functools import partial
10 import numpy as np
11
12 from . import parseBED
13
14 BEDMAP = "bedmap"
15
16
17 -def bedmap(refdata, mapdata, ops=("--echo", "--echo-map"),
18 parsePatt="([01])[|](.+)[|](.*)", onlyOverlapping=True):
19 """run bedmap on the given files and return an iterator over the results
20
21
22 bedmap command is: bedmap opts refdata mapdata
23
24 @param refdata: reference intervals. output is with respect to these. I.e.,
25 unless there is filtering, output has one line for each refdata line
26 @param mapdata: qualifying (e.g, within a range of a refdata element)
27 intervals on which to apply operations.
28 @param ops: operations (see bedmap help)
29 @param parsePatt: a regex object or pattern used to parse the output of bedmap.
30 Make this grouped, to get a tuple of groups. Notice that first group
31 of this should be "([01])"
32 @return: iterator over overapping features (str)
33 """
34
35 cmd = ("bedmap", "--indicator") + tuple(ops) + (refdata, mapdata)
36 out = ifilter(bool, subprocess.check_output(cmd).split("\n"))
37 if onlyOverlapping:
38 out = ifilter(lambda tp: tp[0] == '1', out)
39 for line in out:
40 match = re.match(parsePatt, line)
41 yield match.groups()[1:]
42
43
44 -def overlap_as_array(anchor, feat_lst, bin=1, dtype=np.float64,
45 parseFeature=partial(parseBED, use_score=True), respect_strand=True):
46 """convert an anchor and a list of overlapping features on a signal array
47
48 anchor: BED5
49 feat_list: [>=BED4, ...]
50 bin: bin the signal
51 dtype:type of array
52 use_score: whether to use score from feature. will set to 1.0 if False
53 respect_strand : whether to mirror the signal for anchors on the negative strand
54
55 returns a signal array of length anchor[2] - anchor[1]
56 """
57
58 def mydiff(v, S=anchor[1]):
59 dv = v - S
60 if dv < 0:
61 return 0
62 return dv
63
64 x = np.zeros((anchor[2] - anchor[1],), dtype=dtype)
65 if x.shape[0] % bin:
66 raise ValueError("bin (%d) not a multiple of track width (%d)" % (bin, x.shape[0]) )
67
68 for ft in filter(bool, feat_lst):
69 (ch, s, e, n, v) = parseFeature(ft)
70 x[mydiff(s):mydiff(e)] += v
71 binned_x = x.reshape((-1, bin)).sum(axis=1)
72 if respect_strand:
73 if len(anchor) < 6:
74 raise ValueError("respect_strand on >BED6 anchors.")
75 if anchor[5] == "-":
76 for i in range(binned_x.shape[0] / 2):
77 binned_x[i], binned_x[-(i + 1)] = binned_x[-(i + 1)], binned_x[i]
78 return binned_x
79
80
81 if __name__ == "__main__":
82 pass
83