Package dimer :: Package genome :: Module bedops
[hide private]
[frames] | no frames]

Source Code for Module dimer.genome.bedops

 1  """ 
 2  wrapper to some of the bedops (http://code.google.com/p/bedops/) utilities 
 3  """ 
 4   
 5   
 6  import subprocess 
 7  import re 
 8  from itertools import ifilter 
 9  from functools import partial 
10  import numpy as np 
11   
12  from . import parseBED 
13   
14  BEDMAP = "bedmap" 
15   
16   
17 -def bedmap(refdata, mapdata, ops=("--echo", "--echo-map"), 18 parsePatt="([01])[|](.+)[|](.*)", onlyOverlapping=True):
19 """run bedmap on the given files and return an iterator over the results 20 21 22 bedmap command is: bedmap opts refdata mapdata 23 24 @param refdata: reference intervals. output is with respect to these. I.e., 25 unless there is filtering, output has one line for each refdata line 26 @param mapdata: qualifying (e.g, within a range of a refdata element) 27 intervals on which to apply operations. 28 @param ops: operations (see bedmap help) 29 @param parsePatt: a regex object or pattern used to parse the output of bedmap. 30 Make this grouped, to get a tuple of groups. Notice that first group 31 of this should be "([01])" 32 @return: iterator over overapping features (str) 33 """ 34 35 cmd = ("bedmap", "--indicator") + tuple(ops) + (refdata, mapdata) 36 out = ifilter(bool, subprocess.check_output(cmd).split("\n")) 37 if onlyOverlapping: 38 out = ifilter(lambda tp: tp[0] == '1', out) 39 for line in out: 40 match = re.match(parsePatt, line) 41 yield match.groups()[1:]
42 43
44 -def overlap_as_array(anchor, feat_lst, bin=1, dtype=np.float64, 45 parseFeature=partial(parseBED, use_score=True), respect_strand=True):
46 """convert an anchor and a list of overlapping features on a signal array 47 48 anchor: BED5 49 feat_list: [>=BED4, ...] 50 bin: bin the signal 51 dtype:type of array 52 use_score: whether to use score from feature. will set to 1.0 if False 53 respect_strand : whether to mirror the signal for anchors on the negative strand 54 55 returns a signal array of length anchor[2] - anchor[1] 56 """ 57 58 def mydiff(v, S=anchor[1]): 59 dv = v - S 60 if dv < 0: 61 return 0 62 return dv
63 64 x = np.zeros((anchor[2] - anchor[1],), dtype=dtype) 65 if x.shape[0] % bin: 66 raise ValueError("bin (%d) not a multiple of track width (%d)" % (bin, x.shape[0]) ) 67 68 for ft in filter(bool, feat_lst): 69 (ch, s, e, n, v) = parseFeature(ft) 70 x[mydiff(s):mydiff(e)] += v 71 binned_x = x.reshape((-1, bin)).sum(axis=1) 72 if respect_strand: 73 if len(anchor) < 6: 74 raise ValueError("respect_strand on >BED6 anchors.") 75 if anchor[5] == "-": 76 for i in range(binned_x.shape[0] / 2): 77 binned_x[i], binned_x[-(i + 1)] = binned_x[-(i + 1)], binned_x[i] 78 return binned_x 79 80 81 if __name__ == "__main__": 82 pass 83