1 "modules for dealing with (mainly importing to datasets) genomic data"
2
3
def parseBED(s, use_score=True, bg_val=1.0):
    """Parse a single feature line in BED4 or BED5+ format.

    s: tab-separated line from the BED file
    use_score: if true, keep the score found in column 5; otherwise the
        score is replaced by bg_val
    bg_val: score assigned to the feature when use_score is false
    return: tuple (chrom, start, end, name, score, ...) where start and end
        are ints, score is a float (or bg_val as given) and any columns
        past the fifth are kept as strings
    raises ValueError: if the line has fewer than 4 columns, if it has only
        4 columns while use_score is true, or if a numeric column cannot
        be converted
    """
    cols = s.rstrip().split("\t")
    if len(cols) < 4:
        raise ValueError("feature (%s) must be at least BED4" % s)

    if len(cols) == 4 and use_score:
        raise ValueError("feature (%s) must be at least BED5 with use_score=True" % s)

    chrom, start, end, name = cols[0], int(cols[1]), int(cols[2]), cols[3]

    # Without use_score the content of column 5 (if present at all) is
    # ignored and the caller-supplied background value is used instead.
    score = float(cols[4]) if use_score else bg_val

    # Always hand back a tuple so BED4 and BED5+ inputs yield the same type.
    return (chrom, start, end, name, score) + tuple(cols[5:])
28