Source code for pytadbit.parsers.hic_parser
"""
19 Dec 2012
"""
from math import sqrt
def _read_matrix(f_h):
"""
reads from file
"""
nums = []
while True:
values = f_h.next().split()
if values[0].startswith('#'):
# skip comments
continue
break
# check if we have headers/row-names in the file
start = 1
if values[0].isdigit():
try:
nums.append([int(v) for v in values])
except ValueError:
nums.append([int(float(v)) for v in values])
start = 0
# parse the rest of the file
for line in f_h:
values = line.split()[start:]
try:
nums.append([int(v) for v in values])
except ValueError:
nums.append([int(float(v)) for v in values])
f_h.close()
size = len(nums)
return tuple([nums[j][i] for i in xrange(size) for j in xrange(size)]), size
[docs]def read_matrix(things, parser=None):
"""
Read and checks a matrix from a file or a list.
:param things: might be either a file name, a file handler, a list of them
or a list of list (all with same length)
:param None parser: a parser function that returns a tuple of lists representing the data matrix,
with this file example.tsv:
::
chrT_001 chrT_002 chrT_003 chrT_004
chrT_001 629 164 88 105
chrT_002 86 612 175 110
chrT_003 159 216 437 105
chrT_004 100 111 146 278
the output of parser('example.tsv') might be:
``([629, 86, 159, 100, 164, 612, 216, 111, 88, 175, 437, 146, 105, 110,
105, 278])``
:returns: the corresponding matrix concatenated into a huge list, also
returns number or rows
"""
parser = parser or _read_matrix
if type(things) is not list:
things = [things]
matrices = []
sizes = []
for thing in things:
if type(thing) is file:
matrix, size = parser(thing)
matrices.append(matrix)
sizes.append(size)
elif type(thing) is str:
matrix, size = parser(open(thing))
matrices.append(matrix)
sizes.append(size)
elif type(thing) is list:
if all([len(thing)==len(l) for l in thing]):
matrices.append(reduce(lambda x, y: x+y, thing))
sizes.append(len(thing))
else:
raise Exception('must be list of lists, all with same length.')
elif type(thing) is tuple:
# case we know what we are doing and passing directly list of tuples
matrices.append(thing)
siz = sqrt(len(thing))
if int(siz) != siz:
raise AttributeError('ERROR: matrix should be square.\n')
sizes.append(int(siz))
elif 'matrix' in str(type(thing)):
try:
row, col = thing.shape
if row != col:
raise Exception('matrix needs to be square.')
matrices.append(thing.reshape(-1).tolist()[0])
sizes.append(row)
except Exception as exc:
print 'Error found:', exc
else:
raise Exception('Unable to read this file or whatever it is :)')
if all([s==sizes[0] for s in sizes]) and \
all([__check_hic(m, sizes[0]) for m in matrices]):
return matrices, sizes[0]
raise Exception('All matrices must have the same size ' +
'(same chromosome and same bins).')
def __check_hic(hic, size):
"""
check if hi-c data is symmetric
"""
for i in xrange(size):
for j in xrange(i + 1, size):
if not hic[i * size + j] == hic[j * size + i]:
raise AttributeError('ERROR: matrix should be square.\n')
return True