Source code for tethne.writers.corpora
"""
"""
def _open_for_writing(path):
    """Open ``path`` for binary writing; raise ValueError if that fails."""
    try:
        return open(path, 'wb')
    except IOError:
        raise ValueError('Invalid target. Could not open files for writing.')


def _to_bytes(text):
    """Return ``text`` as bytes, UTF-8 encoding it unless it already is bytes."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def to_documents(target, ngrams):
    """
    Write n-grams to a documents file and a matching metadata file.

    Two files are produced. ``<target>_docs.txt`` holds one line per entry in
    ``ngrams``: every n-gram is repeated ``frequency`` times, and the tokens
    are separated by single spaces. ``<target>_meta.csv`` is tab-separated,
    starts with the header ``# doc<TAB>doi``, and maps the zero-based line
    index in the documents file to the corresponding key of ``ngrams``.

    Parameters
    ----------
    target : str
        Target path for documents; e.g. './mycorpus' will result in
        './mycorpus_docs.txt' and './mycorpus_meta.csv'.
    ngrams : dict
        Keys are paper DOIs, values are lists of (Ngram, frequency) tuples.
        Each Ngram must be a string so that it can be space-joined.

    Returns
    -------
    None : If all goes well.

    Raises
    ------
    ValueError
        If either output file cannot be opened for writing, or if ``ngrams``
        does not provide an ``items()`` method (i.e. is not dictionary-like).
    """
    # Both handles are managed by the ``with`` statement, so they are closed
    # on every exit path -- including when the second open fails or when an
    # exception is raised halfway through writing.
    with _open_for_writing(target + '_docs.txt') as doc_file, \
            _open_for_writing(target + '_meta.csv') as meta_file:
        meta_file.write(_to_bytes('# doc\tdoi\n'))

        # Keep the ``try`` limited to the attribute lookup so that an
        # unrelated AttributeError raised while writing is not misreported
        # as a bad ``ngrams`` argument.
        try:
            items = ngrams.items()
        except AttributeError:
            raise ValueError('Parameter \'ngrams\' must be dictionary-like.')

        # ``index`` is the document's line number in the _docs.txt file.
        for index, (key, values) in enumerate(items):
            tokens = []
            for gram, freq in values:
                tokens.extend([gram] * freq)
            doc_file.write(_to_bytes(' '.join(tokens) + '\n'))
            meta_file.write(_to_bytes('{0}\t{1}\n'.format(index, key)))