Source code for fact.slowdata.tools
""" library of (hopefully) useful functions for the slow data DB interface
"""
import numpy as np
import pymongo
def cursor_to_rec_array(cursor):
    """ convert a pymongo.cursor.Cursor to a numpy recarray

    The documents are first collected into a structured array by
    cursor_to_structured_array(); that array is then re-interpreted as
    an np.recarray, so fields are reachable as attributes as well.
    """
    return cursor_to_structured_array(cursor).view(np.recarray)
def cursor_to_structured_array(cursor):
    """ convert a pymongo.cursor.Cursor to a numpy structured array

    The dtype of the result is inferred by make_numpy_dtype_from_cursor().
    Every document delivered by the cursor becomes exactly one row.

    A field that is missing from a document is filled with np.nan.
    NOTE: for fields whose dtype cannot hold NaN (e.g. integers) numpy
    rejects that assignment with an exception, which is propagated.

    All documents are held in memory while the array is filled.
    """
    # The dtype has to be inferred *before* the cursor is iterated: the
    # helper indexes the cursor, which PyMongo only allows as long as the
    # query has not been executed yet.
    structured_array_dtype = make_numpy_dtype_from_cursor(cursor)

    # Size the array by the documents actually returned.
    # Cursor.count() is gone in PyMongo 4 and, by default, ignored
    # limit()/skip(), which left surplus all-zero rows at the end.
    documents = list(cursor)
    array = np.zeros(len(documents), structured_array_dtype)

    for counter, document in enumerate(documents):
        for field_name in structured_array_dtype.names:
            try:
                value = document[field_name]
            except KeyError:
                # field absent in this (older/newer format) document
                value = np.nan
            array[field_name][counter] = value
    return array
def make_numpy_dtype_from_cursor(cursor):
    """ infer datatype of structured array from document(s) from a cursor

    The first document of the cursor (``cursor[0]``) serves as the
    example that defines field names, types and shapes; the actual
    inference is done by make_numpy_dtype_from_document().

    Raises a LookupError when there is no example document. For an empty
    cursor this is an IndexError (a LookupError subclass), so callers
    catching either exception type keep working.
    """
    # NOTE: an earlier version additionally queried the newest document of
    # the whole collection, but its result was overwritten right away by
    # cursor[0]. That unused database round-trip has been removed.
    try:
        example_document = cursor[0]
    except IndexError:
        # Re-raise the same exception type with an explanatory message.
        raise IndexError(
            'cursor contains no documents; cannot infer a dtype.')

    if example_document is None:
        # Cannot happen with a real pymongo cursor (indexing raises
        # instead); kept for cursor-like objects that return None.
        raise LookupError('associated collection of cursor is empty.')

    return make_numpy_dtype_from_document(example_document)
def make_numpy_dtype_from_document(doc):
    """ infer datatype of structured array from document

    Each key of `doc` becomes one field of the returned np.dtype. The
    field's type string and shape are taken from the value after it has
    been converted with np.array(), so list values turn into sub-array
    fields of the corresponding shape.

    Keys whose name contains the substring '_id' are left out.
    """
    def describe_field(name):
        # let numpy decide on element type and shape of the sample value
        sample = np.array(doc[name])
        return (str(name), sample.dtype.str, sample.shape)

    field_specs = [
        describe_field(name) for name in doc if '_id' not in name
    ]
    return np.dtype(field_specs)