Pyteomics documentation v2.1.5

pyteomics.mzml

Contents

Source code for pyteomics.mzml

"""
mzml - reader for mass spectrometry data in mzML format
=======================================================

Summary
-------

mzML is a standard rich XML-format for raw mass spectrometry data storage.
Please refer to http://www.psidev.info/index.php?q=node/257 for the detailed 
specification of the format and the structure of mzML files.

This module provides minimalistic infrastructure for access to data stored in
mzML files. The most important function is :py:func:`read`, which 
reads spectra and related information and saves them into human-readable dicts.
The rest of data can be obtained via a combination of :py:func:`get_node` and
:py:func:`read_params` functions. These functions rely on the terminology of 
the underlying `lxml library <http://lxml.de/>`_. 

Data access
-----------

  :py:func:`read` - iterate through spectra in mzML file. Data from a
  single spectrum are converted to a human-readable dict. Spectra themselves are 
  stored under 'm/z array' and 'intensity array' keys.

  :py:func:`version_info` - get version information about the mzML file

  :py:func:`iterfind` - iterate over elements in the mzML file.

-------------------------------------------------------------------------------

"""

#   Copyright 2012 Anton Goloborodko, Lev Levitsky
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import numpy
import zlib
import base64
from lxml import etree
from . import auxiliary as aux

def _decode_base64_data_array(source, dtype, is_compressed):
    """Turn a base64-encoded binary payload into a numpy array.

    Parameters
    ----------
    source : str
        The base64 text holding the binary array.
    dtype : str
        Element type of the array, in numpy dtype notation.
    is_compressed : bool
        When True, the decoded bytes are inflated with zlib before being
        interpreted as an array.

    Returns
    -------
    out : numpy.array
    """
    raw = base64.b64decode(source.encode('ascii'))
    payload = zlib.decompress(raw) if is_compressed else raw
    return numpy.frombuffer(payload, dtype=dtype)

@aux._file_reader('rb')
def read(source):
    """Parse ``source`` and iterate through spectra.

    Parameters
    ----------
    source : str or file
        A path to a target mzML file or the file object itself.

    Returns
    -------
    out : iterator
       An iterator over the dicts with spectra properties.
    """
    # Spectra are simply the 'spectrum' elements of the document.
    return iterfind(source, 'spectrum')
def _get_info_smart(source, element, **kw):
    """Extract a dict of info from `element`, decoding binary data arrays.

    The element is first converted to a dict by :py:func:`_get_info`. If the
    result carries a ``'binary'`` entry, the base64 payload is decoded into
    a numpy array using the precision and compression terms found in the
    dict, and those terms are removed from the result.

    Parameters
    ----------
    source : file
        Passed through unchanged to :py:func:`_get_info`.
    element : lxml element
        The element to extract information from.
    **kw
        Extra keyword arguments for :py:func:`_get_info`. ``recursive``,
        if given and not None, overrides the default recursion setting
        (False for the root ``indexedmzML`` / ``mzML`` elements, True for
        everything else).

    Returns
    -------
    out : dict
        The extracted info. For an element with binary data whose dict has
        an empty ``'... array'`` key, the result is reduced to
        ``{<that key>: <decoded array>}``; otherwise the decoded array is
        stored under ``'binary'``.
    """
    name = aux._local_name(element)
    kwargs = dict(kw)
    rec = kwargs.pop('recursive', None)
    if rec is None:
        # Root elements are not expanded recursively unless asked for.
        rec = name not in {'indexedmzML', 'mzML'}
    info = _get_info(source, element, rec, **kwargs)

    if 'binary' in info:
        # Precision term -> numpy dtype code.
        types = {'32-bit float': 'f', '64-bit float': 'd'}
        # NOTE(review): if neither precision term is present, `dtype` is
        # never bound and its first use below raises UnboundLocalError.
        for t, code in types.items():
            if t in info:
                dtype = code
                del info[t]
                break
        # sometimes it's under 'name'
        else:
            if 'name' in info:
                for t, code in types.items():
                    if t in info['name']:
                        dtype = code
                        info['name'].remove(t)
                        break

        compressed = True
        if 'zlib compression' in info:
            del info['zlib compression']
        elif 'name' in info and 'zlib compression' in info['name']:
            info['name'].remove('zlib compression')
        else:
            compressed = False
            info.pop('no compression', None)
            # Best-effort cleanup of the term when it sits under 'name'.
            # ValueError covers a list that does not contain the term.
            try:
                info['name'].remove('no compression')
                if not info['name']:
                    del info['name']
            except (KeyError, TypeError, ValueError):
                pass

        b = info.pop('binary')
        if b:
            array = _decode_base64_data_array(b, dtype, compressed)
        else:
            array = numpy.array([], dtype=dtype)

        for k in info:
            if k.endswith(' array') and not info[k]:
                # Collapse the dict to the array, keyed by its kind term.
                info = {k: array}
                break
        else:
            # No '... array' key found: keep the decoded data under
            # 'binary'. (This used to be a `==` comparison, which raised
            # KeyError because 'binary' had just been popped.)
            info['binary'] = array

    if 'binaryDataArray' in info:
        # Merge the per-array dicts into the parent dict.
        for data_array in info.pop('binaryDataArray'):
            info.update(data_array)

    intkeys = {'ms level'}
    for k in intkeys:
        if k in info:
            info[k] = int(info[k])
    return info


_version_info_env = {'format': 'mzML', 'element': 'mzML'}
version_info = aux._make_version_info(_version_info_env)

# Default schema description handed to aux._make_schema_info.
# 'ints' holds (element, attribute) pairs; 'lists' holds element names.
# NOTE(review): presumably used when the schema itself cannot be
# retrieved -- confirm against the auxiliary module.
_schema_defaults = {
    'ints': {
        ('spectrum', 'index'),
        ('instrumentConfigurationList', 'count'),
        ('binaryDataArray', 'encodedLength'),
        ('cvList', 'count'),
        ('binaryDataArray', 'arrayLength'),
        ('scanWindowList', 'count'),
        ('componentList', 'count'),
        ('sourceFileList', 'count'),
        ('productList', 'count'),
        ('referenceableParamGroupList', 'count'),
        ('scanList', 'count'),
        ('spectrum', 'defaultArrayLength'),
        ('dataProcessingList', 'count'),
        ('sourceFileRefList', 'count'),
        ('scanSettingsList', 'count'),
        ('selectedIonList', 'count'),
        ('chromatogram', 'defaultArrayLength'),
        ('precursorList', 'count'),
        ('chromatogram', 'index'),
        ('processingMethod', 'order'),
        ('targetList', 'count'),
        ('sampleList', 'count'),
        ('softwareList', 'count'),
        ('binaryDataArrayList', 'count'),
        ('spectrumList', 'count'),
        ('chromatogramList', 'count')},
    'floats': {},
    'bools': {},
    'lists': {
        'scan', 'spectrum', 'sample', 'cv', 'dataProcessing', 'cvParam',
        'source', 'userParam', 'detector', 'product',
        'referenceableParamGroupRef', 'selectedIon', 'sourceFileRef',
        'binaryDataArray', 'analyzer', 'scanSettings',
        'instrumentConfiguration', 'chromatogram', 'target',
        'processingMethod', 'precursor', 'sourceFile',
        'referenceableParamGroup', 'contact', 'scanWindow', 'software'},
    'intlists': {},
    'floatlists': {},
    'charlists': {}}

_schema_env = {'format': 'mzML', 'version_info': version_info,
               'default_version': '1.1.0', 'defaults': _schema_defaults}
_schema_info = aux._make_schema_info(_schema_env)

_getinfo_env = {'keys': {'binaryDataArrayList'},
                'schema_info': _schema_info,
                'get_info_smart': _get_info_smart}
_get_info = aux._make_get_info(_getinfo_env)

_iterfind_env = {'get_info_smart': _get_info_smart}
iterfind = aux._make_iterfind(_iterfind_env)

Contents