CellNOpt homepage|cellnopt.core 1.0.0 documentation

Source code for cellnopt.core.metabolites

# -*- python -*-
#
#  This file is part of the cinapps.tcell package
#
#  Copyright (c) 2012-2013 - EMBL-EBI
#
#  File author(s): Thomas Cokelaer (cokelaer@ebi.ac.uk)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  cellnopt.core website: http://www.cellnopt.org
#
##############################################################################
"""Module dedicated to the metabolites CNA format

:Status: mature but not all features implemented.

Based on the MATLAB function load_substances_inter provided by Steffen Klamt,
MPI "Dynamik komplexer technischer Systeme", Magdeburg

"""
# Names exported by ``from <this module> import *``: only the Metabolites class.
__all__ = ["Metabolites"]


class Metabolites(object):
    """Read a metabolites file and convert it to a Metabolites data structure.

    The metabolites format is a text format with whitespace-separated columns
    (tabs or spaces) that looks like::

        abl     abl     NaN  0  188  380  1  1
        akap79  akap79  NaN  0  989  442  1  1

    Lines starting with ``%`` or ``#`` are treated as comments and empty
    lines are skipped.

    Columns are

    #. identifier of this substance in CNA (e.g.: G6P) without blanks,
       stored in :attr:`specID`
    #. the full name of the metabolite (e.g.: glucose-6-phosphate; no blanks
       allowed!), stored in :attr:`specLongNames`
    #. either ``#`` or a value, stored in :attr:`specDefault`
    #. a value 0/1 (ignored right now)
    #. the 4 following columns must be numerical values; they are stored in
       :attr:`specBoxes`

    .. todo:: specify precisely the content of the columns

    ::

        >>> from cellnopt.core import Metabolites
        >>> m = Metabolites("metabolites")
        >>> m.specID

    """

    def __init__(self, filename, verbose=True):
        """.. rubric:: Constructor

        :param str filename: input filename containing the metabolites data
        :param bool verbose: (True by default) print informational messages
            while reading. The warning emitted when default box values are
            substituted is printed regardless of this flag.
        :raises IOError: if *filename* cannot be opened
        :raises ValueError: if a data line has fewer than 3 columns, if an
            identifier appears twice, or if the notes file mentions an
            identifier that is not in the metabolites file

        The file is read immediately; a companion ``<filename>_notes`` file
        is read as well when it exists.
        """
        self.filename = filename
        self.verbose = verbose

        #: attribute to store the specID (first column), one entry per species
        self.specID = []
        #: attribute to store the specLongNames (second column)
        self.specLongNames = []
        #: attribute to store specNotes, keyed by species identifier
        self.specNotes = {}
        #: attribute to store spec default (third column): a float when the
        #: column parses as a number, the string ``'NaN'`` when the column is
        #: ``#``, otherwise the raw text of the column
        self.specDefault = []
        #: attribute to store specBoxes, one list per species:
        #: ``[1-based index, xpos, ypos, 0, map_nr, rtype]``
        self.specBoxes = []

        self._read_data()

    def _read_data(self):
        """Parse :attr:`filename` and populate the ``spec*`` attributes.

        :raises ValueError: on a line with fewer than 3 columns or on a
            duplicated identifier
        """
        if self.verbose:
            print(' ')
            print('Reading Species ...')

        # The box index is 1-based and counts data lines only (comments and
        # empty lines have already been dropped by _read_lines).
        for index, line in enumerate(self._read_lines(), 1):
            fields = line.split()
            if len(fields) < 3:
                raise ValueError(
                    "Could not parse line %r: expected at least 3 columns"
                    % line)
            self.specID.append(fields[0])
            self.specLongNames.append(fields[1])
            self.specDefault.append(self._parse_default(fields[2]))
            # fields[3] (the 0/1 flag) is deliberately not stored.
            self.specBoxes.append(self._parse_box(index, fields[4:]))

        # Sanity check: identifiers must be unique.
        if len(set(self.specID)) != len(self.specID):
            raise ValueError(
                'Found a duplicated metabolite name ! Fix the input file.')

        self.N = len(self.specID)
        if self.verbose:
            print("Found %s species" % len(self))

        # Optional companion file holding free-text notes.
        self._set_notes()

    def _read_lines(self):
        """Return the stripped data lines of :attr:`filename`.

        Tabs are converted to spaces; comment lines (starting with ``%`` or
        ``#``) and empty lines are left out.
        """
        lines = []
        with open(self.filename, 'r') as handle:
            for raw in handle:
                line = raw.replace('\t', ' ').strip()
                if line.startswith(('%', '#')):
                    continue
                if not line:
                    if self.verbose:
                        print("Found an empty line. Skipped")
                    continue
                lines.append(line)
        return lines

    @staticmethod
    def _parse_default(token):
        """Convert the third column into the value kept in specDefault.

        ``#`` maps to the string ``'NaN'``; anything ``float`` accepts is
        returned as a float; any other text is returned unchanged.
        """
        if token == '#':
            return 'NaN'
        try:
            return float(token)
        except ValueError:
            return token

    @staticmethod
    def _parse_box(index, fields):
        """Build the specBoxes entry for the species number *index*.

        *fields* holds the columns after the 0/1 flag. When it does not
        contain exactly four numerical values, a warning is printed and the
        placeholder entry ``[index, 50, 50, 0, 1, 1]`` is returned.
        """
        try:
            # Both a wrong number of columns (unpacking) and a non-numerical
            # column (float) raise ValueError.
            xpos, ypos, map_nr, rtype = fields
            return [index, float(xpos), float(ypos), 0,
                    float(map_nr), float(rtype)]
        except ValueError:
            # Printed even when verbose is False so that the silent
            # substitution of placeholder values stays visible.
            print('warning. set default values')
            return [index, 50, 50, 0, 1, 1]

    def __len__(self):
        """Return the number of species that were read."""
        return len(self.specID)

    def _set_notes(self, filename=None):
        """Read the notes file, if any, into :attr:`specNotes`.

        :param str filename: notes file to read; defaults to
            :attr:`filename` + ``"_notes"``
        :raises ValueError: if the notes file refers to an identifier that is
            not in :attr:`specID`

        Each non-empty line is ``<identifier> <free text>``. If the file
        cannot be opened, :attr:`specNotes` is left untouched. Otherwise every
        known identifier gets an entry (``None`` when the file has no note
        for it).
        """
        if filename is None:
            filename = self.filename + "_notes"

        try:
            handle = open(filename, "r")
        except IOError:
            # A missing notes file is not an error.
            if self.verbose:
                print("No valid notes were found. Skipping.")
            return

        with handle:
            for name in self.specID:
                self.specNotes.setdefault(name)
            for line in handle:
                tokens = line.split()
                if not tokens:
                    continue
                self.specNotes[tokens[0]] = " ".join(tokens[1:])

        unknown = sorted(set(self.specNotes) - set(self.specID))
        if unknown:
            raise ValueError(
                "Notes found for unknown metabolite(s): %s"
                % ", ".join(unknown))