CellNOpt homepage|cellnopt 0.1.3 documentation

Source code for cno.io.sop2sif

# -*- python -*-
#
#  This file is part of the cinapps.tcell package
#
#  Copyright (c) 2012-2013 - EMBL-EBI
#
#  File author(s): Thomas Cokelaer (cokelaer@ebi.ac.uk)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: www.cellnopt.org
#
##############################################################################
from __future__ import print_function

from  cno.io.reactions import Reactions


__all__ = ["SOP2SIF"]


class SOP2SIF(Reactions):
    r"""Converts a file from SOP to SIF format

    SOP stands for sum of products, it is a list of relations of the form::

        !A+B=C

    For now, this function has been tested and used on the copy/paste of a
    PDF document into a file. Be careful because the interpretation of the
    characters may differ from one distribution to the other. The original
    data contains

    #. a special character for NOT, which is interpreted as \xc2\xac (a L
       turned by 90 degrees clockwise)
    #. an inversed ^ character for OR, which is interpreted as " _ "
    #. a ^ character for AND, which is correctly interpreted.
    #. a -> character for "gives", which is transformed into ! character.

    On other systems, it may be interpreted differently, so we provide a
    mapping attribute :attr:`mapping` to perform the translation. Note that
    :meth:`_translate` currently raises :class:`NotImplementedError` when the
    'not', 'gives' or 'or' entries differ from their default values.

    The data looks like::

        1 !A + B = C 1 [references]
        2 !A + B = E 2 [references]
        3 !A + B = D 1 [references]
        ...
        N !A + B = D 2 [references]

    The :class:`SOP2SIF` class gets rid of the last column, the [references]
    and the column before it (made of 1 and 2). Then, we convert the reaction
    strings into the same format as in CellNOpt that is:

    #. A = C means A GIVES C
    #. A + B = C means A gives C OR B gives C
    #. !A means NOT A

    ::

        >>> s2s = SOP2SIF("data.sop")
        >>> s = s2s.sop2sif()
        >>> s2s.export2sif("data.sif")

    """
    def __init__(self, filename):
        """Read and translate the SOP file.

        :param str filename: path of the SOP file to read. The file is read
            and translated immediately.
        """
        super(SOP2SIF, self).__init__()
        self.filename = filename
        self._read_data()

        # Kept as a public attribute; the translation below encodes OR gates
        # with "+" and does not use this tag.
        self.or_string = "__or__"

        # values of the mapping directory must be changed to match the
        # contents of the filename provided.
        # NOTE(review): '\xc2\xac' is the UTF-8 byte pair of the NOT SIGN
        # (U+00AC). Under Python 3, a file decoded as UTF-8 yields the single
        # character '\xac' instead, which this token will not match --
        # confirm the intended interpreter/encoding.
        #: The dictionary to map SOP special characters e.g if you code NOT
        #: with ! character, just fill this dictionary accordingly
        self.mapping = {
            'not': '\xc2\xac',
            'gives': '!',
            'and': '^',
            'or': ' _ '}

        self._translate()

    def _read_data(self):
        """Reads the data and performs some cleanup

        Fills :attr:`data` with the stripped lines of :attr:`filename`,
        without the empty lines and the lines starting with ``#``.
        """
        # The context manager closes the file even if read() fails.
        with open(self.filename, "r") as fh:
            content = fh.read()

        # Keep every line: the empty string that follows a trailing newline
        # is discarded by the empty-line filter below, so a last line without
        # a trailing newline is no longer lost.
        lines = [x.strip() for x in content.split("\n")]

        # remove line with comments and empty lines
        self.data = [x for x in lines if x and not x.startswith('#')]

    def _translate(self):
        """Interprets the data and translate into proper format.

        This function transform all lines into proper reactions expanding
        the AND and OR gates.

        A^B=C (AND gate) is transformed into 3 reactions::

            A=and1
            B=and1
            and1=C

        The OR gates (e.g., A+B=C) are split as well::

            A=C
            B=C

        :raises NotImplementedError: if the 'not', 'gives' or 'or' entry of
            :attr:`mapping` differs from its default value.
        """
        if self.mapping['not'] != '\xc2\xac' \
                or self.mapping['gives'] != '!' \
                or self.mapping['or'] != ' _ ':
            raise NotImplementedError

        data = self.data[:]

        # replace the direction character into equal. This should be done
        # before the NOT because in some cases, the direction sign is coded
        # as !, which is the NOT sign.
        data = [x.replace(self.mapping['gives'], '=') for x in data]
        data = [x.replace('= ', '=') for x in data]
        data = [x.replace(' =', '=') for x in data]

        # replace the special NOT character by ! and get rid of spaces
        data = [x.replace(self.mapping['not'], '!') for x in data]
        data = [x.replace('! ', '!') for x in data]
        data = [x.replace(' ! ', '!') for x in data]

        # replace the "and" character by ^ and remove spaces if any
        data = [x.replace(self.mapping['and'], '^') for x in data]
        data = [x.replace(' ^', '^') for x in data]
        data = [x.replace('^ ', '^') for x in data]

        # get rid of parentheses
        data = [x.replace('(', '') for x in data]
        data = [x.replace(')', '') for x in data]

        # replace the OR token (spaces included) by "+". The guard at the top
        # of this method guarantees that the token is " _ ".
        data = [x.replace(self.mapping['or'], "+") for x in data]

        # Remove the first column (line number)
        data = [x.split(" ", 1)[1] for x in data]

        # The reactions have no spaces left, so we can get rid of the tau and
        # reference columns by simply performing a split operation.
        data = [x.split(" ")[0] for x in data]

        # some negative reactions are written as A=!B, which is
        # identical to !A=B, which is our convention.
        for i, reac in enumerate(data):
            lhs, rhs = reac.split('=')
            if rhs.startswith('!'):
                lhs = '!' + lhs
                rhs = rhs.replace('!', '')
                newreac = lhs + '=' + rhs
                # report the original reaction, not the stripped RHS
                print("Warning: found a ! in RHS. inversion performed: %s is now %s" % (reac, newreac))
                data[i] = newreac

        # cleanup the OR gates by splitting them: A+B=C becomes A=C and B=C
        or_reacs = [x for x in data if "+" in x]
        data = [x for x in data if "+" not in x]
        for reac in or_reacs:
            lhs, rhs = reac.split("=")
            for term in lhs.split("+"):
                data.append(term + '=' + rhs)

        # cleanup the AND gates by creating AND nodes (and1, and2, ...)
        and_reacs = [x for x in data if "^" in x]
        data = [x for x in data if "^" not in x]
        for i, reac in enumerate(and_reacs):
            and_node = "and%s" % str(i + 1)
            lhs, rhs = reac.split("=")
            for specy in lhs.split("^"):
                data.append("%s=%s" % (specy, and_node))
            data.append("%s=%s" % (and_node, rhs))

        # fill the reactions, skipping those with a gate left in the RHS
        self._reactions = []
        for r in data:
            _, rhs = r.split('=')
            if "^" in rhs or "+" in rhs:
                print("Warning: reaction %s skipped (several + or ^ signs in RHS)" % r)
                continue
            self.add_reaction(r)
        print("Parsing done. Found %s reactions." % len(self.reactions))

    def sop2sif(self, include_and_gates=True):
        """Converts the SOP data into a SIF class

        :param bool include_and_gates: if set to False, all reactions with
            AND gates are removed.
        :returns: an instance of :class:`cno.io.sif.SIF`
        """
        from cno.io.sif import SIF
        s = SIF()
        for reac in self.reactions:
            s.add_reaction(reac)
        if not include_and_gates:
            s.remove_and_gates()
        return s

    def export2sif(self, filename, include_and_gates=True):
        """Save the reactions in a file using SIF format

        The data read from the SOP file is transformed into a SIF class
        before hand.

        :param str filename: name of the output file.
        :param bool include_and_gates: if set to False, all reactions with
            AND gates removed
        """
        s = self.sop2sif(include_and_gates)
        s.save(filename)

    def __len__(self):
        """Return the number of reactions."""
        return len(self.reactions)