CellNOpt homepage|cellnopt.core 1.0.0 documentation

Source code for cellnopt.core.sif

# -*- python -*-
#
#  This file is part of the cinapps.tcell package
#
#  Copyright (c) 2012-2013 - EMBL-EBI
#
#  File author(s): Thomas Cokelaer (cokelaer@ebi.ac.uk)
#
#  Distributed under the GLPv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: www.cellnopt.org
#
##############################################################################
from __future__ import print_function
#from __future__ import unicode_literals

import csv
import os
import re

# could be replace since we just use 2 functions ?
from numpy import sort, array

from cellnopt.core.converter import Interactions
#from cellnopt.core.cnograph import CNOGraph

__all__ = ["SIF"]


[docs]class SIF(Interactions): """Manipulate network stored in SIF format. The SIF format is used in Cytoscape and CellNOpt (www.cellnop.org). However, the format used in CellNOpt(R) restrict edges to be only 1 or -1. Besides, special nodes called **AND** nodes can be added using the "and" string followed by a unique identifier(integer) e.g., and22; seebelow for details. .. seealso:: :ref:`sif` section in the online documentation. The SIF format is a tab-separated format. It encodes relations betwee nodes in a network. Each row contains a relation where the first column represents the input node, the second value is the type of relation. The following columns represents the output node(s). Here is a simple example:: A 1 B B 1 C A -1 B but it can be factorised:: A 1 B C B 1 C In SIF, only **OR** reactions can be encoded. The following:: A 1 C B 1 C means A OR B gives C. **AND** reactions cannot be encoded therefore we have to code AND gates in a special way using a dedicated syntax. In order to encode the **AND** reaction the SIF reaction should be encoded as follows:: A 1 and1 B 1 and1 and1 1 C An AND gate is made of the "and" string and a unique id concatenated as its end. A SIF file can be read as follows:: s = SIF(filename) Each line is transformed into reactions (A=B, !A=B). You can then add or remove reactions. If you save the file in a new SIF file, be aware than lines such as:: A 1 B C are expanded as:: A 1 B A 1 C Aliases to the columns are stored in read-only attributes called :attr:`nodes1`, :attr:`edges`, :attr:`nodes2`. You can only add or remove reactions. Reactions are stored in :attr:`reacID`. .. todo:: explain more precisely or simplify the 2 parameter ignore_and and convert_ands, which are different semantic ! one of r the ^ character, one for the and string. """ def __init__(self, filename=None, format="cno", ignore_and=False, convert_ands=True): """.. rubric:: Constructor :param str filename: optional input SIF file. :param str format: "cno" or "generic" are accepted (default is cno). The cno format accepted only relation as "1" for activation, and "-1" for inhibitions. The "generic" format allows to have any relations. The "cno" format also interprets nodes that starts with "and" as logical AND gates. :param bool ignore_and: if you want to ignore the and nodes (see above), set to True. :param bool convert_ands: if AND nodes are found (from cellnopt syntax, eg a^b), converts them into a single reaction (default is True). """ super(SIF, self).__init__() self.format = format self.ignore_and = ignore_and self.convert_ands = convert_ands self.clear() # check that the extension is correct and the file exists if isinstance(filename, str): self.filename = filename #extension = os.path.splitext(filename)[1] #if extension not in [".sif", ".SIF"]: # raise ValueError("SIF file must have extension sif or SIF. %s found." % extension) if os.path.isfile(filename) == False: raise IOError("File %s not found." % filename) self.loadSIF(filename) elif filename==None: pass elif hasattr(filename, "reacID"): for reac in filename.reacID: self.add_reaction(reac) else: raise ValueError("argument must be a valid filename (string)") #def clear(self): # # simply reset the relevant attributes # self._edges = [] # self._nodes1 = [] # self._nodes2 = [] # self._data = [] # self._rawdata = [] # for reac in self.reacID: # self.remove_reaction(reac)
[docs] def clear(self): """remove all reactions and species""" self.remove_species(self.specID)
def _get_and_nodes(self): _andNodes = [x for x in set(self.nodes1 + self.nodes2) if re.search('^[a,A][n,N][D,d]\d$',x)] if self.convert_ands: _andNodes += [x for x in set(self.nodes1 + self.nodes2) if "^" in x] return _andNodes andNodes = property(_get_and_nodes, doc="Returns list of AND nodes")
[docs] def remove_and_gates(self): toremove = [r for x in self.andNodes for r in self.reacID if x in r] for reac in toremove: self.remove_reaction(reac)
[docs] def loadSIF(self, filename): # Reads the data first self.clear() try: f = open(filename, 'r') reader = csv.reader(f) self._rawdata = [row for row in reader if len(row)] for i, row in enumerate(self._rawdata): if len(row[0].split()) < 3: raise Exception("Line %s contains a ill-formed reactions:%s" %(i+1, row)) except IOError, e: raise IOError(e) else: # if the file was opened, let us close it. f.close() self._interpret_reactions() self._convert_ands()
def _convert_ands(self): #TODO check consistency between OR and AND gates if self.convert_ands == False: return # the and species found in the SIF (e.g A 1 and1; B 1 and1; and1 1 C) # are converted to A^B=C dont_remove = [] for andNode in self.andNodes: if "^" in andNode: continue lhs_nodes = [ (x,e) for x,e,y in zip(self.nodes1, self.edges,self.nodes2) if y==andNode] rhs_node = [ y for x,e,y in zip(self.nodes1, self.edges,self.nodes2) if x==andNode] try: assert len(rhs_node) == 1, "%s %s %s" % (lhs_nodes, andNode, rhs_node) rhs_node = rhs_node[0] andNode = "^".join([self.notedge(e)+x for x,e in lhs_nodes]) reac = andNode + "=" + rhs_node self.add_reaction(reac) except: dont_remove.append(andNode) # The andNode (e.g., and1) are finally removed for andNode in self.andNodes: if andNode not in dont_remove and "^" not in andNode: self.remove_species(andNode) def _interpret_reactions(self): """interpret the data read from the SIF file""" for i, row in enumerate(self._rawdata): row = row[0].split() # row0 is the rawdata (string) node1 = row[0] edge = row[1] nodes = row[2:] # may be several nodes after edge (at least 1) for node2 in nodes: # some specific tests for CNO format if self.format == "cno": if edge not in ["1","-1"]: raise ValueError("Edges must be set to 1 or -1") if re.search('^[a,A][n,N][D,d]\d+$',node1): if edge == "-1": raise ValueError("ill-formed SIF file line %s: an AND gate cannot have -1 edge" % i) # sometimes, we may want to ignore reactions with and reactions if self.ignore_and: if "and" in node1 or "and" in node2: continue # otherwise, store reactions if edge == "1": reac = node1 + "=" + node2 elif edge == "-1": reac = "!" + node1 + "=" + node2 else: reac = node1 + "=" + node2 self.add_reaction(reac) # makes the nodes1, nodes2, edges and data read-only properties def _get_nodes1(self): return [x.split("=")[0].replace("!","") for x in self.reacID] nodes1 = property(fget=_get_nodes1, doc="returns list of nodes in the left-hand sides of the reactions") def _get_nodes2(self): return [x.split("=")[1] for x in self.reacID] nodes2 = property(fget=_get_nodes2, doc="returns list of nodes in the right-hand sides of the reactions") def _get_edges(self): nodes1 = [x.split("=")[0] for x in self.reacID] edges = ["-1" if x.startswith("!") else "1" for x in nodes1] return edges edges = property(fget=_get_edges, doc="returns list of edges found in the reactions") def _get_data(self): data = array([(x,y,z) for x,y,z in zip(self.nodes1, self.edges, self.nodes2)], dtype=[('nodes1',object),('edges','int'),('nodes2',object)]) # instead of object, we can use a string type S but we must provide the # length. So, we need to estimate the lnogest specy name first. return data data = property(fget=_get_data, doc="Returns list of relations") def __eq__(self, x): if isinstance(x, SIF) == False: return False if sorted(self.nodes1) != sorted(x.nodes1): return False if sorted(self.nodes2) != sorted(x.nodes2): return False if sorted(self.edges) != sorted(x.edges): return False return True
[docs] def add_reaction(self, reaction): """Adds a reaction into the network. Valid reactions are:: A=B A+B=C A^B=C A&B=C Where the LHS can use as many species as desired. The following reaction is valid:: A+B+C+D+E=F Note however that OR gates (+ sign) are splitted so A+B=C is added as 2 different reactions:: A=C B=C """ if "=" in reaction: lhs, rhs = reaction.split("=") else: raise ValueError("Reaction must contain a = sign") # OR gates can be splitted without issue. A+B=C can be added as 2 # reactions A=C and B=C but AND gates are kept as it is with the symbol # ^ if "+" in lhs: for specy in lhs.split("+"): reac = specy + "=" + rhs super(SIF, self).add_reaction(reac) else: reac = lhs + "=" + rhs super(SIF, self).add_reaction(reac)
[docs] def save(self, filename, order="nodes1"): """Save the reactions (sorting with respect to order parameter) :param str filename: where to save the nodes1 edges node2 :param str order: can be nodes1, edges or nodes2 """ assert order in ["nodes1", "nodes2", "edges"] f = open(filename, "w") for row in sort(self.data, order=order): f.write("%s %s %s\n" % (row[0], str(row[1]), row[2])) f.close()
def __str__(self): """prints in alphabetical order""" msg = self._underline("Original file") + "\n\n" M = max([len(x) for x in self.nodes1+self.nodes2]) M2 = max([len(x) for x in self.edges]) for row in sort(self.data, order="nodes1"): msg += "%*s %*s %*s\n" % (M+2, row[0], M2+2 , str(row[1]), M+2,row[2]) msg = msg.rstrip("\n") if self.format == "cno": msg += "\n\n" msg += self._underline("namesSpecies")+"\n" msg += ", ".join([x for x in self.namesSpecies]) + "\n\n" msg += self._underline("reacID")+"\n" msg += ", ".join([x for x in self.reacID]) + "\n\n" #s.na msg += self._underline("interMat")+"\n" #msg += self.interMat.__str__() + "\n\n" #msg += self._underline("notMat") #msg += self.notMat.__str__() return msg def _underline(self, msg): import easydev return easydev.underline(msg)
[docs] def sif2reaction(self): """Returns a Reactions instance generated from the SIF file. AND gates are interpreted. For instance the followinf SIF:: A 1 and1 B 1 and1 and1 1 C give:: A^B=C """ reacID = [] for n1, e, n2 in zip(self.nodes1, self.edges, self.nodes2): if n1 not in self.andNodes and n2 not in self.andNodes: if e == "-1": reacID.append("!" + "=".join([n1,n2])) else: reacID.append("=".join([n1,n2])) for andNode in self.andNodes: # what to do with and nodes ? lhs_nodes = [ (x,e) for x,e,y in zip(self.nodes1, self.edges,self.nodes2) if y==andNode] rhs_node = [ y for x,e,y in zip(self.nodes1, self.edges,self.nodes2) if x==andNode] assert len(rhs_node) == 1, rhs_node rhs_node = rhs_node[0] reac = "^".join([self.notedge(e)+x for x,e in lhs_nodes]) reac += "="+rhs_node reacID.append(reac) from reactions import Reactions reactions = Reactions() for reac in reacID: reactions.add_reaction(reac) return reactions
[docs] def notedge(self, x): """Returns ! character if x equals 1 and empty string otherwise""" if x=="-1": return "!" else: return ""
[docs] def sif2json(self): raise NotImplementedError
[docs] def export2SBMLQual(self, filename=None, modelName="CellNOpt_model"): """Exports SIF to SBMLqual format. :param filename: save to the filename if provided :param str modelName: name of the model in SBML document :return: the SBML text This is a level3, version1 exporter. :: >>> s = SIF() >>> s.add_reaction("A=B") >>> res = s.export2SBMLQual("test.xml") .. warning:: logical AND are not encoded yet. works only if no AND gates .. warning:: experimental """ from cellnopt.core import SBML s = SBML(self, version="1.0", model_name=modelName) if len(self.andNodes)>=1: print("and gates are handled safely. Use with great care !!!") print("!!!! READ THE COMMENT ABOVE") sbml = s.create_header() sbml += s.create_model_name() sbml += s.create_compartment(id="main", constant="true") # add the species first sbml += """\n <qual:listOfQualitativeSpecies xmlns:qual="http://www.sbml.org/sbml/level3/version1/qual/version1">""" for node in self.specID: sbml += """\n <qual:qualitativeSpecies qual:constant="false" qual:compartment="main" qual:id="%s"/>""" % node sbml += """\n </qual:listOfQualitativeSpecies>\n""" from cellnopt.core.cnograph import CNOGraph #should be here because cnograph import sif itself. c = CNOGraph(self) sbml += """\n <qual:listOfTransitions xmlns:qual="http://www.sbml.org/sbml/level3/version1/qual/version1">""" tid = 0 for node in sorted(c.nodes()): if "^" in node: continue # otherwise pickup in AND and OR cases... predecessors = c.predecessors(node) if len(predecessors): tid +=1 transitionName = "t%s" % tid # the inputs sbml += """\n <qual:transition qual:id="%s">""" % transitionName sbml += """\n <qual:listOfInputs>""" for pred in predecessors: if "^" not in pred: sign = c[pred][node]['link'] if sign == "+": sign = "positive" elif sign == "-": sign = "negative" else: raise ValueError("sign (%s) if wrong expected + or - sign)" % sign) sbml += """ <qual:input qual:thresholdLevel="1" qual:transitionEffect="none" qual:sign="%s" qual:qualitativeSpecies="%s" qual:id="theta_%s_%s"/>""" % (sign, pred, transitionName, pred) else: input_ands = pred.split('=')[0].split("^") for this in input_ands: if this.startswith("!"): pred = pred[1:] sign = 'negative' else: sign= 'positive' sbml += """ <qual:input qual:thresholdLevel="1" qual:transitionEffect="none" qual:sign="%s" qual:qualitativeSpecies="%s" qual:id="theta_%s_%s"/>""" % (sign,this, transitionName, this) sbml += """\n </qual:listOfInputs>\n""" # the output (only one) sbml += """\n <qual:listOfOutputs>""" sbml += """\n <qual:output qual:transitionEffect="assignmentLevel" qual:qualitativeSpecies="%s"/>""" % node sbml += """\n </qual:listOfOutputs>\n""" sbml +="""\n <qual:listOfFunctionTerms> <qual:defaultTerm qual:resultLevel="0"> </qual:defaultTerm> <qual:functionTerm qual:resultLevel="1"> <math xmlns="http://www.w3.org/1998/Math/MathML">""" # the equations depending on the number of inputs and type of # edge (-1 or 1) if len(predecessors)==1 and "^" not in predecessors[0]: link = c[predecessors[0]][node]['link'] if link == "+": relation = "<geq/>" elif link == "-": relation = "<lt/>" else: raise ValueError("wrong sign must be + or -") sbml +=""" <apply> %s <ci> %s </ci> <ci> theta_%s_%s </ci> </apply>""" % (relation, predecessors[0], transitionName, predecessors[0]) elif len(predecessors)==1 and "^" in predecessors[0]: input_ands = predecessors[0].split('=')[0].split("^") sbml += """ <apply> <and/>""" for pred in input_ands: if pred.startswith("!"): pred = pred[1:] relation = "<lt/>" else: relation = "<geq/>" sbml +=""" <apply> %s <ci> %s </ci> <ci> theta_%s_%s </ci> </apply>""" % (relation, pred, transitionName, pred) sbml += """\n </apply>""" elif len(predecessors)>1: for pred in predecessors: if "^" in pred: raise ValueError("combination of inputs must be made of simple ORs without ANDS. case not yet implemented") sbml += """ <apply> <or/>""" for pred in predecessors: link = c[pred][node]['link'] if link == "+": relation = "<geq/>" elif link == "-": relation = "<lt/>" else: raise ValueError("wrong sign must be + or -") sbml +=""" <apply> %s <ci> %s </ci> <ci> theta_%s_%s </ci> </apply>""" % (relation, pred, transitionName, pred) sbml += """\n </apply>""" else: print("Unknown case") sbml += """\n </math>""" if len(predecessors)>=1: sbml += """\n </qual:functionTerm>""" sbml += """\n </qual:listOfFunctionTerms>""" sbml += """\n </qual:transition>\n""" # The end sbml += """ </qual:listOfTransitions>\n""" sbml += """ </model>\n""" sbml += s.create_footer() if filename: fh = open(filename, "w") fh.write(sbml) fh.close() return sbml
[docs] def importSBMLQual(self, filename, clear=True): """import SBMLQual XML file into a SIF instance :param str filename: the filename of the SBMLQual :param bool clear: remove all existing nodes and edges .. warning:: experimental """ if clear: self.clear() import bs4 # !!!!!!!!!!!!!!!!!!!!!!! somehow xml is transformed in lower case????? res = bs4.BeautifulSoup(open(filename).read()) model = res.findAll("model")[0] allspecies = model.findAll("qual:listofqualitativespecies")[0] nodes = [ x.get('qual:id') for x in res.findChildren("qual:qualitativespecies")] andCounter = 1 for transition in res.findChildren("qual:transition"): inputs = [x['qual:qualitativespecies'] for x in transition.findChildren("qual:input")] signs = [x['qual:sign'] for x in transition.findChildren("qual:input")] output = [x['qual:qualitativespecies'] for x in transition.findChildren("qual:output")] assert len(output) == 1 assert len(inputs) == len(signs) outputs = output * len(signs) # there may be different functions so we will need to loop over them functions = transition.findChildren("qual:functionterm") for function in functions: # from function, figure out if we have a OR or AND gate ands = function.findChildren("and") #TODO naive approach. could use somthing more robust ??? # should include the OR and other function terms that are not AND if len(ands)==0: print(inputs, signs, outputs) for x,link,y in zip(inputs, signs, outputs): if link == "positive": self.add_reaction("%s=%s" % (x,y)) else: self.add_reaction("!%s=%s" % (x,y)) # should include the AND. there is no else because I',m not sure if # there could both OR and AND equations withi a functionTerms. if len(ands): print("Found an AND gate") andName = "and%s" % andCounter LHS = [] for x,link in zip(inputs, signs): if link == "positive": LHS.append("%s" % x) else: LHS.append("!%s" % x) reaction = "^".join(LHS) + "=%s" % outputs[0] print(reaction) self.add_reaction(reaction) andCounter += 1 for node in nodes: if node not in self.specID: raise NotImplementedError("A species without transition is not included in the network")
def __len__(self): return len(self.nodes1) def _rename_species(self, old, new): print("Draft method. Use with care.") # could be mced to Interactions class for i, r in enumerate(self._reacID): self._reacID[i] = r.replace(old, new)
[docs] def plot(self): """Plot the network .. note:: this method uses :class:`cellnopt.core.cnograph` so AND gates appear as small circles. """ from cellnopt.core import CNOGraph c = CNOGraph(self) c.graph_options['graph']['dpi'] = 200 c.plot()