CellNOpt homepage|cellnopt 0.1.3 documentation

Source code for cno.io.sif

# -*- python -*-
#
#  This file is part of the cno package
#
#  Copyright (c) 2012-2014 - EMBL-EBI
#
#  File author(s): Thomas Cokelaer (cokelaer@ebi.ac.uk)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  website: http://github.com/cellnopt/cellnopt
#
##############################################################################
from __future__ import print_function

import csv
import os
import re

from cno.io.reactions import Reactions, Reaction
from cno.misc import CNOError

import numpy as np

__all__ = ["SIF"]


class SIF(Reactions):
    """Manipulate a network stored in SIF format.

    The SIF format is used in Cytoscape and CellNOpt (www.cellnopt.org).
    The flavour used in CellNOpt restricts edges to 1 or -1. In addition,
    special **AND** nodes can be declared with the string "and" followed by
    a unique integer identifier (e.g., and22); see below for details.

    .. seealso:: :ref:`sif` section in the online documentation.

    SIF is a tab-separated format that encodes relations between the nodes
    of a network. Each row holds one relation: the first column is the input
    node, the second column is the type of relation and the remaining
    columns are the output node(s). Here is a simple example::

        A 1 B
        B 1 C
        A -1 B

    Rows sharing the same input node and relation can be factorised::

        A 1 B C
        B 1 C

    Only **OR** reactions can be encoded directly in SIF. The following::

        A 1 C
        B 1 C

    means A OR B gives C. **AND** reactions need a dedicated syntax based on
    an intermediate node::

        A 1 and1
        B 1 and1
        and1 1 C

    An AND gate is named with the "and" string and a unique id appended to
    it.

    A SIF file can be read as follows::

        s = SIF(filename)

    Each line is transformed into reactions (A=B, !A=B). Reactions can then
    be added or removed. If the network is saved into a new SIF file, be
    aware that lines such as::

        A 1 B C

    are expanded as::

        A 1 B
        A 1 C

    Aliases to the columns are available as the read-only attributes
    :attr:`nodes1`, :attr:`edges` and :attr:`nodes2`. Reactions are stored
    in :attr:`reactions`.

    """

    def __init__(self, filename=None, frmt="cno"):
        """.. rubric:: Constructor

        :param filename: optional input. A string is taken as the path of a
            file to load: names ending with "xml" go through
            :meth:`read_sbmlqual`, any other name through :meth:`read_sif`.
            ``None`` gives an empty network. Any other object exposing a
            ``reactions`` attribute has its reactions copied one by one.
        :param str frmt: "cno" (default) or "generic". The "cno" format
            only accepts "1" (activation) and "-1" (inhibition) as
            relations; other values are handled as generic relations.
        :raises IOError: if *filename* is a string that is not an existing
            file.
        :raises ValueError: if *filename* is none of the supported kinds.

        The "cno" format also interprets nodes whose name starts with "and"
        as logical AND gates and rewrites them in a more compact notation.
        For instance::

            A 1 and1
            B -1 and1
            and1 1 C

        is stored internally as the single reaction A^!B=C

        """
        super(SIF, self).__init__()

        self.frmt = frmt
        self.ignore_and = False
        # AND-gate conversion is specific to the CellNOpt flavour
        self.convert_ands = bool(frmt == 'cno')

        # nothing to load: start from an empty network
        if filename is None:
            self.clear()
            return

        # a path: check that the file exists, then pick the reader from
        # the end of the file name
        if isinstance(filename, str):
            self.filename = filename
            if not os.path.isfile(filename):
                raise IOError("File %s not found." % filename)
            if filename.endswith('xml'):
                loader = self.read_sbmlqual
            else:
                loader = self.read_sif
            loader(filename)
            return

        # could be another instance of a SIF file (or cnograph, or reactions)
        if not hasattr(filename, "reactions"):
            raise ValueError("argument must be a valid filename (string)")
        for reac in filename.reactions:
            self.add_reaction(reac)
[docs] def clear(self): """remove all reactions and species""" self.remove_species(self.species)
[docs] def is_and(self, value): if self.convert_ands is True and self.and_symbol in value: return True if re.search('^[a,A][n,N][D,d]\d+$', value) : return True return False
def _get_and_nodes(self): _and_nodes = [x for x in set(self.nodes1 + self.nodes2) if self.is_and(x)] return _and_nodes and_nodes = property(_get_and_nodes, doc="Returns list of AND nodes")
[docs] def remove_and_gates(self): """Remove all AND gates""" toremove = [r for x in self.and_nodes for r in self.reactions if x in r] for reac in toremove: self.remove_reaction(reac)
[docs] def read_sif(self, filename): """Read a SIF file""" # Reads the data first self.clear() with open(filename, 'r') as fh: reader = csv.reader(fh) self._rawdata = [row for row in reader if len(row)] for i, row in enumerate(self._rawdata): if len(row[0].split()) < 3: raise Exception("Line %s contains a ill-formed reactions:%s" %(i+1, row)) self._interpret_reactions() if self.convert_ands: self._convert_ands()
def _convert_ands(self): #TODO check consistency between OR and AND gates # the and species found in the SIF (e.g A 1 and1; B 1 and1; and1 1 C) # are converted to A^B=C dont_remove = [] for and_node in self.and_nodes: if self.and_symbol in and_node: # if ^ in name, nothing to do continue # otherwise, this is the original cno format that needs some mangling lhs_nodes = [(x,e) for x, e, y in zip(self.nodes1, self.edges, self.nodes2) if y == and_node] rhs_node = [ y for x,e,y in zip(self.nodes1, self.edges,self.nodes2) if x == and_node] try: assert len(rhs_node) == 1, "%s %s %s" % (lhs_nodes, and_node, rhs_node) rhs_node = rhs_node[0] and_node = self.and_symbol.join([self.notedge(e)+x for x,e in lhs_nodes]) reac = and_node + "=" + rhs_node self.add_reaction(reac) except: dont_remove.append(and_node) # The and_node (e.g., and1) are finally removed for and_node in self.and_nodes: if and_node not in dont_remove and self.and_symbol not in and_node: self.remove_species(and_node) def _interpret_reactions(self): """interpret the data read from the SIF file""" for i, row in enumerate(self._rawdata): row = row[0].split() # row0 is the rawdata (string) node1 = row[0] edge = row[1] # SIF format allows several nodes on the RHS nodes = row[2:] for node2 in nodes: # some specific tests for CNO format if self.frmt == "cno": if edge not in ["1","-1"]: raise CNOError("Edges must be set to 1 or -1") if re.search('^[a,A][n,N][D,d]\d+$',node1): if edge == "-1": raise ValueError("ill-formed SIF file line %s: an AND gate cannot have -1 edge" % i) # sometimes, we may want to ignore reactions with and reactions if self.ignore_and: if self.is_and(node1) or self.is_and(node2): continue # otherwise, store reactions if edge == "1": reac = node1 + "=" + node2 elif edge == "-1": reac = "!" 
+ node1 + "=" + node2 else: reac = node1 + "=" + node2 self.add_reaction(reac) # makes the nodes1, nodes2, edges and data read-only properties def _get_nodes1(self): return [x.split("=")[0].replace("!","") for x in self.reactions] nodes1 = property(fget=_get_nodes1, doc="returns list of nodes in the left-hand sides of the reactions") def _get_nodes2(self): return [x.split("=")[1] for x in self.reactions] nodes2 = property(fget=_get_nodes2, doc="returns list of nodes in the right-hand sides of the reactions") def _get_edges(self): nodes1 = [x.split("=")[0] for x in self.reactions] edges = ["-1" if x.startswith("!") else "1" for x in nodes1] return edges edges = property(fget=_get_edges, doc="returns list of edges found in the reactions")
[docs] def add_reaction(self, reaction): """Adds a reaction into the network. Valid reactions are:: A=B A+B=C A^B=C A&B=C Where the LHS can use as many species as desired. The following reaction is valid:: A+B+C+D+E=F Note however that OR gates (+ sign) are splitted so A+B=C is added as 2 different reactions:: A=C B=C """ if "=" in reaction: lhs, rhs = reaction.split("=") else: raise ValueError("Reaction must contain a = sign") # OR gates can be splitted without issue. A+B=C can be added as 2 # reactions A=C and B=C but AND gates are kept as it is with the symbol # ^ if "+" in lhs: for specy in lhs.split("+"): reac = specy + "=" + rhs super(SIF, self).add_reaction(reac) else: reac = lhs + "=" + rhs super(SIF, self).add_reaction(reac)
[docs] def save(self, filename): """Save the reactions (sorting with respect to order parameter) :param str filename: where to save the nodes1 edges node2 """ rhs = [x.rhs for x in self._reactions] # if we wer to use x.lhs, no need to select first item # but ! are kept. So, we really need lhs_species. # It assumes there is only 1 item in the lhs, # which should be true in the SIF format. lhs = [x.lhs_species[0] for x in self._reactions] f = open(filename, "w") sign2int = self.sign_operator_to_number counter = 1 for reac in self._reactions: r = Reaction(reac) lhs_species = r.get_signed_lhs_species() if self.and_symbol in r.lhs: # create the and gate andname = "and%s" % counter for sign, species in lhs_species.items(): for this in species: f.write("%s %s %s\n" % (this, sign2int(sign), andname)) f.write("%s 1 %s\n" % (andname,r.rhs )) counter += 1 else: # OR gate for sign, species in lhs_species.items(): for this in species: f.write("%s %s %s\n" % (this, sign2int(sign), r.rhs)) f.close()
[docs] def sign_operator_to_number(self, operator): assert operator in ['+', '-'] if operator == '+': return 1 else: return -1
[docs] def to_reactions(self): """Returns a Reactions instance generated from the SIF file. AND gates are interpreted. For instance the followinf SIF:: A 1 and1 B 1 and1 and1 1 C give:: A^B=C """ from reactions import Reactions reactions = Reactions() for reac in self._reactions: reactions.add_reaction(reac) return reactions
[docs] def notedge(self, x): """Returns ! character if x equals 1 and empty string otherwise""" if x=="-1": return "!" else: return ""
[docs] def to_json(self): """Not a standard, do we want to keep this format ? """ json = """{"links":[\n""" for n1, edge, n2 in zip(self.nodes1, self.edges, self.nodes2): json += """ {"source":%s, "target":%s, "link":%s},\n""" % (n1, n2, edge) json +="]}" return json
[docs] def to_sbmlqual(self, filename=None): """Exports SIF to SBMLqual format. :param filename: save to the filename if provided :return: the SBML text This is a level3, version1 exporter. :: >>> s = SIF() >>> s.add_reaction("A=B") >>> res = s.to_sbmlqual("test.xml") .. warning:: logical AND are not encoded yet. works only if no AND gates .. warning:: experimental """ #sif = self.to_cnograph() from cno.io.sbmlqual import SBMLQual qual = SBMLQual() sbml = qual.to_sbmlqual(self) if filename: fh = open(filename, "w") fh.write(sbml) fh.close() else: return sbml
[docs] def read_sbmlqual(self, filename): """import SBMLQual XML file into a SIF instance :param str filename: the filename of the SBMLQual :param bool clear: remove all existing nodes and edges .. warning:: experimental """ from cno.io.sbmlqual import SBMLQual qual = SBMLQual() sif = qual.read_sbmlqual(filename) self.clear() for reac in sif.reactions: self.add_reaction(reac) return sif
def _rename_species(self, old, new): raise NotImplementedError for i, r in enumerate(self._reactions): self._reactions[i] = r.replace(old, new)
[docs] def to_cnograph(self): # local import to prevent import cycling from cno.io.cnograph import CNOGraph c = CNOGraph() for reaction in self.reactions: c.add_reaction(reaction) return c
[docs] def plot(self): """Plot the network .. note:: this method uses :class:`~cno.io.cnograph.CNOGraph` so AND gates appear as small circles. """ c = self.to_cnograph() c.graph_options['graph']['dpi'] = 200 c.plot()
def __eq__(self, x): if isinstance(x, SIF) is False: return False if sorted(self.reactions) != sorted(x.reactions): return False return True def __str__(self): msg = "SIF object\n" msg += "- {0} reactions.\n".format(len(self.reactions)) msg += "- {0} species.".format(len(self.species)) msg += "\n" for reac in self.reactions: msg += reac + "\n" return msg def __len__(self): return len(self.nodes1) def __repr__(self): msg = "SIF object\n" msg += "- {0} reactions.\n".format(len(self.reactions)) msg += "- {0} species.".format(len(self.species)) return msg