Source code for msdas.phosphogrid
# -*- coding: utf-8 -*-
# -*- python -*-
#
# This file is part of MS-DAS software
#
# Copyright (c) 2011-2012 - EBI-EMBL
#
# File author(s): Claudia Hernand, Marti Bernardo,
# Thomas Cokelaer <cokelaer@ebi.ac.uk>
##
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# website:
#
##############################################################################
"""
Created on Fri Feb 7 16:58:10 2014
@author: chernand
"""
import os
import pandas as pd
from tools import Requires
__all__ = ["PhosphoGRID"]
[docs]class PhosphoGRID(Requires):
"""Build PKN from phosphogrid
You need first a list of protein names that you are interested in.
This list can be obtained from the merger instance as follows::
from msdas import *
m = MassSpecReader(get_yeast_small_data, "yeast")
gene_names = set(list(m.df.Protein))
Then, you can use this class as follows to visualise and save into a SIF file the
network built from phosphogrid database.
.. plot::
:include-source:
:width: 70%
>>> from msdas import *
>>> r = MassSpecReader(get_yeast_small_data())
>>> gene_names = set(list(r.df.Protein))
>>> p = phosphogrid.PhosphoGRID()
>>> p.run(gene_names=gene_names)
>>> p.export2sif()
>>> p.plot()
2 databases are used. Their filenames are hard coded as:
#. BIOGRID-PTM-RELATIONSHIPS-3.1.93.ptmrel.txt
#. BIOGRID-PTM-15-3.1.93.ptmtab.txt
You can change the name by changing :attr:`dbR_filename` and
:attr:`dbP_filename`.
"""
def __init__(self, directory="../../share/data/"):
""".. rubric:: constructor
:param str directory: directory where to find the databases
They can be found at Claudia's Hernand home directory.
"""
super(PhosphoGRID, self).__init__()
self.directory = directory
self.dbR_filename = "BIOGRID-PTM-RELATIONSHIPS-3.1.93.ptmrel.txt"
# original file was huged. We removed sequence, author,pumed that are not used.
#self.dfP_filename = "BIOGRID-PTM-15-3.1.93.ptmtab.txt"
self.dfP_filename = "BIOGRID-PTM-15-3.1.93_SMALL.ptmtab.txt"
[docs] def run(self, gene_names=None):
"""Build dataframe from the relations found in the PhosphoGRID databases
relations are saved in :attr:`dfSIF`
"""
#===PhosphoGRID Database
#Open file with relationships PhosphoGRID tab delimited files
import gzip
dbr_arch = gzip.GzipFile(self.directory+os.sep+self.dbR_filename+'.gz')
dbp_arch = gzip.GzipFile(self.directory+os.sep+self.dfP_filename+'.gz')
import StringIO
dfR = pd.read_csv(StringIO.StringIO(dbr_arch.read(self.dbR_filename)),
sep='\t', header=0)
dfP = pd.read_csv(StringIO.StringIO(dbp_arch.read(self.dfP_filename)),
sep='\t', header=0)
#===Merge PhosphoGRID data frame using PTMID as matching column
dfRP = dfR.merge(dfP, on="PTMID")
dfRP.columns = [this.replace("_x","_reg") for this in dfRP.columns.map(str)]
dfRP.columns = [this.replace("_y","_tgt") for this in dfRP.columns.map(str)]
dfRP["RelationshipCode"] = ""
#===Asign interaction type according to enzymatic activity
dfRP.RelationshipCode[dfRP.Relationship == "kinase"] = 1
dfRP.RelationshipCode[dfRP.Relationship == "phosphatase"] = -1
dfRP["Site"] = dfRP.Residue+dfRP.Position.map(str)
#Yeast dataframe
#Select unique gene names
if gene_names == None:
filename = self.directory + os.sep + "RawData.csv"
df = pd.read_csv(filename)
gene_names = df["Protein"].drop_duplicates()
else:
pass
#===Merge Yeast experimental dataset with PhosphoGRID
# Intersection between both datasets querying by Standard
# name == OfficialSymbol both in regulators and substrates
columns=["OfficialSymbol_reg","RelationshipCode","OfficialSymbol_tgt",
"Relationship","Identity","Site"]
out = pd.DataFrame(columns=columns)
for gene in gene_names:
this = dfRP[dfRP.OfficialSymbol_reg == gene][columns].drop_duplicates()
out = out.merge(this, "outer")
this = dfRP[dfRP.OfficialSymbol_tgt == gene][columns].drop_duplicates()
out = out.merge(this, "outer")
#Create SIF file
self.dfSIF = out[["OfficialSymbol_reg","RelationshipCode",
"OfficialSymbol_tgt"]].drop_duplicates()
[docs] def export2sif(self, filename="matchYeastPhosGRID_protein.sif"):
"""Export the found relations into a SIF file
:param str filename: Defaults to matchYeastPhosGRID_protein.sif
"""
# could be a decorator but argument are hidden, so just call it for now.
self._requires("dfSIF", "Call run() method first")
# TODO : check that dfSIF is present otherwise call run()
self.dfSIF.to_csv(filename, index=False, header=False, sep="\t")
self.sif_filename = filename
[docs] def plot(self):
"""Plot the relations using CNOGraph
.. warning:: consider using a faster visualisation tools if the number
of edges/nodes exceeds ~ 300
.. todo:: build the SIF without saving it and import directly in the
cnograph structure.
"""
self._requires("sif_filename", "sif_filename not set yet. Please, Call export2sif() method first.")
from cno import CNOGraph
c = CNOGraph(self.sif_filename)
c.plot()