MS-DAS 0.9.0 documentation

Source code for msdas.network

# -*- coding: utf-8 -*-
"""
Created on Fri Feb 21 16:02:50 2014

@author: cokelaer
"""
from readers import MassSpecReader
from cno import CNOGraph
from cno.io.multigraph import  CNOGraphMultiEdges
import pylab
import pandas as pd

__all__ = ["NetworkFromUniProt", "CombineNetworks"]


class NetworkFromUniProt(object):
    """Build a PKN based on the uniprot **interact with** annotations

    The annotations dataframe can be obtained from the MassSpecMerger
    instance ::

        from msdas import *
        a = annotations.Annotations(get_yeast_small_data(), "YEAST")
        a.get_uniprot_entries()
        a.set_annotations()

    Then, you can create this object::

        n = network.NetworkFromUniProt(a.annotations)

    And finally get a graph structure that extracts all relations found in
    the annotations dataframe based on the uniprot field called
    "Interacts with".

    """
    def __init__(self, annotations, verbose=True):
        """.. rubric:: constructor

        :param df annotations: a dataframe similar to the :attr:`annotations`
            found in :meth:`MassSpecReader`. It may also be the filename of
            a pickled dataframe, or a :class:`MassSpecReader` instance, in
            which case its :attr:`df` attribute is used as is.
        :param bool verbose: stored in :attr:`verbose` and forwarded to the
            UniProt service created in :meth:`get_cnograph_intact`.

        """
        if isinstance(annotations, str):
            # NOTE: unpickling can execute arbitrary code; only pass
            # filenames that come from a trusted source.
            self.annotations = pd.read_pickle(annotations)
        elif isinstance(annotations, MassSpecReader):
            self.annotations = annotations.df
        else:
            # trying as a dataframe; copy so that the caller's data is
            # never modified through this object
            self.annotations = annotations.copy()
        self.verbose = verbose

    def get_cnograph_intact(self, label="entry_name"):
        """Return cnograph made of the protein names found in the
        interactions of the annotations.

        :param str label: either "entry_id" (nodes keep the names found in
            the index of the annotations) or "entry_name" (nodes are
            relabelled using the UniProt ACC to ID mapping, which requires
            bioservices).
        :return: a CNOGraph instance whose signals are set to the measured
            species.

        .. plot::
            :include-source:
            :width: 50%

            from msdas import *
            a = annotations.Annotations(get_yeast_small_data(), "YEAST")
            a.get_uniprot_entries()
            a.set_annotations()
            n = network.NetworkFromUniProt(a.annotations)
            c = n.get_cnograph_intact()
            c.plotdot()

        """
        assert label in ["entry_id", "entry_name"]

        c = CNOGraph()
        interactions = self.annotations["Interacts with"]

        # add all nodes, including those without any interaction
        c.add_nodes_from(interactions.index)

        # some have no interactions in which case, it is filled with NaN.
        # let us drop those entries.
        interactions = interactions.dropna()

        total = len(interactions)
        # Iterate over (label, value) pairs directly instead of looking each
        # value up again with the deprecated ``.ix`` indexer.
        pairs = zip(interactions.index, interactions)
        for i, (index, value) in enumerate(pairs):
            print("{}/{}".format(i+1, total))
            for interaction in value.split(';'):
                interaction = interaction.strip()
                if not interaction:
                    # a trailing ';' yields an empty name; nothing to link
                    continue
                if interaction == "Itself":
                    # self-interaction: the partner is the entry itself
                    interaction = index
                c.add_reaction("{}={}".format(index, interaction))

        if label == "entry_id":
            c._signals = list(self.annotations.index)
        else:
            # bioservices required because interacting species may not be
            # part of the list of measurements,
            from bioservices import UniProt
            u = UniProt(verbose=self.verbose)
            mapping = u.multi_mapping(fr="ACC", to="ID", query=c.nodes())

            # Build a fresh dictionary rather than updating ``mapping``
            # while iterating over it.
            relabel = {}
            for k, v in mapping.items():
                if not v:
                    # no identifier returned: keep the original node name
                    continue
                if len(v) > 1:
                    print("ambigous case {} with more than 1 mapping. will take only first".format(k))
                # keep only the part before the underscore of the first hit
                relabel[str(k)] = str(v[0].split("_")[0])
            c.relabel_nodes(relabel)

            measured = [x.split("_")[0] for x in self.annotations['Entry name']]
            c._signals = measured

        return c
class CombineNetworks(object):
    """Combine several PKN from different methods

    This class serves as an example on how to combine several PKNs into a
    common one.

    In this example, you need to build a uniprot PKN using
    :class:`NetworkFromUniProt`, a PKN using PhosphoGrid
    :class:`msdas.phospho.PhosphoGrid` and a list of names to indicate nodes
    where you have measurements.

    .. plot::
        :include-source:
        :width: 50%

        # Get list of names
        from msdas import *
        a = annotations.Annotations(get_yeast_small_data(), "YEAST")
        a.get_uniprot_entries()
        a.set_annotations()
        n = network.NetworkFromUniProt(a.annotations)
        names = list(set(a.df.Protein))

        from easydev import get_share_file as gsf
        n = network.CombineNetworks(
            {"Curated": gsf("msdas", "data", "PKN-yeast.sif"),
             "UniProt": "PKN-uniprot.sif",
             "PhosPho": "PKN-phospho.sif"},
            signals=names[:], stimuli=["a", "NaCl"])

        c = n.get_digraph()
        c.plot()
        #c.export2sif("PKN-combined.sif")

    """
    def __init__(self, dict_network, stimuli=[], signals=[], cutnonc=True,
                 remove_self_loops=True):
        """

        :param dict dict_network: a dictionary of network. keys are used for
            labelling; values must be a SIF filename
        :param list stimuli: list of stimuli
        :param list signals: list of signals
        :param bool cutnonc: remove useless nodes, not measured or without
            influence on signals
        :param bool remove_self_loops: remove self loop from the network.

        """
        # labels[i] is the name given to the network stored in filenames[i]
        self.filenames = []
        self.labels = []
        for k, v in dict_network.items():
            self.filenames.append(v)
            self.labels.append(k)

        # The arguments (and the shared default lists) are copied and never
        # mutated.
        self.stimuli = list(stimuli)
        self.signals = list(signals)
        self.cutnonc = cutnonc
        self.remove_self_loops = remove_self_loops

    def _edge_color_values(self):
        """Return one value per label, evenly spaced between 0.1 and 0.9."""
        return pylab.linspace(.1, .9, len(self.labels))

    def plot_multiedge_graph(self, cmap="jet"):
        """Creates a multiedge graph and plots it

        :param cmap: a valid color map from matplotlib. jet, spring, hot, ...
        :return: CNOGraphMultiEdges object

        .. plot::
            :include-source:
            :width: 50%

            # Get list of names
            from msdas import *
            from easydev import gsf

            m = MassSpecReader()
            m.read_annotations(gsf("msdas", "data", "YEAST_annotations_small.pkl"))
            n = network.NetworkFromUniProt(m.annotations)
            names = list(set(m.df.Protein))

            n = network.CombineNetworks(
                {"Curated": gsf("msdas", "data", "PKN-yeastScaffold.sif"),
                 "UniProt": "PKN-uniprot.sif",
                 "PhosPho": "PKN-phospho.sif"},
                signals=names[:], stimuli=["a", "NaCl"])

            c = n.plot_multiedge_graph()
            c.plot()

        """
        values = self._edge_color_values()

        # build network
        c = self.get_multiedge_graph()
        c.plot(edge_attribute="edgecolor", edge_attribute_labels=False,
               cmap=cmap)

        # build legend: an empty bar per label is drawn only to obtain a
        # legend entry with the colour of the corresponding edges
        for label, value in zip(self.labels, values):
            color = c._get_hex_color_from_value(value, cmap)
            print("{} {}".format(label, color))
            pylab.barh(0, 0, 1, color=color, label=label)
        pylab.legend(title="edge legend", fontsize="small",
                     loc="lower right")
        return c

    def get_multiedge_graph(self):
        """Creates a multiedge graph from the input networks

        Each edge carries a *source* attribute (label of the network it was
        read from) and an *edgecolor* attribute (value associated with that
        label).

        :return: CNOGraphMultiEdges object

        """
        # build network
        values = self._edge_color_values()
        c = CNOGraphMultiEdges()
        for i, filename in enumerate(self.filenames):
            print("Reading {}".format(filename))
            graph = CNOGraph(filename)
            for node1, node2, attrs in graph.edges(data=True):
                c.add_edge(node1, node2, source=self.labels[i],
                           edgecolor=values[i], **attrs)

        c._signals = self.signals[:]
        c._stimuli = self.stimuli[:]

        if self.signals and self.stimuli and self.cutnonc:
            c.cutnonc()
        if self.remove_self_loops:
            c.remove_self_loops()
        return c

    def get_digraph(self, sources_priority=["Curated", "PhosPho", "UniProt"]):
        """Creates a directed graph from the input networks

        :param list sources_priority: labels of the networks, the most
            trusted first. If None, the order of :attr:`labels` is used.
            A ValueError is raised if an edge comes from a source that is
            not in that list.
        :return: a CNOGraph instance with the signals and stimuli set.

        """
        multic = self.get_multiedge_graph()

        # We could cast the multiedge into a normal digraph but when edges
        # have different meaning, (e.g., activation/inhibition), then there
        # is an ambiguity and the remaining edge may not be the one we want.
        # There is no clear answer on what is the best one to keep but we
        # could prioritise the edge based on the source (e.g., curated is
        # more trustworthy than an automatic database).

        # let us merge edges with same input/output nodes if they have
        # different links based on the priority of the source.
        if sources_priority is None:
            labels = self.labels[:]
        else:
            labels = sources_priority[:]

        for node1 in multic.nodes():
            for node2 in multic.edge[node1]:
                # figure out the one to keep and remove others
                # (list() freezes the keys since edges are removed below)
                keys = list(multic.edge[node1][node2].keys())
                sources = [multic[node1][node2][this]['source']
                           for this in keys]
                # FIXME: could have a + and - from same source ?
                # Stable sort: edges from the same source keep their
                # original relative order.
                orders = sorted(zip(sources, keys),
                                key=lambda pair: labels.index(pair[0]))

                # keep first one, get rid of others
                # NOTE(review): the original nesting of the "Keeping"
                # message and of the final assignment could not be
                # recovered from the flattened source; they are assumed to
                # belong to this branch -- confirm against upstream.
                if len(orders) > 1:
                    if orders[0][0] == orders[1][0]:
                        print("looks like {}-{} edge is ambiguous".format(node1, node2))
                    # remove all except first one
                    for order in orders[1:]:
                        # order is a 2-length tuple e.g., ("Curated", 0)
                        key = order[1]
                        print("removing {}-{}, key={} (source={})".format(node1, node2, key, order[0]))
                        multic.remove_edge(node1, node2, key)
                    print("Keeping {}-{}, source={}, key={}\n".format(node1, node2, orders[0][0], orders[0][1]))
                    # remember every source that reported this edge
                    multic.edge[node1][node2][orders[0][1]]['source'] = sources

        c = CNOGraph(multic)
        c._signals = self.signals[:]
        c._stimuli = self.stimuli[:]
        return c