# Source code for cbmpy.CBCommon
"""
CBMPy: CBCommon module
======================
PySCeS Constraint Based Modelling (http://cbmpy.sourceforge.net)
Copyright (C) 2009-2017 Brett G. Olivier, VU University Amsterdam, Amsterdam, The Netherlands
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
Author: Brett G. Olivier
Contact email: bgoli@users.sourceforge.net
Last edit: $Author: bgoli $ ($Id: CBCommon.py 575 2017-04-13 12:18:44Z bgoli $)
"""
## gets rid of "invalid variable name" info
# pylint: disable=C0103
## gets rid of "line too long" info
# pylint: disable=C0301
## use with caution: gets rid of module xxx has no member errors (run once enabled)
# pylint: disable=E1101
# preparing for Python 3 port
from __future__ import division, print_function
from __future__ import absolute_import
#from __future__ import unicode_literals
# this is a hack that needs to be streamlined a bit
#try:
#import cStringIO as csio
#except ImportError:
#import io as csio
# Module-level placeholder; it is only initialised to None here.
chemElementDefs = None

# PyParsing is optional. The package-relative import is tried first and a
# globally importable pyparsing is used as the fallback. HAVE_PYPARSING
# records whether either import succeeded.
HAVE_PYPARSING = False
try:
    from . import pyparsing
except ImportError:
    print('Attempting global PyParsing import')
    try:
        import pyparsing
    except ImportError:
        print('INFO please install pyparsing to enable chemical balance checking!')
    else:
        HAVE_PYPARSING = True
else:
    HAVE_PYPARSING = True

if HAVE_PYPARSING:
    # Character classes used by the grammars below.
    pp_caps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    pp_lowers = pp_caps.lower()
    pp_digits = "0123456789"

    # Chemical formula grammar: an element is one capital followed by any
    # number of lowercase letters, optionally followed by an integer count
    # (the count defaults to "1" when it is left out).
    pp_element = pyparsing.Word(pp_caps, pp_lowers)
    pp_integer = pyparsing.Word(pp_digits)
    pp_elementRef = pyparsing.Group(
        pp_element + pyparsing.Optional(pp_integer, default="1")
    )
    pp_chemicalFormula = pyparsing.OneOrMore(pp_elementRef)

    # Gene association grammar: words made of alphanumerics, '.' and ' ',
    # or one of the boolean operators, inside nested parentheses.
    pp_gene_ass = pyparsing.Word(pyparsing.alphanums + '. ') | 'or' | 'OR' | 'and' | 'AND'
    pp_gene_unnester = pyparsing.nestedExpr(content=pp_gene_ass)
# Symbols accepted as "elements" when a chemical formula is checked.
# The order and the existing duplicates ('La', 'Ac') are kept as they were.
# The entry after 'Ga' used to be 'G', which is not an element symbol; it is
# now 'Ge' (germanium).
# NOTE(review): '112'..'118' and the trailing 'X', 'R', 'Z' are not element
# symbols - presumably placeholders; confirm before relying on them.
ptElements = (
    'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S',
    'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge',
    'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag',
    'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Hf', 'Ta', 'W', 'Re', 'Os',
    'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Rf', 'Db',
    'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', '112', '113', '114', '115', '116', '117', '118', 'La',
    'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Ac',
    'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'X', 'R', 'Z',
)
## too crude, need to find a better logging strategy
#CBMPYLOG = csio.StringIO()
#def printl(st, scr=True, log=False):
#"""
#Updated print() function that also writes to a csio buffer
#- *st* the string
#- *prn* print to screen
#- *log* write to log buffer
#"""
#global CBMPYLOG
#if scr:
#print(st)
#if log:
#try:
#CBMPYLOG.write(st+'\n')
#except Exception as ex:
#print(ex)
#def flushLogToFile(fname):
#global CBMPYLOG
#F = file(fname, 'w')
#CBMPYLOG.seek(0)
#F.write(CBMPYLOG.read())
#F.flush()
#F.close()
#CBMPYLOG.close()
#CBMPYLOG = csio.StringIO()
def parseGeneAssociation(gs):
    """
    Turn a COBRA style gene association string into a nested list.

    The string is wrapped in an extra pair of parentheses, parsed with the
    module's nested-expression grammar and every string token is lower-cased
    and stripped in place. The resulting list is printed and returned.

    - *gs* a string containing a gene association

    """
    # example input:
    # gs = '(b0810) and ( b0811 ) or ( b1234.0) and(b0809)and ( b7643 )OR(b0812 )AND( b0876)'
    nested = pp_gene_unnester.parseString('(' + gs + ')').asList()[0]

    def _normalise(items):
        # Items that cannot be lower-cased are treated as nested lists and
        # walked recursively.
        for pos, item in enumerate(items):
            try:
                items[pos] = item.lower().strip()
            except Exception:
                _normalise(item)

    _normalise(nested)
    # NOTE(review): this prints the parsed list on every call - looks like
    # leftover debugging output; confirm before removing.
    print(nested)
    return nested
def checkChemFormula(cf, quiet=False):
    """
    Test whether a string looks like a chemical formula (C3Br5 etc), returns True/False. Please see the SBML
    Level 3 specification and http://wikipedia.org/wiki/Hill_system for more information.

    Returns False when PyParsing is unavailable, when the string cannot be parsed or when it contains
    a symbol that is not listed in *ptElements*. Returns True otherwise; if the formula rebuilt from the
    parsed tokens differs from the input an INFO message is printed (unless *quiet*).

    - *cf* a string that contains a formula to check
    - *quiet* [default=False] do not print error messages

    """
    if not HAVE_PYPARSING:
        print('\nChemical Formula parser requires PyParsing!')
        return False

    def _warn_invalid():
        # Shared by the two "invalid formula" exits below.
        if not quiet:
            print('WARNING: \"{}\" is not a valid chemical formula.'.format(cf))

    try:
        parsed = pp_chemicalFormula.parseString(cf)
        pairs = [(tok[0], int(tok[1])) for tok in parsed]
    except Exception:
        _warn_invalid()
        return False

    # Rebuild the formula from the parsed (symbol, count) pairs, leaving out
    # counts equal to one, and reject any unknown symbol along the way.
    pieces = []
    for symbol, count in pairs:
        if symbol not in ptElements:
            _warn_invalid()
            return False
        pieces.append(symbol if count == 1 else symbol + str(count))

    if cf != ''.join(pieces) and not quiet:
        print('INFO: \"{}\" check formula.'.format(cf))
    return True
def extractGeneIdsFromString(g, return_clean_gpr=False):
    """
    Extract and return a list of gene names from a gene association string formulation

    The association is first "cleaned": a space is inserted between a boolean operator and an
    adjacent parenthesis, and ' AND '/' OR ' are lower-cased. Parentheses are then removed and the
    string is split on ' or ' and ' and '. Names are stripped, returned in order of first appearance
    without duplicates, and empty names (e.g. from an empty operand) are left out.

    - *g* a COBRA style gene association string
    - *return_clean_gpr* [default=False] in addition to the list returns the "cleaned" GPR string

    """
    g2 = g
    # Pad operators that touch a parenthesis; str.replace is a no-op when the
    # pattern is absent so no guard is needed.
    g2 = g2.replace(')AND ', ') AND ').replace(')and ', ') and ')
    g2 = g2.replace(' AND(', ' AND (').replace(' and(', ' and (')
    g2 = g2.replace(')AND(', ') AND (').replace(')and(', ') and (')
    g2 = g2.replace(')OR ', ') OR ').replace(')or ', ') or ')
    g2 = g2.replace(' OR(', ' OR (').replace(' or(', ' or (')
    g2 = g2.replace(')OR(', ') OR (').replace(')or(', ') or (')
    g2 = g2.replace(' AND ', ' and ').replace(' OR ', ' or ')

    g3 = g2.replace('(', '').replace(')', '')

    # Splitting on ' or ' and then on ' and ' covers all cases (only 'and',
    # only 'or', both, neither) because split() returns the whole string when
    # the separator does not occur.
    names = []
    for or_part in g3.split(' or '):
        for token in or_part.split(' and '):
            name = token.strip()
            if name and name not in names:
                names.append(name)

    if not return_clean_gpr:
        return names
    return names, g2
class ComboGen(object):
    """
    Generate sets of unique combinations

    Combinations are collected in *combo* as comma separated strings and can be
    converted to tuples of integers (stored in *combo_int*) with *numberifyComb2Int()*.

    """

    # list of comma separated combination strings, created per instance in __init__
    combo = None
    # list of integer tuples, filled by numberifyComb2Int()
    combo_int = None

    def __init__(self):
        self.combo = []

    def addCombination(self, data):
        """
        Store a combination as a comma separated string

        - *data* an iterable of strings

        """
        self.combo.append(','.join(data))

    def uniqueCombinations(self, data, number, temp=None):
        """
        Recursively add every combination of *number* items from *data* to *combo*

        - *data* a sliceable sequence of strings
        - *number* the number of items per combination
        - *temp* [default=None] the items selected so far, used by the recursion

        """
        if temp is None:
            temp = []
        if not number:
            self.addCombination(temp)
            # A complete combination was recorded. Descending further can
            # never add anything (the count only goes negative), so stop
            # instead of walking every remaining subset.
            return
        for idx, item in enumerate(data):
            temp.append(item)
            self.uniqueCombinations(data[idx + 1:], number - 1, temp)
            temp.pop()

    def numberifyComb2Int(self):
        """
        Convert the stored combination strings into tuples of integers (*combo_int*)

        """
        self.combo_int = [
            tuple(int(part) for part in entry.split(','))
            for entry in self.combo
        ]
def processSpeciesChargeChemFormulaAnnot(s, getFromName=False, overwriteChemFormula=False, overwriteCharge=False):
    """
    Disambiguate the chemical formula from either the Notes or the overloaded name

    If the species has no chemical formula, the 'chemFormula' or 'FORMULA' annotation is used when it
    passes *checkChemFormula*; the annotation is then removed. Otherwise, with *getFromName*, the text
    after the last underscore in the name is tried. The charge is treated similarly using the 'charge'
    or 'CHARGE' annotation, which must be convertible with int(). The species is modified in place.

    - *s* a species object
    - *getFromName* [default=False] whether to try strip the chemical formula from the name (old COBRA style)
    - *overwriteChemFormula* [default=False] discard the current formula first; it is restored if no valid
      replacement is found
    - *overwriteCharge* [default=False] discard the current charge first; it is restored if no valid
      replacement is found

    """
    # NOTE(review): the nesting of the formula section was reconstructed from
    # a whitespace-stripped listing - confirm against the upstream source.
    tempF = ''
    if overwriteChemFormula:
        tempF = s.chemFormula
        s.chemFormula = None
    key = None
    if s.chemFormula is None or s.chemFormula == '':
        if 'chemFormula' in s.annotation:
            key = 'chemFormula'
        elif 'FORMULA' in s.annotation:
            key = 'FORMULA'
        if key is not None:
            s.chemFormula = s.annotation[key]
        if not checkChemFormula(s.chemFormula, quiet=True):
            if getFromName:
                # best effort: any failure while splitting the name simply
                # leaves the formula empty
                try:
                    n = s.getName()
                    n, cf = n.rsplit('_', 1)
                    if checkChemFormula(cf, quiet=True):
                        s.chemFormula = cf
                        s.setName(n)
                    else:
                        s.chemFormula = ''
                except Exception:
                    s.chemFormula = ''
            else:
                s.chemFormula = ''
            if overwriteChemFormula and s.chemFormula == '':
                s.chemFormula = tempF
        else:
            # the annotation supplied a valid formula, so it is now redundant
            if key is not None:
                s.annotation.pop(key)

    key2 = None
    tempC = None
    if overwriteCharge:
        tempC = s.charge
        s.charge = None
    if s.charge is None or s.charge == '':
        if 'charge' in s.annotation:
            key2 = 'charge'
        elif 'CHARGE' in s.annotation:
            key2 = 'CHARGE'
        if key2 is not None:
            chrg = s.annotation[key2]
            try:
                s.charge = int(chrg)
            except (ValueError, TypeError):
                # TypeError covers values int() cannot handle at all (e.g. None)
                print('Invalid charge: {} defined for species {}'.format(chrg, s.getId()))
                s.charge = None
                # keep the unusable annotation in place
                key2 = None
        if overwriteCharge and s.charge is None:
            s.charge = tempC
        if key2 is not None:
            s.annotation.pop(key2)
def binHash(keys, d):
    """
    Return a tuple with one entry per key in *keys*: the value stored in *d* for that key or
    True when the key is not in *d*.

    - *keys* an iterable of keys
    - *d* a dictionary

    """
    return tuple(d[k] if k in d else True for k in keys)
def fixId(s, replace=None):
    """
    Checks a string (Sid) to see if it is a valid C style variable. first letter must be an underscore or letter,
    the rest should be alphanumeric or underscore.

    Characters that are neither alphanumeric nor an underscore are dropped (or substituted by *replace*).
    If the result starts with a digit an underscore is prefixed. An empty string is returned unchanged.

    - *s* the string to test
    - *replace* [None] default is to leave out offensive character, otherwise replace with this one

    """
    kept = []
    for c in s:
        if c.isalnum() or c == '_':
            kept.append(c)
        elif replace is not None:
            kept.append(replace)
    fixed = ''.join(kept)
    # Test the cleaned string, not the input: dropping an illegal leading
    # character can expose a digit. Slicing keeps the empty string safe.
    if fixed[:1].isdigit():
        fixed = '_' + fixed
    return fixed
def checkId(s):
    """
    Checks the validity of the string to see if it conforms to a C variable. Returns true/false

    The first character must be a letter or underscore, every following character must be
    alphanumeric or an underscore. An empty string is not a valid identifier. A message is
    printed for the first offending character.

    - *s* a string

    """
    if not s:
        print('An empty string is not a valid identifier')
        return False
    for pos, c in enumerate(s):
        if c == '_':
            continue
        # letters only in first position, letters and digits afterwards
        if c.isalpha() if pos == 0 else c.isalnum():
            continue
        print('\"{}\" is an invalid character in \"{}\"'.format(c, s))
        return False
    return True