# Source code for cbmpy.CBCommon

"""
CBMPy: CBCommon module
======================
PySCeS Constraint Based Modelling (http://cbmpy.sourceforge.net)
Copyright (C) 2009-2017 Brett G. Olivier, VU University Amsterdam, Amsterdam, The Netherlands

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>

Author: Brett G. Olivier
Contact email: bgoli@users.sourceforge.net
Last edit: $Author: bgoli $ ($Id: CBCommon.py 575 2017-04-13 12:18:44Z bgoli $)

"""
## gets rid of "invalid variable name" info
# pylint: disable=C0103
## gets rid of "line too long" info
# pylint: disable=C0301
## use with caution: gets rid of module xxx has no member errors (run once enabled)
# pylint: disable=E1101


# preparing for Python 3 port
from __future__ import division, print_function
from __future__ import absolute_import
#from __future__ import unicode_literals

# this is a hack that needs to be streamlined a bit
#try:
    #import cStringIO as csio
#except ImportError:
    #import io as csio

# Module-level placeholder, initialised to None; nothing in the visible part of this module assigns it.
chemElementDefs = None
# Flag recording whether a pyparsing module could be imported; checked before the grammars are built.
HAVE_PYPARSING = False
try:
    # first choice: a pyparsing module living inside this package
    from . import pyparsing
    HAVE_PYPARSING = True
except ImportError:
    try:
        # second choice: a pyparsing module importable from the global path
        print('Attempting global PyParsing import')
        import pyparsing
        HAVE_PYPARSING = True
    except ImportError:
        # neither import worked: HAVE_PYPARSING stays False and no pp_* grammar objects are defined
        print('INFO please install pyparsing to enable chemical balance checking!')

if HAVE_PYPARSING:
    # --- chemical formula grammar ---
    pp_caps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    pp_lowers = pp_caps.lower()
    pp_digits = "0123456789"
    # element symbol: starts with a character from pp_caps, continues with characters from pp_lowers
    pp_element = pyparsing.Word(pp_caps, pp_lowers)
    # element count: a run of decimal digits
    pp_integer = pyparsing.Word(pp_digits)
    # one [symbol, count] group; the count is optional and defaults to the string "1"
    pp_elementRef = pyparsing.Group(pp_element + pyparsing.Optional(pp_integer, default="1"))
    # a formula is one or more symbol/count groups
    pp_chemicalFormula = pyparsing.OneOrMore(pp_elementRef)


    # --- gene association grammar ---
    # token: a word made of alphanumerics, '.' and space, or one of the literal operators
    # NOTE(review): the Word alternative already accepts spaces and letters, so the literal
    # operator alternatives are presumably rarely (if ever) the ones that match - confirm
    pp_gene_ass = pyparsing.Word(pyparsing.alphanums + '. ') | 'or' | 'OR' | 'and' | 'AND'
    # nested expression built from those tokens (pyparsing's default nesting delimiters are used)
    pp_gene_unnester = pyparsing.nestedExpr(content=pp_gene_ass)

# Symbols accepted as "elements" when validating chemical formulae.
# The main-group/transition rows come first, followed by the lanthanide and actinide rows
# ('La' and 'Ac' therefore appear twice) and finally the symbols 'X', 'R' and 'Z'
# (presumably generic placeholders for unspecified groups - confirm with model conventions).
# Elements 112-118 are stored as atomic-number strings.
ptElements = (
    'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S',
    'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge',
    'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag',
    'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Hf', 'Ta', 'W', 'Re', 'Os',
    'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Rf', 'Db',
    'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', '112', '113', '114', '115', '116', '117', '118', 'La',
    'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Ac',
    'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'X', 'R', 'Z',
)

## too crude, need to find a better logging strategy
#CBMPYLOG = csio.StringIO()
#def printl(st, scr=True, log=False):
    #"""
    #Updated print() function that also writes to a csio buffer

    #- *st* the string
    #- *prn* print to screen
    #- *log* write to log buffer

    #"""
    #global CBMPYLOG
    #if scr:
        #print(st)
    #if log:
        #try:
            #CBMPYLOG.write(st+'\n')
        #except Exception as ex:
            #print(ex)

#def flushLogToFile(fname):
    #global CBMPYLOG
    #F = file(fname, 'w')
    #CBMPYLOG.seek(0)
    #F.write(CBMPYLOG.read())
    #F.flush()
    #F.close()
    #CBMPYLOG.close()
    #CBMPYLOG = csio.StringIO()


def parseGeneAssociation(gs):
    """
    Parse a COBRA style gene association into a nested list.

    The string is wrapped in an extra pair of parentheses, un-nested with the module level
    ``pp_gene_unnester`` grammar and every string token in the result is lower-cased and
    stripped of surrounding whitespace.

     - *gs* a string containing a gene association

    Returns the (possibly nested) list of cleaned tokens. Raises a ``RuntimeError`` when
    PyParsing could not be imported, as the grammar does not exist in that case.

    """
    # gs = '(b0810) and ( b0811 ) or ( b1234.0) and(b0809)and ( b7643 )OR(b0812 )AND( b0876)'
    if not HAVE_PYPARSING:
        raise RuntimeError('Gene association parser requires PyParsing!')

    def _clean(tokens):
        # walk the nested list in place: descend into sub-lists, normalise string tokens
        for idx, item in enumerate(tokens):
            if isinstance(item, list):
                _clean(item)
            else:
                tokens[idx] = item.lower().strip()

    # the added outer parentheses yield a single top-level group, which is unwrapped again here
    gl = pp_gene_unnester.parseString('(' + gs + ')').asList()[0]
    _clean(gl)
    return gl

def checkChemFormula(cf, quiet=False):
    """
    Checks whether a string conforms to a Chemical Formula C3Br5 etc, returns True/False. Please see the SBML
    Level 3 specification and http://wikipedia.org/wiki/Hill_system for more information.

     - *cf* a string that contains a formula to check
     - *quiet* [default=False] do not print error messages

    Returns False when PyParsing is unavailable, when the string cannot be parsed or when a parsed
    symbol is not listed in ``ptElements``; returns True otherwise. When the formula rebuilt from
    the parsed symbols and counts differs from *cf*, an INFO message is printed (unless *quiet*)
    but the result is still True.

    """
    if not HAVE_PYPARSING:
        # this message is printed regardless of *quiet*
        print('\nChemical Formula parser requires PyParsing!')
        return False

    def _reject():
        # shared "invalid formula" exit used by both failure paths
        if not quiet:
            print('WARNING: \"{}\" is not a valid chemical formula.'.format(cf))
        return False

    try:
        parsed = [(r[0], int(r[1])) for r in pp_chemicalFormula.parseString(cf)]
    except Exception:
        # any failure while parsing counts as "not a formula"
        # (presumably this also covers non-string input such as None - confirm)
        return _reject()

    # rebuild a canonical formula (counts of 1 are left implicit) while validating the symbols
    parts = []
    for symbol, count in parsed:
        if symbol not in ptElements:
            return _reject()
        parts.append(symbol)
        if count != 1:
            parts.append(str(count))

    # NOTE(review): the parser is not asked to consume the whole string, so text following a
    # valid leading formula appears to end up here (INFO message, result True) - confirm intent
    if cf != ''.join(parts) and not quiet:
        print('INFO: \"{}\" check formula.'.format(cf))
    return True

def extractGeneIdsFromString(g, return_clean_gpr=False):
    """
    Extract and return a list of gene names from a gene association string formulation

    - *g* a COBRA style gene association string
    - *return_clean_gpr* [default=False] in addition to the list returns the "cleaned" GPR string

    The "cleaned" string has a space inserted between an operator and a directly adjacent
    parenthesis and has the space-delimited operators ``AND``/``OR`` lower-cased. Gene names are
    returned in order of first appearance, without duplicates; an empty association gives ``[]``.

    """
    # (pattern, replacement) pairs that separate an operator from an adjacent parenthesis,
    # applied in this fixed order
    spacing_fixes = (
        (')AND ', ') AND '), (')and ', ') and '),
        (' AND(', ' AND ('), (' and(', ' and ('),
        (')AND(', ') AND ('), (')and(', ') and ('),
        (')OR ', ') OR '), (')or ', ') or '),
        (' OR(', ' OR ('), (' or(', ' or ('),
        (')OR(', ') OR ('), (')or(', ') or ('),
    )
    g2 = g
    for old, new in spacing_fixes:
        g2 = g2.replace(old, new)

    # normalise the operators to lower case, then drop all grouping to get a flat expression
    g2 = g2.replace(' AND ', ' and ').replace(' OR ', ' or ')
    g3 = g2.replace('(', '').replace(')', '')

    # splitting on ' or ' and then on ' and ' covers every mix of operators (including none)
    names = []
    seen = set()
    for clause in g3.split(' or '):
        for token in clause.split(' and '):
            name = token.strip()
            if name not in seen:
                seen.add(name)
                names.append(name)
    if names == ['']:
        # an empty association contains no genes
        names = []

    if return_clean_gpr:
        return names, g2
    return names



class ComboGen(object):
    """
    Generate sets of unique combinations

    Combinations are collected in ``combo`` as comma separated strings, in the order in which
    the items appear in the input; ``numberifyComb2Int`` converts them to tuples of integers.

    """
    # list of combinations, each stored as a ','-joined string (created per instance in __init__)
    combo = None
    # list of integer tuples, filled in by numberifyComb2Int()
    combo_int = None

    def __init__(self):
        self.combo = []

    def addCombination(self, data):
        """
        Store a single combination.

        - *data* a sequence of strings, stored joined with ','

        """
        self.combo.append(','.join(data))

    def uniqueCombinations(self, data, number, temp=None):
        """
        Recursively add all combinations of *number* items taken from *data* to ``combo``.

        - *data* a sequence (must support slicing) of strings
        - *number* the size of each combination
        - *temp* [default=None] the partial combination built so far, used by the recursion

        """
        if temp is None:
            temp = []
        if not number:
            # the combination is complete: record it and stop, extending it further
            # can never produce another result
            self.addCombination(temp)
            return
        for idx, item in enumerate(data):
            temp.append(item)
            # only items after the current one are considered, so each set is generated once
            self.uniqueCombinations(data[idx+1:], number-1, temp)
            temp.pop()

    def numberifyComb2Int(self):
        """
        Convert every stored combination string into a tuple of integers (``combo_int``).

        """
        self.combo_int = [tuple(int(v) for v in entry.split(',')) for entry in self.combo]

def processSpeciesChargeChemFormulaAnnot(s, getFromName=False, overwriteChemFormula=False, overwriteCharge=False):
    """
    Disambiguate the chemical formula from either the Notes or the overloaded name

     - *s* a species object
     - *getFromName* [default=False] whether to try strip the chemical formula from the name (old COBRA style)
     - *overwriteChemFormula* [default=False]
     - *overwriteCharge* [default=False]

    The species is modified in place (``chemFormula``, ``charge``, ``annotation`` and possibly the
    name); nothing is returned. With an *overwrite...* flag set the current value is put aside and
    only restored when no replacement could be obtained. Annotation entries that were used
    successfully are removed from ``s.annotation``.

    """
    # ---- chemical formula ----
    tempF = ''
    if overwriteChemFormula:
        # put the current formula aside so that the annotation is consulted
        tempF = s.chemFormula
        s.chemFormula = None
    key = None
    if s.chemFormula is None or s.chemFormula == '':
        if 'chemFormula' in s.annotation:
            key = 'chemFormula'
        elif 'FORMULA' in s.annotation:
            key = 'FORMULA'
        if key is not None:
            s.chemFormula = s.annotation[key]

    if checkChemFormula(s.chemFormula, quiet=True):
        # the formula is usable: the annotation entry it came from (if any) is now redundant
        if key is not None:
            s.annotation.pop(key)
    else:
        if getFromName:
            try:
                # old COBRA style: "<name>_<formula>"
                n = s.getName()
                n, cf = n.rsplit('_', 1)
                if checkChemFormula(cf, quiet=True):
                    s.chemFormula = cf
                    s.setName(n)
                else:
                    s.chemFormula = ''
            except Exception:
                # best effort: e.g. a name without an underscore cannot be split into two parts
                s.chemFormula = ''
        else:
            s.chemFormula = ''
        if overwriteChemFormula and s.chemFormula == '':
            # nothing better was found, restore the formula that was put aside
            s.chemFormula = tempF

    # ---- charge ----
    key2 = None
    tempC = None
    if overwriteCharge:
        tempC = s.charge
        s.charge = None
    if s.charge is None or s.charge == '':
        if 'charge' in s.annotation:
            key2 = 'charge'
        elif 'CHARGE' in s.annotation:
            key2 = 'CHARGE'
        if key2 is not None:
            chrg = s.annotation[key2]
            try:
                s.charge = int(chrg)
            except (ValueError, TypeError):
                # TypeError covers annotation values int() cannot accept at all (e.g. None)
                print('Invalid charge: {} defined for species {}'.format(chrg, s.getId()))
                s.charge = None
                # keep the unusable annotation entry
                key2 = None
    if overwriteCharge and s.charge is None:
        s.charge = tempC
    if key2 is not None:
        s.annotation.pop(key2)


def binHash(keys, d):
    """
    Return a tuple with one entry per item in *keys*: the value stored in *d* under that key
    when the key is present in *d*, otherwise ``True``.

    - *keys* an iterable of keys, it determines the order of the result
    - *d* a container supporting ``in`` and item lookup

    """
    return tuple(d[name] if name in d else True for name in keys)

def fixId(s, replace=None):
    """
    Checks a string (Sid) to see if it is a valid C style variable. first letter must be an underscore or letter,
    the rest should be alphanumeric or underscore.

    - *s* the string to test
    - *replace* [None] default is to leave out offensive character, otherwise replace with this one

    Returns the fixed string: illegal characters are dropped (or replaced) and an underscore is
    prefixed when the result would otherwise start with a digit. An empty string is returned
    unchanged.

    """
    kept = []
    for c in s:
        if c.isalnum() or c == '_':
            kept.append(c)
        elif replace is not None:
            kept.append(replace)
    s2 = ''.join(kept)
    # test the cleaned result rather than the raw input, so that a digit exposed by removing
    # leading illegal characters (e.g. '-1abc') is caught too; slicing keeps '' safe
    if s2[:1].isdigit():
        s2 = '_' + s2
    return s2

def checkId(s):
    """
    Checks the validity of the string to see if it conforms to a C variable. Returns true/false

    - *s* a string

    The first character must be a letter or an underscore, all following characters must be
    alphanumeric or an underscore. A message is printed for the first offending character. An
    empty string is not a valid identifier and gives False.

    """
    if not s:
        print('\"{}\" is not a valid identifier'.format(s))
        return False
    for pos, c in enumerate(s):
        if c == '_':
            continue
        # letters only in first position, letters and digits everywhere else
        allowed = c.isalpha() if pos == 0 else c.isalnum()
        if not allowed:
            print('\"{}\" is an invalid character in \"{}\"'.format(c, s))
            return False
    return True