def parseGeneAssociation(gs):
    """
    Parse a COBRA style gene association into a nested list.

    The string is wrapped in an extra pair of parentheses, handed to the
    module level ``pp_gene_unnester`` parser, and every string in the
    resulting nested list is lower-cased and stripped in place. The result
    is also printed to stdout before it is returned.

     - *gs* a string containing a gene association, for example
       '(b0810) and ( b0811 ) or ( b1234.0) and(b0809)'

    """
    nested = pp_gene_unnester.parseString('(' + gs + ')').asList()[0]

    def _normalise(items):
        # Strings are normalised in place; anything that cannot be treated
        # as a string is assumed to be a sub-list and is descended into.
        for idx, item in enumerate(items):
            try:
                items[idx] = item.lower().strip()
            except Exception:
                _normalise(item)

    _normalise(nested)
    print(nested)
    return nested
def checkChemFormula(cf, quiet=False):
    """
    Check whether a string is a chemical formula such as C3Br5 and return
    True/False.

    The string is parsed into (element, count) pairs; it is rejected when
    parsing fails or when an element symbol is not in ``ptElements``. A
    formula that parses but is not written in the rebuilt form (counts of 1
    left out) is still accepted, with an informational message. See the SBML
    Level 3 specification for more information.

     - *cf* a string that contains a formula to check
     - *quiet* [default=False] do not print error messages

    """
    if not HAVE_PYPARSING:
        print('\nChemical Formula parser requires PyParsing!')
        return False

    invalid_msg = 'WARNING: \"{}\" is not a valid chemical formula.'
    try:
        pairs = [(tok[0], int(tok[1])) for tok in pp_chemicalFormula.parseString(cf)]
    except Exception:
        if not quiet:
            print(invalid_msg.format(cf))
        return False

    rebuilt = []
    for symbol, count in pairs:
        if symbol not in ptElements:
            if not quiet:
                print(invalid_msg.format(cf))
            return False
        rebuilt.append(symbol)
        # a count of one is implicit in the rebuilt formula
        if count != 1:
            rebuilt.append(str(count))

    if cf != ''.join(rebuilt) and not quiet:
        print('INFO: \"{}\" check formula.'.format(cf))
    return True
def extractGeneIdsFromString(g, return_clean_gpr=False):
    """
    Extract and return a list of unique gene names, in order of first
    appearance, from a gene association string.

    Operators that are glued to a parenthesis are first padded with a space
    and ' AND '/' OR ' are lower-cased; the parentheses are then dropped and
    the string is split on ' or ' and ' and '.

     - *g* a COBRA style gene association string
     - *return_clean_gpr* [default=False] in addition to the list returns
       the "cleaned" GPR string (padded and lower-cased operators,
       parentheses kept)

    """
    # Applied strictly in this order; each entry is a no-op when absent.
    substitutions = (
        (')AND ', ') AND '), (')and ', ') and '),
        (' AND(', ' AND ('), (' and(', ' and ('),
        (')AND(', ') AND ('), (')and(', ') and ('),
        (')OR ', ') OR '), (')or ', ') or '),
        (' OR(', ' OR ('), (' or(', ' or ('),
        (')OR(', ') OR ('), (')or(', ') or ('),
        (' AND ', ' and '), (' OR ', ' or '),
    )
    cleaned = g
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)

    bare = cleaned.replace('(', '').replace(')', '')

    names = []
    for or_part in bare.split(' or '):
        for candidate in or_part.split(' and '):
            name = candidate.strip()
            if name not in names:
                names.append(name)

    # an empty association yields no genes rather than one empty name
    if names == ['']:
        names = []

    if return_clean_gpr:
        return names, cleaned
    return names
class ComboGen(object):
    """
    Generate sets of unique combinations.

    Combinations are accumulated in *combo* as comma-joined strings, in the
    order in which the items appear in the input; *numberifyComb2Int*
    converts them to tuples of ints stored in *combo_int*.

    """

    # list of comma-joined combination strings, created per instance
    combo = None
    # list of int tuples, only set by numberifyComb2Int()
    combo_int = None

    def __init__(self):
        self.combo = []

    def addCombination(self, data):
        """
        Store one combination as a comma-joined string.

         - *data* a sequence of strings

        """
        self.combo.append(','.join(data))

    def uniqueCombinations(self, data, number, temp=None):
        """
        Add every combination of *number* items from *data* to *combo*.

         - *data* a sequence of strings to choose from
         - *number* the number of items per combination
         - *temp* [default=None] items that prefix every combination (used
           by the recursion); it is left unchanged on return

        """
        if temp is None:
            temp = []
        if not number:
            self.addCombination(temp)
            # The combination is complete. Recursing further can never add
            # anything and used to walk every remaining subset.
            return
        if number < 0 or number > len(data):
            # cannot be completed from the items that are left
            return
        for i, item in enumerate(data):
            temp.append(item)
            self.uniqueCombinations(data[i + 1:], number - 1, temp)
            temp.pop()

    def numberifyComb2Int(self):
        """
        Convert the stored combination strings to tuples of ints and store
        them in *combo_int*.

        """
        self.combo_int = [tuple(int(v) for v in c.split(',')) for c in self.combo]
[docs]def processSpeciesChargeChemFormulaAnnot(s, getFromName=False, overwriteChemFormula=False, overwriteCharge=False): """ Fill in the chemical formula and charge of a species from its annotation or, optionally, from its overloaded name. The species is modified in place and nothing is returned. When *s.chemFormula* is None or empty the formula is read from the annotation key 'chemFormula' or, failing that, 'FORMULA' and validated with checkChemFormula. When *s.charge* is None or empty the charge is read from the annotation key 'charge' or, failing that, 'CHARGE' and converted with int(); a value that cannot be converted is reported with a printed message and the charge is left as None. An annotation entry that was used may be removed from *s.annotation*. NOTE(review): the indentation of this listing has been lost, confirm the exact nesting of the formula branches against the original file. - *s* a species object; the attributes chemFormula, charge and annotation and the methods getName(), setName() and getId() are used - *getFromName* [default=False] whether to try strip the chemical formula from the name (old COBRA style): the text after the last underscore is used if it is a valid formula and is then removed from the name - *overwriteChemFormula* [default=False] set the current formula aside and derive it again; the old value is put back if the result is an empty string - *overwriteCharge* [default=False] set the current charge aside and derive it again; the old value is put back if the result is None """ tempF = '' if overwriteChemFormula: tempF = s.chemFormula s.chemFormula = None key = None if s.chemFormula == None or s.chemFormula == '': if 'chemFormula' in s.annotation: key = 'chemFormula' elif 'FORMULA' in s.annotation: key = 'FORMULA' if key != None: s.chemFormula = s.annotation[key] if not checkChemFormula(s.chemFormula, quiet=True): if getFromName: try: n = s.getName() n, cf = n.rsplit('_', 1) if checkChemFormula(cf, quiet=True): s.chemFormula = cf s.setName(n) else: s.chemFormula = '' except Exception: s.chemFormula = '' else: s.chemFormula = '' if overwriteChemFormula and s.chemFormula == '': s.chemFormula = tempF else: if key != None: s.annotation.pop(key) key2 = None tempC = None if overwriteCharge: tempC = s.charge s.charge = None if s.charge == None or s.charge == '': if 'charge' in s.annotation: key2 = 'charge' elif 'CHARGE' in s.annotation: key2 = 'CHARGE' if key2 != None: chrg = s.annotation[key2] try: s.charge = int(chrg) except ValueError: print('Invalid charge: {} defined for species {}'.format(chrg, s.getId())) s.charge = None key2 = None if overwriteCharge and s.charge == None: s.charge = tempC if key2 != None: s.annotation.pop(key2)
def binHash(keys, d):
    """
    Return a tuple with the value of *d* for every key in *keys*, in order;
    a key that is missing from *d* contributes True.

     - *keys* an iterable of keys
     - *d* a dictionary to look the keys up in

    """
    return tuple(d[k] if k in d else True for k in keys)
def fixId(s, replace=None):
    """
    Turn a string (Sid) into a valid C style variable name and return it.
    The first character must be an underscore or letter, the rest should be
    alphanumeric or underscore.

    Characters that are not alphanumeric or an underscore are left out, or
    substituted when *replace* is given. If the result starts with a digit
    an underscore is prefixed. An empty string is returned unchanged.

     - *s* the string to fix
     - *replace* [None] default is to leave out offensive character,
       otherwise replace with this one

    """
    kept = []
    for c in s:
        if c.isalnum() or c == '_':
            kept.append(c)
        elif replace is not None:
            kept.append(replace)
    s2 = ''.join(kept)
    # Test the cleaned string, not the raw input: dropping a leading illegal
    # character can expose a digit ('-1abc' -> '1abc').
    if s2 and s2[0].isdigit():
        s2 = '_' + s2
    return s2
def checkId(s):
    """
    Checks the validity of the string to see if it conforms to a C variable.
    Returns true/false.

    The first character must be a letter or an underscore, every following
    character must be alphanumeric or an underscore. The first offending
    character is reported with a printed message. An empty string is not a
    valid identifier.

     - *s* a string

    """
    if not s:
        print('An empty string is not a valid identifier')
        return False
    for pos, c in enumerate(s):
        # an underscore is allowed anywhere; digits only after the start
        if c == '_' or (c.isalpha() if pos == 0 else c.isalnum()):
            continue
        print('\"{}\" is an invalid character in \"{}\"'.format(c, s))
        return False
    return True