Source code for pychemia.core.composition

from __future__ import unicode_literals
from numpy import array, argsort
from fractions import gcd as _gcd
from math import pi
from pychemia.utils.periodic import atomic_symbols, electronegativity, atomic_number, covalent_radius
from pychemia.utils.computing import deep_unicode
from builtins import str
from functools import reduce
from collections import Mapping


[docs]class Composition(Mapping): u""" The class Composition is basically a dictionary with species as keys and number of atoms of that specie as values. The methods provided for Composition objects should not contain geometrical information or graph connectivity. The main purpose of this class is to be able to parse formulas into compositions and return string formulas sorted in various ways. """ def __init__(self, value=None): """ Creates a new composition, internally it is a dictionary where each specie is the key and the value is an integer with the number of atoms of that specie :param value: (str, dict) The value could be a string with a chemical formula or the actual dictionary of species and values :rtype: Composition Example: >>> import pychemia >>> comp = pychemia.Composition({'Ba': 2, 'Cu': 3, 'O': 7, 'Y': 1}) >>> comp.formula u'Ba2Cu3O7Y' >>> comp = pychemia.Composition('Ba2Cu3O7Y') >>> comp2 = pychemia.Composition(comp) >>> len(comp2) 4 >>> comp.nspecies 4 >>> comp = pychemia.Composition() >>> comp.composition {} >>> len(comp) 0 """ self._composition = {} if value is not None: value = deep_unicode(value) if isinstance(value, str): self._set_composition(self.formula_parser(value)) elif isinstance(value, dict): self._set_composition(value) elif isinstance(value, Composition): self._set_composition(value.composition) elif hasattr(value, "__len__"): dvalue = {} for i in value: if i in dvalue: dvalue[i] += 1 else: dvalue[i] = 1 self._set_composition(dvalue) else: self._composition = {} def __len__(self): return len(self._composition) def __getitem__(self, item): if item in self._composition: return self._composition[item] else: return 0 def __repr__(self): return 'Composition(' + str(self.composition) + ')' def __str__(self): ret = '' for i in self.species: ret += " %3s: %4d " % (i, self.composition[i]) return ret def __iter__(self): return iter(self.composition) def __contains__(self, item): return item in self._composition def _set_composition(self, value): """ Checks the values of a dictionary before seting the actual composition :param value: (dict) :rtype: None """ for i in value: assert (i in atomic_symbols) assert (isinstance(value[i], int)) self._composition = value.copy() @property def composition(self): """ :return: The composition dictionary :rtype: dict """ return self._composition @property def formula(self): """ :return: The chemical formula with atoms sorted alphabetically :rtype: str """ return self.sorted_formula(sortby='alpha', reduced=True) @property def gcd(self): """ The number of formulas that can be extracted from a composition The greatest common denominator for the composition. :rtype: (int) Example: >>> import pychemia >>> comp = pychemia.Composition('NaCl') >>> comp.gcd 1 >>> comp = pychemia.Composition('Na2Cl2') >>> comp.gcd 2 >>> comp = pychemia.Composition() >>> comp.gcd is None True """ if self.natom > 0: return reduce(_gcd, self.values) else: return None @property def symbols(self): ret = [] for specie in self: number_atoms_specie = self.composition[specie] for i in range(number_atoms_specie): ret.append(specie) return sorted(deep_unicode(ret)) @property def species(self): """ :return: The list of species :rtype: list """ return [deep_unicode(x) for x in self._composition] @property def nspecies(self): return len(self.species) @property def values(self): """ :return: The number of atoms of each specie :rtype: list """ return [self._composition[x] for x in self._composition] @property def natom(self): """ :return: The number of atoms in the composition :rtype: int """ return sum(self.values) @staticmethod
[docs] def formula_parser(value): """ :return: Convert an string representing a chemical formula into a dictionary with the species as keys and values as the number of atoms of that specie :param value: (str) String representing a chemical formula :rtype: dict Examples: >>> import pychemia >>> import pprint >>> pychemia.Composition.formula_parser('Au20') {u'Au': 20} >>> ret = pychemia.Composition.formula_parser('UutUupUusUuo') >>> pprint.pprint(ret) {u'Uuo': 1, u'Uup': 1, u'Uus': 1, u'Uut': 1} """ ret = {} jump = False for i in range(len(value)): if jump > 0: # This char belongs to the current atom, move on jump -= 1 elif value[i].isupper(): # Atom Name starts with Uppercase if i + 1 < len(value) and value[i + 1].islower(): # Atom name has more than 1 char if i + 2 < len(value) and value[i + 2].islower(): # Atom name has more than 2 chars specie = value[i:i + 3] jump = 2 else: specie = value[i:i + 2] jump = 1 else: specie = value[i] jump = 0 j = 1 number = '' while True: if i + jump + j < len(value) and value[i + jump + j].isdigit(): number += value[i + jump + j] j += 1 else: break if number == '': ret[specie] = 1 else: ret[specie] = int(number) return ret
@staticmethod
[docs] def formula_to_list(formula, nunits=1): """ Reads a formula and returns a list of atomic symbols consistent with the formula and the number of formulas given by nunits :param formula: (str) Chemical formula as string :param nunits: (int) Number of formulas to apply :rtype : (list) Examples: >>> import pychemia >>> pychemia.Composition.formula_to_list('NaCl') [u'Na', u'Cl'] >>> flist = pychemia.Composition.formula_to_list(u'Uut2Uup3Uus4Uuo5') >>> len(flist) 14 >>> flist = pychemia.Composition.formula_to_list('Uut2Uup3Uus4Uuo5', nunits=2) >>> len(flist) 28 """ import re # decompose composition a = re.findall(r"[A-Z][a-z0-9]*", formula) composition = [] for i in a: m = re.match(r"([A-Za-z]+)([0-9]*)", i) if m.group(2) == "": n = int(1) else: n = int(m.group(2)) for j in range(n * nunits): composition.append(m.group(1)) return composition
[docs] def sorted_formula(self, sortby='alpha', reduced=True): """ :return: The chemical formula. It could be sorted alphabetically using sortby='alpha', by electronegativity using sortby='electroneg' or using Hill System with sortby='Hill' :param sortby: (str) 'alpha' : Alphabetically 'electroneg' : Electronegativity 'hill' : Hill System :param reduced: (bool) If the formula should be normalized :rtype: str >>> comp=Composition('YBa2Cu3O7') >>> comp.sorted_formula() u'Ba2Cu3O7Y' >>> comp.sorted_formula(sortby='hill') u'Ba2Cu3O7Y' >>> comp.sorted_formula(sortby='electroneg') u'Ba2YCu3O7' >>> comp = Composition('H10C5') >>> comp.sorted_formula(sortby='hill', reduced=True) u'CH2' >>> comp = Composition('IBr') >>> comp.sorted_formula(sortby='hill', reduced=False) u'BrI' >>> comp = Composition('Cl4C') >>> comp.sorted_formula(sortby='hill', reduced=False) u'CCl4' >>> comp = Composition('IH3C') >>> comp.sorted_formula(sortby='hill', reduced=False) u'CH3I' >>> comp = Composition('BrH5C2') >>> comp.sorted_formula(sortby='hill', reduced=False) u'C2H5Br' >>> comp = Composition('S04H2') >>> comp.sorted_formula(sortby='hill', reduced=False) u'H2S4' >>> comp = Composition('SO4H2') >>> comp.sorted_formula(sortby='hill', reduced=False) u'H2O4S' """ if reduced and self.gcd > 1: comp = Composition(self.composition) for i in comp.composition: comp._composition[i] //= self.gcd else: comp = self if sortby == 'electroneg': electroneg = list(electronegativity(comp.species)) for i in range(len(electroneg)): if electroneg[i] is None: electroneg[i] = -1 sortedspecies = array(comp.species)[argsort(electroneg)] elif sortby == "hill": # FIXME: Hill system exceptions not implemented sortedspecies = [] presortedspecies = sorted(comp.species) if 'C' in presortedspecies: sortedspecies.append('C') presortedspecies.pop(presortedspecies.index('C')) if 'H' in presortedspecies: sortedspecies.append('H') presortedspecies.pop(presortedspecies.index('H')) sortedspecies += presortedspecies else: sortedspecies = sorted(comp.species) ret = u'' for specie in sortedspecies: ret += '%s' % specie if comp.composition[specie] > 1: ret += "%d" % comp.composition[specie] return deep_unicode(ret)
[docs] def species_encoded(self, base): ret = 0 i = 0 for atom_number in sorted(atomic_number(self.species)): ret += atom_number * (base ** i) i += 1 return ret
[docs] def species_hex(self): """ Encodes the species into a hexadecimal representation where each specie is stored on a 2-Byte slot ordered by atomic number. This is a 'confortable' encoding where each 2 characters from the hexadecimal will encode a single species and the species are ordered by atomic number making the codification unique. :return: str Example: >>> comp = Composition('YBa2Cu3O7') >>> comp.species_hex() '0x38271d08' """ enc = self.species_encoded(256) return hex(enc)
@staticmethod
[docs] def get_species_from_hex(arg): """ Return a set of species from the encoded species hexadecimal representation. :param arg: str String with hexadecimal representation of list of species. :return: Example: >>> Composition.get_species_from_hex('0x38271d08') [8, 29, 39, 56] """ num = int(arg, 16) ret = [] while num > 0: ret.append(num % 256) num = (num-ret[-1])//256 return ret
[docs] def covalent_volume(self, packing='cubes'): """ Returns the volume occupied by a given formula assuming a 'cubes' packing or 'spheres' packing :param packing: (str) The kind of packing could be 'cubes' or 'spheres' :rtype : (float) >>> import pychemia >>> comp=pychemia.Composition('C5H10') >>> comp.covalent_volume() 19.942320000000002 >>> comp.covalent_volume(packing='spheres') 10.441774334589468 """ if packing == 'cubes': factor = 8 elif packing == 'spheres': factor = 4 * pi / 3.0 else: raise ValueError('Non-valid packing value ', packing) # find volume of unit cell by adding cubes volume = 0.0 for specie in self: number_atoms_specie = self.composition[specie] # Pack each atom in a cube (2*r)^3 volume += factor * number_atoms_specie * covalent_radius(specie) ** 3 return volume