Source code for multiset

# -*- coding: utf-8 -*-
"""An implementation of a multiset."""

from collections.abc import MutableSet, Set
from typing import Generic, Iterable, Mapping, Optional, TypeVar, Union

T = TypeVar('T')
OtherType = Union[Iterable[T], Mapping[T, int]]

[docs]class Multiset(dict, MutableSet, Mapping[T, int], Generic[T]): """A multiset implementation. A multiset is similar to the builtin :class:`set`, but elements can occur multiple times in the multiset. It is also similar to a :class:`list` without ordering of the values and hence no index-based operations. The multiset is implemented as a specialized :class:`dict` where the key is the element and the value its multiplicity. It supports all operations, that the :class:`set` supports In contrast to the builtin :class:`collections.Counter`, no negative counts are allowed, elements with zero counts are removed from the :class:`dict`, and set operations are supported. :see: https://en.wikipedia.org/wiki/Multiset """
[docs] def __init__(self, iterable: Optional[OtherType]=None) -> None: r"""Create a new, empty Multiset object. And if given, initialize with elements from input iterable. Or, initialize from a mapping of elements to their multiplicity. Example: >>> ms = Multiset() # a new, empty multiset >>> ms = Multiset('abc') # a new multiset from an iterable >>> ms = Multiset({'a': 4, 'b': 2}) # a new multiset from a mapping Args: iterable: An optional :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] to initialize the multiset from. """ self._total = 0 super().__init__() if iterable is not None: self.update(iterable)
def __missing__(self, element: T): """The multiplicity of elements not in the multiset is zero.""" return 0 def __setitem__(self, element: T, multiplicity: int): """Set the element's multiplicity. This will remove the element if the multiplicity is less than or equal to zero. '""" old = self[element] new = multiplicity > 0 and multiplicity or 0 if multiplicity <= 0: if element in self: super().__delitem__(element) else: super().__setitem__(element, multiplicity) self._total += new - old def __str__(self): return '{%s}' % ', '.join(map(str, self)) def __repr__(self): items = ', '.join('%r: %r' % item for item in self.items()) return '%s({%s})' % (type(self).__name__, items) def __len__(self): """Returns the total number of elements in the multiset. Note that this is equivalent to the sum of the multiplicities: >>> ms = Multiset('aab') >>> len(ms) 3 >>> sum(ms.values()) 3 If you need the total number of elements, use either the :meth:`keys`() method >>> len(ms.keys()) 2 or convert to a :class:`set`: >>> len(set(ms)) 2 """ return self._total def __iter__(self): for element, multiplicity in self.items(): for _ in range(multiplicity): yield element
[docs] def update(self, *others: OtherType) -> None: r"""Like :meth:`dict.update` but add multiplicities instead of replacing them. >>> ms = Multiset('aab') >>> ms.update('abc') >>> sorted(ms) ['a', 'a', 'a', 'b', 'b', 'c'] Note that the operator ``+=`` is equivalent to :meth:`update`, except that the operator will only accept sets to avoid accidental errors. >>> ms += Multiset('bc') >>> sorted(ms) ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c'] For a variant of the operation which does not modify the multiset, but returns a new multiset instead see :meth:`combine`. Args: others: The other sets to add to this multiset. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. """ for other in others: if isinstance(other, Mapping): for elem, multiplicity in other.items(): self[elem] += multiplicity else: for elem in other: self[elem] += 1
[docs] def union_update(self, *others: OtherType) -> None: r"""Update the multiset, adding elements from all others using the maximum multiplicity. >>> ms = Multiset('aab') >>> ms.union_update('bc') >>> sorted(ms) ['a', 'a', 'b', 'c'] You can also use the ``|=`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aab') >>> ms |= Multiset('bccd') >>> sorted(ms) ['a', 'a', 'b', 'c', 'c', 'd'] For a variant of the operation which does not modify the multiset, but returns a new multiset instead see :meth:`union`. Args: others: The other sets to union this multiset with. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. """ for other in map(self._as_multiset, others): for elem, multiplicity in other.items(): if multiplicity > self[elem]: self[elem] = multiplicity
def __ior__(self, other): if not isinstance(other, Set): return NotImplemented self.union_update(other) return self
[docs] def intersection_update(self, *others: OtherType) -> None: r"""Update the multiset, keeping only elements found in it and all others. >>> ms = Multiset('aab') >>> ms.intersection_update('bc') >>> sorted(ms) ['b'] You can also use the ``&=`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aabc') >>> ms &= Multiset('abbd') >>> sorted(ms) ['a', 'b'] For a variant of the operation which does not modify the multiset, but returns a new multiset instead see :meth:`intersection`. Args: others: The other sets to intersect this multiset with. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. """ for other in map(self._as_multiset, others): for elem, current_count in list(self.items()): multiplicity = other[elem] if multiplicity < current_count: self[elem] = multiplicity
def __iand__(self, other): if not isinstance(other, Set): return NotImplemented self.intersection_update(other) return self
[docs] def difference_update(self, *others: OtherType) -> None: r"""Remove all elements contained the others from this multiset. >>> ms = Multiset('aab') >>> ms.difference_update('abc') >>> sorted(ms) ['a'] You can also use the ``-=`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aabbbc') >>> ms -= Multiset('abd') >>> sorted(ms) ['a', 'b', 'b', 'c'] For a variant of the operation which does not modify the multiset, but returns a new multiset instead see :meth:`difference`. Args: others: The other sets to remove from this multiset. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. """ for other in map(self._as_multiset, others): for elem, multiplicity in other.items(): self.discard(elem, multiplicity)
def __isub__(self, other): if not isinstance(other, Set): return NotImplemented self.difference_update(other) return self
[docs] def symmetric_difference_update(self, other: OtherType) -> None: r"""Update the multiset to contain only elements in either this multiset or the other but not both. >>> ms = Multiset('aab') >>> ms.symmetric_difference_update('abc') >>> sorted(ms) ['a', 'c'] You can also use the ``^=`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aabbbc') >>> ms ^= Multiset('abd') >>> sorted(ms) ['a', 'b', 'b', 'c', 'd'] For a variant of the operation which does not modify the multiset, but returns a new multiset instead see :meth:`symmetric_difference`. Args: other: The other set to take the symmetric difference with. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. """ other = self._as_multiset(other) keys = set(self.keys()) | set(other.keys()) for elem in keys: multiplicity = self[elem] other_count = other[elem] self[elem] = multiplicity > other_count and multiplicity - other_count or other_count - multiplicity
def __ixor__(self, other): if not isinstance(other, Set): return NotImplemented self.symmetric_difference_update(other) return self
[docs] def times_update(self, factor: int) -> None: """Update each this multiset by multiplying each element's multiplicity with the given scalar factor. >>> ms = Multiset('aab') >>> ms.times_update(2) >>> sorted(ms) ['a', 'a', 'a', 'a', 'b', 'b'] You can also use the ``*=`` operator for the same effect: >>> ms = Multiset('ac') >>> ms *= 3 >>> sorted(ms) ['a', 'a', 'a', 'c', 'c', 'c'] For a variant of the operation which does not modify the multiset, but returns a new multiset instead see :meth:`times`. Args: factor: The factor to multiply each multiplicity with. """ if factor <= 0: self.clear() else: for elem in self.keys(): self[elem] *= factor
def __imul__(self, factor): self.times_update(factor) return self
[docs] def add(self, element: T, multiplicity: int=1) -> None: # pylint: disable=arguments-differ """Adds an element to the multiset. >>> ms = Multiset() >>> ms.add('a') >>> sorted(ms) ['a'] An optional multiplicity can be specified to define how many of the element are added: >>> ms.add('b', 2) >>> sorted(ms) ['a', 'b', 'b'] This extends the :meth:`MutableSet.add` signature to allow specifying the multiplicity. Args: element: The element to add to the multiset. multiplicity: The multiplicity i.e. count of elements to add. """ self[element] = self[element] + multiplicity
[docs] def remove(self, element: T, multiplicity: Optional[int]=None) -> int: # pylint: disable=arguments-differ """Removes an element from the multiset. If no multiplicity is specified, the element is completely removed from the multiset: >>> ms = Multiset('aabbbc') >>> ms.remove('a') 2 >>> sorted(ms) ['b', 'b', 'b', 'c'] If the multiplicity is given, it is subtracted from the element's multiplicity in the multiset: >>> ms.remove('b', 2) 3 >>> sorted(ms) ['b', 'c'] This extends the :meth:`MutableSet.remove` signature to allow specifying the multiplicity. Args: element: The element to remove from the multiset. multiplicity: An optional multiplicity i.e. count of elements to remove. Returns: The multiplicity of the element in the multiset before the removal. Raises: KeyError: if the element is not contained in the set. Use :meth:`discard` if you do not want an exception to be raised. """ if element not in self: raise KeyError old_count = self[element] if multiplicity is None: del self[element] else: self[element] = self[element] - multiplicity return old_count
[docs] def discard(self, element: T, multiplicity: Optional[int]=None) -> int: # pylint: disable=arguments-differ """Removes the `element` from the multiset. If multiplicity is ``None``, all occurrences of the element are removed: >>> ms = Multiset('aab') >>> ms.discard('a') 2 >>> sorted(ms) ['b'] Otherwise, the multiplicity is subtracted from the one in the multiset: >>> ms = Multiset('aab') >>> ms.discard('a', 1) 2 >>> sorted(ms) ['a', 'b'] In contrast to :meth:`remove`, this does not raise an error if the element is not in the multiset: >>> ms = Multiset('a') >>> ms.discard('b') 0 >>> sorted(ms) ['a'] Args: element: The element to remove from the multiset. multiplicity: An optional multiplicity i.e. count of elements to remove. Returns: The multiplicity of the element in the multiset before the removal. """ if element in self: old_count = self[element] if multiplicity is None: del self[element] else: self[element] -= multiplicity return old_count else: return 0
def _as_multiset(self, other: OtherType) -> 'Multiset[T]': if not isinstance(other, Multiset): if not isinstance(other, Iterable): raise TypeError("'%s' object is not iterable" % type(other)) return type(self)(other) return other
[docs] def isdisjoint(self, other: OtherType) -> bool: r"""Return True if the set has no elements in common with other. Sets are disjoint iff their intersection is the empty set. >>> ms = Multiset('aab') >>> ms.isdisjoint('bc') False >>> ms.isdisjoint(Multiset('ccd')) True Args: other: The other set to check disjointedness. Can also be an :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. """ other = self._as_multiset(other) for elem in self.keys(): if elem in other: return False return True
[docs] def difference(self, *others: OtherType) -> 'Multiset[T]': r"""Return a new multiset with all elements from the others removed. >>> ms = Multiset('aab') >>> sorted(ms.difference('bc')) ['a', 'a'] You can also use the ``-`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aabbbc') >>> sorted(ms - Multiset('abd')) ['a', 'b', 'b', 'c'] For a variant of the operation which modifies the multiset in place see :meth:`difference_update`. Args: others: The other sets to remove from the multiset. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. Returns: The resulting difference multiset. """ result = type(self)(self) result.difference_update(*others) return result
def __sub__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self.difference(other)
[docs] def union(self, *others: OtherType) -> 'Multiset[T]': r"""Return a new multiset with all elements from the multiset and the others with maximal multiplicities. >>> ms = Multiset('aab') >>> sorted(ms.union('bc')) ['a', 'a', 'b', 'c'] You can also use the ``|`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aab') >>> sorted(ms | Multiset('aaa')) ['a', 'a', 'a', 'b'] For a variant of the operation which modifies the multiset in place see :meth:`union`. Args: others: The other sets to union the multiset with. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. Returns: The multiset resulting from the union. """ result = type(self)(self) result.union_update(*others) return result
def __or__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self.union(other) __ror__ = __or__
[docs] def combine(self, *others: OtherType) -> 'Multiset[T]': r"""Return a new multiset with all elements from the multiset and the others with their multiplicities summed up. >>> ms = Multiset('aab') >>> sorted(ms.combine('bc')) ['a', 'a', 'b', 'b', 'c'] You can also use the ``+`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aab') >>> sorted(ms + Multiset('a')) ['a', 'a', 'a', 'b'] For a variant of the operation which modifies the multiset in place see :meth:`update`. Args: others: The other sets to add to the multiset. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. Returns: The multiset resulting from the addition of the sets. """ result = type(self)(self) result.update(*others) return result
def __add__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self.combine(other) __radd__ = __add__
[docs] def intersection(self, *others: OtherType) -> 'Multiset[T]': r"""Return a new multiset with elements common to the multiset and all others. >>> ms = Multiset('aab') >>> sorted(ms.intersection('abc')) ['a', 'b'] You can also use the ``&`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aab') >>> sorted(ms & Multiset('aaac')) ['a', 'a'] For a variant of the operation which modifies the multiset in place see :meth:`intersection_update`. Args: others: The other sets intersect with the multiset. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. Returns: The multiset resulting from the intersection of the sets. """ result = type(self)(self) result.intersection_update(*others) return result
def __and__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self.intersection(other) __rand__ = __and__
[docs] def symmetric_difference(self, other: OtherType) -> 'Multiset[T]': r"""Return a new set with elements in either the set or other but not both. >>> ms = Multiset('aab') >>> sorted(ms.symmetric_difference('abc')) ['a', 'c'] You can also use the ``^`` operator for the same effect. However, the operator version will only accept a set as other operator, not any iterable, to avoid errors. >>> ms = Multiset('aab') >>> sorted(ms ^ Multiset('aaac')) ['a', 'b', 'c'] For a variant of the operation which modifies the multiset in place see :meth:`symmetric_difference_update`. Args: other: The other set to take the symmetric difference with. Can also be any :class:`~typing.Iterable`\[~T] or :class:`~typing.Mapping`\[~T, :class:`int`] which are then converted to :class:`Multiset`\[~T]. Returns: The resulting symmetric difference multiset. """ result = type(self)(self) result.symmetric_difference_update(other) return result
def __xor__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self.symmetric_difference(other) __rxor__ = __xor__
[docs] def times(self, factor: int) -> 'Multiset[T]': """Return a new set with each element's multiplicity multiplied with the given scalar factor. >>> ms = Multiset('aab') >>> sorted(ms.times(2)) ['a', 'a', 'a', 'a', 'b', 'b'] You can also use the ``*`` operator for the same effect: >>> sorted(ms * 3) ['a', 'a', 'a', 'a', 'a', 'a', 'b', 'b', 'b'] For a variant of the operation which modifies the multiset in place see :meth:`times_update`. Args: factor: The factor to multiply each multiplicity with. """ result = type(self)(self) result.times_update(factor) return result
def __mul__(self, factor: int) -> 'Multiset[T]': if not isinstance(factor, int): return NotImplemented return self.times(factor) __rmul__ = __mul__
[docs] def clear(self) -> None: """Empty the multiset.""" super().clear() self._total = 0
def _issubset(self, other: OtherType, strict: bool) -> bool: other = self._as_multiset(other) other_len = len(other) if len(self) > other_len: return False if len(self) == other_len and strict: return False for elem, multiplicity in self.items(): if multiplicity > other[elem]: return False return True
[docs] def issubset(self, other: OtherType) -> bool: """Return True iff this set is a subset of the other. >>> Multiset('ab').issubset('aabc') True >>> Multiset('aabb').issubset(Multiset('aabc')) False You can also use the ``<=`` operator for this comparison: >>> Multiset('ab') <= Multiset('ab') True When using the ``<`` operator for comparison, the sets are checked to be unequal in addition: >>> Multiset('ab') < Multiset('ab') False Args: other: The potential superset of the multiset to be checked. Returns: True iff this set is a subset of the other. """ return self._issubset(other, False)
def __le__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self._issubset(other, False) def __lt__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self._issubset(other, True) def _issuperset(self, other: OtherType, strict: bool) -> bool: other = self._as_multiset(other) other_len = len(other) if len(self) < other_len: return False if len(self) == other_len and strict: return False for elem, multiplicity in other.items(): if self[elem] < multiplicity: return False return True
[docs] def issuperset(self, other: OtherType) -> bool: """Return True iff this multiset is a superset of the other. >>> Multiset('aabc').issuperset('ab') True >>> Multiset('aabc').issuperset(Multiset('abcc')) False You can also use the ``>=`` operator for this comparison: >>> Multiset('ab') >= Multiset('ab') True When using the ``>`` operator for comparison, the sets are checked to be unequal in addition: >>> Multiset('ab') > Multiset('ab') False Args: other: The potential subset of the multiset to be checked. Returns: True iff this set is a subset of the other. """ return self._issuperset(other, False)
def __ge__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self._issuperset(other, False) def __gt__(self, other: Set) -> bool: if not isinstance(other, Set): return NotImplemented return self._issuperset(other, True) def __eq__(self, other: Set): if not isinstance(other, Set): return NotImplemented if isinstance(other, Multiset): return dict.__eq__(self, other) if len(self) != len(other): return False return self._issubset(other, False) def __ne__(self, other: Set): if not isinstance(other, Set): return NotImplemented if isinstance(other, Multiset): return dict.__ne__(self, other) if len(self) != len(other): return True return not self._issubset(other, False)
[docs] def get(self, elem: T, default: int) -> int: """Return the multiplicity for *elem* if it is in the multiset, else *default*. Makes the *default* argument of the original :meth:`dict.get` non-optional. Args: elem: The element of which to get the multiplicity. default: The default value to return if the element if not in the multiset. Returns: The multiplicity for *elem* if it is in the multiset, else *default*. """ return super().get(elem, default)
[docs] def pop(self, elem: T, default: int) -> int: """If *elem* is in the multiset, remove it and return its multiplicity, else return *default*. Makes the *default* argument of the original :meth:`dict.pop` non-optional. Args: elem: The element which is removed. default: The default value to return if the element if not in the multiset. Returns: The multiplicity for *elem* if it is in the multiset, else *default*. """ return super().pop(elem, default)
[docs] def setdefault(self, elem: T, default: int) -> int: """If *elem* is in the multiset, return its multiplicity. Else add it with a multiplicity of *default* and return *default*. Makes the *default* argument of the original :meth:`dict.setdefault` non-optional. Args: elem: The element which is added if not already present. default: The default multiplicity to add the element with if not in the multiset. Returns: The multiplicity for *elem* if it is in the multiset, else *default*. """ return super().setdefault(elem, default)
@classmethod
[docs] def fromkeys(cls, elements: Iterable[T], multiplicity: int) -> 'Multiset[T]': """Create a new multiset with the given *elements* and each multiplicity set to *multiplicity*. Makes the *value* argument of the original :meth:`dict.fromkeys` non-optional. Args: elements: The element for the new multiset. multiplicity: The multiplicity for all elements. Returns: The new multiset. """ return super().fromkeys(elements, multiplicity)
[docs] def copy(self) -> 'Multiset[T]': """Return a shallow copy of the multiset.""" return type(self)(self)
__copy__ = copy
if __name__ == '__main__': import doctest doctest.testmod(exclude_empty=True)