# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_util -*-
#
# This file is part of BridgeDB, a Tor bridge distribution system.
#
# :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
# Matthew Finkel 0x017DD169EA793BE2 <sysrqb@torproject.org>
# :copyright: (c) 2013-2015, Isis Lovecruft
# (c) 2013-2015, Matthew Finkel
# (c) 2007-2015, The Tor Project, Inc.
# :license: 3-Clause BSD, see LICENSE for licensing information
"""Common utilities for BridgeDB."""
from functools import partial
import abc
import logging
import logging.config
import logging.handlers
import os
from twisted.python import components
def _getLogHandlers(logToFile=True, logToStderr=True):
    """Select the names of the log handlers which should be enabled.

    The names returned are the keys under which the handlers are expected to
    be declared in the dictionary handed to :func:`logging.config.dictConfig`.

    :param bool logToFile: If ``True``, include the ``'rotating'`` logfile
        handler.
    :param bool logToStderr: If ``True``, include the ``'console'`` stream
        handler.
    :rtype: list
    :returns: The enabled handler names, with ``'rotating'`` always listed
        before ``'console'``.
    """
    # Order matters: the file handler is listed first, as callers may rely on
    # the position of the names in the returned list.
    candidates = (
        ('rotating', logToFile),
        ('console', logToStderr),
    )
    return [name for name, enabled in candidates if enabled]
def _getRotatingFileHandler(filename, mode='a', maxBytes=1000000, backupCount=0,
                            encoding='utf-8', uid=None, gid=None):
    """Prepare a logfile and get a factory for a
    :class:`logging.handlers.RotatingFileHandler` which writes to it.

    The logfile is created if it does not exist yet, then its ownership is
    set to **uid**/**gid** and its permissions are set so that it is
    readable+writable only by its owner (mode ``0600``).

    :param str filename: The full path to the log file.
    :param str mode: The mode to open **filename** with. (default: ``'a'``)
    :param int maxBytes: Rotate logfiles after they have grown to this size in
        bytes.
    :param int backupCount: The number of logfiles to keep in rotation.
    :param str encoding: The encoding for the logfile.
    :param int uid: The owner UID to set on the logfile. If ``None``, the
        UID of the current process is used.
    :param int gid: The GID to set on the logfile. If ``None``, the GID of
        the current process is used.
    :raises OSError: if the logfile cannot be created, chowned or chmoded.
    :rtype: :class:`functools.partial`
    :returns: A callable which, when called without arguments, constructs the
        :class:`logging.handlers.RotatingFileHandler` for **filename**. The
        handler itself is *not* instantiated here.
    """
    # Compare against ``None`` explicitly: 0 is a valid id (root) and must not
    # be silently replaced with the id of the current process.
    uid = os.getuid() if uid is None else uid
    gid = os.getgid() if gid is None else gid

    if not os.path.exists(filename):
        # Opening in append mode creates the file without truncating anything.
        with open(filename, 'a'):
            pass

    os.chown(filename, uid, gid)
    # Owner read+write only. The previous ``os.ST_WRITE | os.ST_APPEND`` are
    # statvfs(2) mount flags which merely happen to add up to 0o600 and are
    # not available on every platform.
    os.chmod(filename, 0o600)

    # Positional arguments are (filename, mode), matching the signature of
    # RotatingFileHandler.
    return partial(logging.handlers.RotatingFileHandler,
                   filename,
                   mode,
                   maxBytes=maxBytes,
                   backupCount=backupCount,
                   encoding=encoding)
def levenshteinDistance(s1, s2, len1=None, len2=None,
                        offset1=0, offset2=0, memo=None):
    """Compute the Levenshtein Distance between two strings.

    The `Levenshtein String Distance Algorithm
    <https://en.wikipedia.org/wiki/Levenshtein_distance>`_ computes the
    minimum number of single-character insertions, deletions and
    substitutions which must be made to **s1** to make it identical to **s2**.

    >>> from bridgedb.util import levenshteinDistance
    >>> levenshteinDistance('cat', 'cat')
    0
    >>> levenshteinDistance('cat', 'hat')
    1
    >>> levenshteinDistance('arma', 'armadillo')
    5

    The distance is computed iteratively, so the length of the inputs is not
    limited by the interpreter's recursion limit.

    :param str s1: The string which should be changed.
    :param str s2: The string which **s1** should be compared to.
    :param int len1: The number of characters of **s1** to consider, starting
        at **offset1**. (default: all of **s1**)
    :param int len2: The number of characters of **s2** to consider, starting
        at **offset2**. (default: all of **s2**)
    :param int offset1: The index in **s1** at which to start comparing.
    :param int offset2: The index in **s2** at which to start comparing.
    :param dict memo: An optional cache. If given, it is consulted for an
        already known result for these offsets and lengths, and the computed
        distance is stored in it. Intermediate sub-problems are not recorded.
    :raises IndexError: if the offsets and lengths reach past the end of
        **s1** or **s2**.
    :rtype: int
    :returns: The edit distance between the selected parts of **s1** and
        **s2**.
    """
    len1 = len(s1) if len1 is None else len1
    len2 = len(s2) if len2 is None else len2

    # Same key layout as before, so a memo shared between calls stays valid.
    key = ','.join([str(offset1), str(len1), str(offset2), str(len2)])
    if memo is not None:
        known = memo.get(key)
        if known is not None:
            return known

    # Turning an empty string into the other one takes one edit per character.
    if len1 == 0:
        return len2
    if len2 == 0:
        return len1

    # Index (rather than slice) so that out-of-range offsets/lengths still
    # raise IndexError instead of being silently truncated.
    source = [s1[offset1 + i] for i in range(len1)]
    target = [s2[offset2 + j] for j in range(len2)]

    # previous[j] is the distance between the first i-1 characters of source
    # and the first j characters of target; only two rows are ever needed.
    previous = list(range(len2 + 1))
    for i, sourceChar in enumerate(source, 1):
        current = [i]
        for j, targetChar in enumerate(target, 1):
            cost = 0 if sourceChar == targetChar else 1
            current.append(min(previous[j] + 1,          # deletion
                               current[j - 1] + 1,       # insertion
                               previous[j - 1] + cost))  # substitution
        previous = current

    distance = previous[len2]
    if memo is not None:
        memo[key] = distance
    return distance
def isascii(s):
    """Check that every character in **s** is an ASCII character.

    Unlike the ``str.is*`` methods, an empty string is considered to pass the
    check, i.e. ``True`` is returned for it rather than ``False``.

    >>> from bridgedb.util import isascii
    >>> isascii('\x80')
    False
    >>> isascii('foo\tbar\rbaz\n')
    True
    >>> isascii('foo bar')
    True

    :param str s: The string to check for non-ASCII characters.
    :rtype: bool
    :returns: ``True`` if no character in **s** has a code point above 127,
        ``False`` otherwise.
    """
    return not any(ord(character) >= 128 for character in s)
def isascii_noncontrol(s):
    """Check that **s** consists solely of printable ASCII characters.

    A character passes if its code point lies in the range 32 (space) to 126
    (``~``) inclusive, which excludes both the ASCII control characters and
    everything outside of ASCII.

    Unlike the ``str.is*`` methods, an empty string is considered to pass the
    check, i.e. ``True`` is returned for it rather than ``False``.

    >>> from bridgedb.util import isascii_noncontrol
    >>> isascii_noncontrol('\x80')
    False
    >>> isascii_noncontrol('foo\tbar\rbaz\n')
    False
    >>> isascii_noncontrol('foo bar')
    True

    :param str s: The string to check for non-ASCII or control characters.
    :rtype: bool
    :returns: ``True`` if there are no non-ASCII or control characters in
        **s**, ``False`` otherwise.
    """
    return all(32 <= ord(character) < 127 for character in s)
def replaceControlChars(text, replacement=None, encoding="utf-8"):
    """Remove (or replace) ASCII control characters and backslashes.

    The bytes affected are the ASCII control characters [0-31, 127] and, in
    addition, the backslash (92), which is not a control character but is
    stripped as well so that no escape sequences survive.

    >>> from bridgedb.util import replaceControlChars
    >>> replaceControlChars('foo\n bar\\ baz\r \t\0quux\n')
    'foo bar baz quux'
    >>> replaceControlChars("\bI wonder if I'm outside the quotes now")
    "I wonder if I'm outside the quotes now"

    The filtering happens on the *encoded* bytes of **text**. This is safe
    for ASCII-compatible encodings such as UTF-8, where the affected byte
    values never occur inside of a multi-byte sequence.

    :param str text: Some text to remove ASCII control characters from.
    :param int replacement: If given, the **replacement** should be an integer
        representing the decimal representation of the byte to replace
        occurrences of ASCII control characters with. For example, if they
        should be replaced with the character ``'a'``, then ``97`` should be
        used as the **replacement**, because ``ord('a') == 97``. A falsy
        value (``None`` or ``0``) means the characters are removed.
    :param str encoding: The encoding of the **text**.
    :rtype: str
    :returns: The sanitized **text**.
    """
    # Built once per call rather than once per byte, and without relying on
    # ``range()`` returning a list (it does not on Python 3).
    unwanted = frozenset(range(32)) | frozenset((92, 127))

    escaped = bytearray()
    for byte in bytearray(text, encoding):
        if byte in unwanted:
            if not replacement:
                continue
            byte = replacement
        escaped.append(byte)

    sanitized = bytes(escaped)
    # On Python 2 ``bytes`` is ``str`` and is returned unchanged, as before.
    # On Python 3 the bytes must be decoded to get back a ``str``; calling
    # ``str()`` on a bytearray there would yield its repr instead.
    if not isinstance(sanitized, str):
        sanitized = sanitized.decode(encoding)
    return sanitized
def registerAdapter(adapter, adapted, interface):
    """Globally register a Zope interface adapter, tolerating duplicates.

    This wraps :api:`twisted.python.components.registerAdapter`; see also the
    Twisted Matrix Labs `howto documentation for components`_.

    A :exc:`ValueError` raised by the underlying registration is ignored, so
    calling this function repeatedly with the same arguments is harmless.

    .. _howto documentation for components:
        https://twistedmatrix.com/documents/current/core/howto/components.html

    :param adapter: The adapter factory, passed through unchanged.
    :param adapted: The class or interface being adapted, passed through
        unchanged.
    :param interface: The interface which **adapter** provides, passed
        through unchanged.
    """
    try:
        components.registerAdapter(adapter, adapted, interface)
    except ValueError:
        # An adapter class was already registered; there is nothing to do.
        return
class mixin(abc.ABCMeta('_mixinBase', (object,), {})):
    """Subclasses of me can be used as a mixin class by registering another
    class, ``ClassA``, which should be mixed with the ``mixin`` subclass, in
    order to provide simple, less error-prone, multiple inheritance models::

    >>> from __future__ import print_function
    >>> from bridgedb.util import mixin
    >>>
    >>> class ClassA(object):
    ...     def sayWhich(self):
    ...         print("ClassA.sayWhich() called.")
    ...     def doSuperThing(self):
    ...         print("%s" % super(ClassA, self))
    ...     def doThing(self):
    ...         print("ClassA is doing a thing.")
    ...
    >>> class ClassB(ClassA):
    ...     def sayWhich(self):
    ...         print("ClassB.sayWhich() called.")
    ...     def doSuperThing(self):
    ...         print("%s" % super(ClassB, self))
    ...     def doOtherThing(self):
    ...         print("ClassB is doing something else.")
    ...
    >>> class ClassM(mixin):
    ...     def sayWhich(self):
    ...         print("ClassM.sayWhich() called.")
    ...
    >>> ClassM.register(ClassA)
    >>>
    >>> class ClassC(ClassM, ClassB):
    ...     def sayWhich(self):
    ...         super(ClassC, self).sayWhich()
    ...
    >>> c = ClassC()
    >>> c.sayWhich()
    ClassM.sayWhich() called.
    >>> c.doSuperThing()
    <super: <class 'ClassB'>, <ClassC object>>
    >>> c.doThing()
    ClassA is doing a thing.
    >>> c.doOtherThing()
    ClassB is doing something else.

    The metaclass of this class (and hence of all its subclasses) is
    :class:`abc.ABCMeta`, which is what provides the ``register()``
    classmethod used above.

    .. info:: This class' name is lowercased because pylint is hardcoded to
        expect mixin classes to end in ``'mixin'``.
    """
    # Python 2 honours the ``__metaclass__`` attribute, but Python 3 ignores
    # it. Deriving from a helper base which was created by calling the
    # metaclass directly makes ``abc.ABCMeta`` the metaclass on both, without
    # resorting to version-specific class syntax.
    __metaclass__ = abc.ABCMeta