Open Table Of Contents

Source code for bridgedb.Bucket

# -*- coding: utf-8 -*-

"""
This module is responsible for everything concerning file bucket bridge 
distribution. File bucket bridge distribution means that unallocated bridges 
are allocated to a certain pseudo-distributor and later written to a file.

For example, the following is a dict of pseudo-distributors (also called 
'bucket identifiers') with numbers of bridges assigned to them:

        FILE_BUCKETS = { "name1": 10, "name2": 15, "foobar": 3 }

This configuration for buckets would result in 3 files being created for bridge
distribution: name1-2010-07-17.brdgs, name2-2010-07-17.brdgs and 
foobar-2010-07-17.brdgs. The first file would contain 10 bridges from BridgeDB's
'unallocated' pool. The second file would contain 15 bridges from the same pool
and the third one similarly 3 bridges. These files can then be handed out to 
trusted parties via mail or fed to other distribution mechanisms such as 
twitter.

Note that in BridgeDB slang, the _distributor_ would still be 'unallocated',
even though in the database, there would now be 'name1', 'name2' or 'foobar'
instead of 'unallocated'. This is why they are called pseudo-distributors.
"""

import logging
import time
import bridgedb.Storage
import bridgedb.Bridges 
import binascii
import sqlite3
from gettext import gettext as _
toHex = binascii.b2a_hex


# What should pseudo distributors be prefixed with in the database so we can
# distinguish them from real distributors?
PSEUDO_DISTRI_PREFIX = "pseudo_"

# Set to a ridiculously high number. BucketData substitutes this value for the
# number of needed bridges when a bucket is configured with the wildcard "*".
BUCKET_MAX_BRIDGES = 1000000


class BucketData(object):
    """Bookkeeping record for a single file bucket.

    Holds the bucket's name, how many bridges it is supposed to receive, and
    how many it has been given so far.

    :param str name: The name of this bucket. Stored unchanged; callers are
        expected to have prefixed it with :data:`PSEUDO_DISTRI_PREFIX` already.
    :type needed: str or int
    :param needed: The number of bridges needed for this bucket. The wildcard
        ``"*"`` is translated to :data:`BUCKET_MAX_BRIDGES`; anything else is
        converted with :func:`int`.
    :ivar int allocated: Number of bridges already allocated for this bucket.
        Always starts at zero.
    """

    def __init__(self, name, needed):
        wants_everything = (needed == "*")
        self.name = name
        self.needed = BUCKET_MAX_BRIDGES if wants_everything else int(needed)
        self.allocated = 0
class BucketManager(object):
    """BucketManager reads a number of file bucket identifiers from the config.

    They're expected to be in the following format::

        FILE_BUCKETS = { "name1": 10, "name2": 15, "foobar": 3 }

    This syntax means that certain buckets ("name1", "name2" and so on) are
    given a number of bridges (10, 15 and so on). Names can be anything. The
    name will later be the prefix of the file that is written with the
    assigned number of bridges in it. Instead of a number, a wildcard item
    ("*") is allowed, too. This means that the corresponding bucket file will
    get the maximum number of possible bridges (as many as are left in the
    unallocated bucket).

    The files will be written in ip:port format, one bridge per line.

    The way this works internally is as follows:

    First of all, the assignBridgesToBuckets() routine runs through the
    database of bridges and looks up the 'distributor' field of each bridge.
    Unallocated bridges are sent to a pool for later assignment. Already
    allocated bridges for file bucket distribution are sorted and checked.
    They're checked for whether their bucket identifier still exists in the
    current config and also whether the number of assigned bridges is still
    valid. If either the bucket identifier is not existing anymore or too many
    bridges are currently assigned to it, bridges will go to the unassigned
    pool.

    In the second step, after bridges are sorted and the unassigned pool is
    ready, the assignBridgesToBuckets() routine assigns one bridge from the
    unassigned pool to a known bucket identifier at a time until it either
    runs out of bridges in the unallocated pool or the number of needed
    bridges for that bucket is reached.

    When all bridges are assigned in this way, they can then be dumped into
    files by calling the dumpBridges() routine.

    :type cfg: :class:`bridgedb.persistent.Conf`
    :ivar cfg: The central configuration instance.
    :ivar list bucketList: A list of BucketData instances, holding all
        configured (and thus requested) buckets with their respective numbers.
    :ivar list unallocatedList: Holds the hex keys of all bridges from the
        'unallocated' pool.
    :ivar bool unallocated_available: Is at least one unallocated bridge
        available?
    :ivar str distributor_prefix: The 'distributor' field in the database will
        hold the name of our pseudo-distributor, prefixed by this string. By
        default, this uses :data:`PSEUDO_DISTRI_PREFIX`.
    """

    def __init__(self, cfg):
        """Create a ``BucketManager``.

        :type cfg: :class:`bridgedb.persistent.Conf`
        :param cfg: The central configuration instance.
        """
        self.cfg = cfg
        self.bucketList = []
        self.unallocatedList = []
        self.unallocated_available = False
        self.distributor_prefix = PSEUDO_DISTRI_PREFIX

    def _stripPrefix(self, distributor):
        """Return **distributor** without its leading pseudo-distributor prefix.

        Only a prefix at the very start is removed; later occurrences of the
        same text inside the name are left alone.
        """
        prefix = self.distributor_prefix
        if prefix and distributor.startswith(prefix):
            return distributor[len(prefix):]
        return distributor

    def addToUnallocatedList(self, hex_key):
        """Add a bridge by **hex_key** into the unallocated pool.

        The bridge's distributor is first set to ``"unallocated"`` in the
        database; only after that update is committed is the key appended to
        :attr:`unallocatedList`.

        :param str hex_key: The hex key identifying the bridge.
        :raises: whatever the database update raises, after a rollback.
        """
        with bridgedb.Storage.getDB() as db:
            try:
                db.updateDistributorForHexKey("unallocated", hex_key)
            except BaseException:
                # Roll back on any failure (same reach as a bare ``except``),
                # then let the caller see the original error.
                db.rollback()
                raise
            else:
                db.commit()
        self.unallocatedList.append(hex_key)
        self.unallocated_available = True

    def getBucketByIdent(self, bucketIdent):
        """If we know this bucket identifier, then return the corresponding
        :class:`BucketData` object.

        :param str bucketIdent: The (prefixed) bucket name to look up.
        :returns: The first matching entry of :attr:`bucketList`, or ``None``.
        """
        for bucket in self.bucketList:
            if bucket.name == bucketIdent:
                return bucket
        return None

    def assignUnallocatedBridge(self, bucket):
        """Assign an unallocated bridge to a certain **bucket**.

        :param bucket: The :class:`BucketData` which should receive a bridge.
        :rtype: bool
        :returns: ``True`` if a bridge was moved to **bucket**; ``False`` if
            the unallocated pool is empty and nothing could be assigned.
        :raises: whatever the database update raises. In that case the bridge
            is put back into the unallocated pool before re-raising.
        """
        # Callers test the return value, so report an empty pool instead of
        # letting ``pop()`` blow up with an IndexError.
        if not self.unallocatedList:
            self.unallocated_available = False
            return False

        hex_key = self.unallocatedList.pop()
        # Mark pseudo-allocators in the database as such
        allocator_name = bucket.name
        logging.debug("Moving %s to %s", hex_key, allocator_name)
        with bridgedb.Storage.getDB() as db:
            try:
                db.updateDistributorForHexKey(allocator_name, hex_key)
            except BaseException:
                db.rollback()
                logging.warning("Failed to move %s to new distributor (%s)",
                                hex_key, allocator_name)
                # Ok, this seems useless, but for consistency's sake, we'll
                # re-assign the bridge from this missed db update attempt to
                # the unallocated list. Remember? We pop()'d it before.
                self.addToUnallocatedList(hex_key)
                raise
            else:
                db.commit()
        bucket.allocated += 1
        if not self.unallocatedList:
            self.unallocated_available = False
        return True

    def assignBridgesToBuckets(self):
        """Read file bucket identifiers from the configuration, sort them, and
        write necessary changes to the database.

        Every entry of ``cfg.FILE_BUCKETS`` is appended to :attr:`bucketList`
        as a :class:`BucketData`. All bridges are then read from the database:
        unallocated ones, ones belonging to an unknown pseudo-distributor and
        surplus ones of an already full bucket go into the unallocated pool.
        Finally, bridges are handed out round-robin, one per bucket per pass,
        until the pool is empty or every bucket is full. Buckets which are full
        are removed from :attr:`bucketList`.
        """
        logging.debug("Assigning bridges to buckets for pseudo-distributors")

        # Build distributor list
        for name, needed in self.cfg.FILE_BUCKETS.items():
            prefixed_key = self.distributor_prefix + name
            self.bucketList.append(BucketData(prefixed_key, needed))

        # Loop through all bridges and sort out distributors
        with bridgedb.Storage.getDB() as db:
            allBridges = db.getAllBridges()
        for bridge in allBridges:
            if bridge.distributor == "unallocated":
                self.addToUnallocatedList(bridge.hex_key)
                continue

            # Filter non-pseudo distributors (like 'https' and 'email') early,
            # too
            if not bridge.distributor.startswith(self.distributor_prefix):
                continue

            # Return the bucket in case we know it already
            bucket = self.getBucketByIdent(bridge.distributor)
            if bucket is not None and bucket.allocated < bucket.needed:
                # This distributor still needs bridges; keep this one.
                bucket.allocated += 1
            else:
                # Either the bucket has enough members already, or we don't
                # know it (maybe an old entry). Free the bridge.
                self.addToUnallocatedList(bridge.hex_key)

        # Loop through bucketList while we have and need unallocated
        # bridges, assign one bridge at a time
        while self.unallocated_available and self.bucketList:
            logging.debug("We have %d unallocated bridges and %d buckets to "
                          "fill. Let's do it.",
                          len(self.unallocatedList), len(self.bucketList))
            progressed = False
            # Iterate over a snapshot: full buckets are removed from
            # self.bucketList below, and removing from a list while iterating
            # over it would silently skip the following bucket.
            for bucket in list(self.bucketList):
                if bucket.allocated >= bucket.needed:
                    # When we have enough bridges, remove bucket identifier
                    # from list
                    self.bucketList.remove(bucket)
                    progressed = True
                    continue
                if not self.unallocated_available:
                    break
                try:
                    if not self.assignUnallocatedBridge(bucket):
                        break
                except sqlite3.DatabaseError as e:
                    logging.warning("Couldn't assign unallocated bridge to "
                                    "%s: %s",
                                    self._stripPrefix(bucket.name), e)
                else:
                    progressed = True
            if not progressed:
                # Every attempt in this pass failed and the failed bridges went
                # back into the pool; retrying would loop forever.
                logging.warning("Could not assign any bridge in a full pass "
                                "over the buckets; giving up with %d "
                                "unallocated bridges left.",
                                len(self.unallocatedList))
                break

    def dumpBridgesToFile(self, filename, bridges):
        """Dump a list of given **bridges** into **filename**.

        If ``cfg.COLLECT_TIMESTAMPS`` is true, the bridge history of every
        bridge is looked up in the database and one line per available history
        is written as ``ip:port<TAB>(N days at this address)``, ordered by
        ascending ``weightedFractionalUptime``. Otherwise one line per bridge
        is written as ``address:or_port hex_key``.

        An :exc:`IOError` while opening or writing the file is logged and not
        propagated.

        :param str filename: Path of the file to (over)write.
        :param list bridges: Bridge objects providing ``hex_key``, ``address``
            and ``or_port``.
        """
        logging.debug("Dumping bridge assignments to file: %r", filename)
        collectTimestamps = self.cfg.COLLECT_TIMESTAMPS

        lines = []
        if collectTimestamps:
            # The database is only needed for the bridge histories.
            bridgeHistories = []
            with bridgedb.Storage.getDB() as db:
                for b in bridges:
                    bh = db.getBridgeHistory(b.hex_key)
                    if bh:
                        bridgeHistories.append(bh)
            # Sort by weighted fractional uptime, lowest first.
            bridgeHistories.sort(
                key=lambda hist: hist.weightedFractionalUptime)

            # NOTE(review): the original code consulted a ``blocklist`` mapping
            # (fingerprint -> countries) that is not defined anywhere in this
            # module, so it raised NameError. Until the source of that data is
            # restored, nothing is flagged as blocked.
            blocklist = {}

            secondsPerDay = 60 * 60 * 24
            for bh in bridgeHistories:
                days = bh.tosa // secondsPerDay
                line = "%s:%s\t(%d days at this address)" % \
                       (bh.ip, bh.port, days)
                countries = blocklist.get(str(bh.fingerprint))
                if countries is not None:
                    line += "\t(Might be blocked): (%s)" % ",".join(countries)
                lines.append(line)
        else:
            for bridge in bridges:
                lines.append("%s:%d %s" % (bridge.address, bridge.or_port,
                                           bridge.hex_key))

        try:
            # ``with`` guarantees the handle is closed even if a write fails.
            with open(filename, 'w') as f:
                for line in lines:
                    f.write(line + '\n')
        except IOError:
            logging.error("I/O error: %s", filename)

    def dumpBridges(self):
        """Dump all known distributors to files, sorted by distributor.

        All bridges are read from the database and grouped by their
        ``distributor`` field. Each group is written with
        :meth:`dumpBridgesToFile` to ``<name>-<YYYY-MM-DD>.brdgs`` in the
        current working directory, where ``<name>`` is the distributor name
        without the leading pseudo-distributor prefix and with every ``/``
        replaced by ``_``.
        """
        logging.info("Dumping all distributors to file.")
        with bridgedb.Storage.getDB() as db:
            allBridges = db.getAllBridges()

        # Sort returned bridges by distributor
        bridgeDict = {}
        for bridge in allBridges:
            bridgeDict.setdefault(str(bridge.distributor), []).append(bridge)

        # Take the date once so that all files of one run share it.
        datestamp = time.strftime("%Y-%m-%d")

        # Now dump to file(s)
        for distributor, members in bridgeDict.items():
            # Subtract the pseudo distributor prefix
            dist = self._stripPrefix(distributor)
            # Be safe. Replace all '/' in distributor names
            dist = dist.replace("/", "_")
            filename = dist + "-" + datestamp + ".brdgs"
            self.dumpBridgesToFile(filename, members)