Source code for bridgedb.Stability

# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_Stability -*-
#
# This file is part of BridgeDB, a Tor bridge distribution system.
#
# :authors: please see the AUTHORS file for attributions
# :copyright: (c) 2013-2015, Isis Lovecruft
#             (c) 2013-2015, Matthew Finkel
#             (c) 2012-2015, Aaron Gibson
#             (c) 2007-2015, Nick Mathewson
#             (c) 2007-2015, The Tor Project, Inc.
# :license: see LICENSE for licensing information

"""This module provides functionality for tracking bridge stability metrics.

Bridge stability metrics are calculated using the model introduced in
`"An Analysis of Tor Bridge Stability"`_ (Karsten Loesing, Technical Report,
The Tor Project, October 2011) and `implemented in the Tor Metrics library`_.

.. _`"An Analysis of Tor Bridge Stability"`:
    https://metrics.torproject.org/papers/bridge-stability-2011-10-31.pdf
.. _`implemented in the Tor Metrics library`:
    https://gitweb.torproject.org/metrics-tasks/task-4255/SimulateBridgeStability.java
"""

import logging
import bridgedb.Storage

from bridgedb.schedule import toUnixSeconds


# Tunables: every weighted history value is multiplied by weighting_factor
# once per discount interval (12 hours, expressed in milliseconds).
weighting_factor = float(19)/float(20)
discountIntervalMillis = long(60*60*12*1000)
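
# For example (illustrative arithmetic only): a weighted value of 1000
# seconds that has gone undiscounted for 24 hours (two rounds of
# discountIntervalMillis) becomes (19/20)**2 * 1000 == 902.5 seconds; see
# BridgeHistory.discountWeightedFractionalUptimeAndWeightedTime() below.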


class BridgeHistory(object):
    """Record class that tracks the stability history of a single bridge.

    The fields stored are: fingerprint, ip, port, weightedUptime,
    weightedTime, weightedRunLength, totalRunWeights,
    lastSeenWithDifferentAddressAndPort, lastSeenWithThisAddressAndPort,
    lastDiscountedHistoryValues, and lastUpdatedWeightedTime.

    fingerprint
        The bridge fingerprint. (unicode)
    ip
        The bridge IP address. (unicode)
    port
        The bridge ORPort. (integer)
    weightedUptime
        Weighted uptime in seconds. (long int)
    weightedTime
        Weighted time in seconds. (long int)
    weightedRunLength
        Weighted run length of previous addresses or ports, in seconds.
        (long int)
    totalRunWeights
        Total run weights of previously used addresses or ports. (float)
    lastSeenWithDifferentAddressAndPort
        Timestamp in milliseconds when this bridge was last seen with a
        different address or port. (long int)
    lastSeenWithThisAddressAndPort
        Timestamp in milliseconds when this bridge was last seen with this
        address and port. (long int)
    lastDiscountedHistoryValues
        Timestamp in milliseconds when this bridge's history values were
        last discounted. (long int)
    lastUpdatedWeightedTime
        Timestamp in milliseconds when the weighted time was last updated.
        (long int)
    """
    def __init__(self, fingerprint, ip, port, weightedUptime, weightedTime,
                 weightedRunLength, totalRunWeights,
                 lastSeenWithDifferentAddressAndPort,
                 lastSeenWithThisAddressAndPort, lastDiscountedHistoryValues,
                 lastUpdatedWeightedTime):
        self.fingerprint = fingerprint
        self.ip = ip
        self.port = port
        self.weightedUptime = long(weightedUptime)
        self.weightedTime = long(weightedTime)
        self.weightedRunLength = long(weightedRunLength)
        self.totalRunWeights = float(totalRunWeights)
        self.lastSeenWithDifferentAddressAndPort = \
            long(lastSeenWithDifferentAddressAndPort)
        self.lastSeenWithThisAddressAndPort = long(lastSeenWithThisAddressAndPort)
        self.lastDiscountedHistoryValues = long(lastDiscountedHistoryValues)
        self.lastUpdatedWeightedTime = long(lastUpdatedWeightedTime)

    def discountWeightedFractionalUptimeAndWeightedTime(self, discountUntilMillis):
        """Discount the weighted uptime, time, run length, and run weights."""
        if self.lastDiscountedHistoryValues == 0:
            self.lastDiscountedHistoryValues = discountUntilMillis

        rounds = self.numDiscountRounds(discountUntilMillis)
        if rounds > 0:
            # Apply one factor of weighting_factor per elapsed round.
            discount = lambda x: (weighting_factor ** rounds) * x
            self.weightedUptime = discount(self.weightedUptime)
            self.weightedTime = discount(self.weightedTime)
            self.weightedRunLength = discount(self.weightedRunLength)
            self.totalRunWeights = discount(self.totalRunWeights)
            self.lastDiscountedHistoryValues += discountIntervalMillis * rounds
        return rounds

    def numDiscountRounds(self, discountUntilMillis):
        """Return the number of rounds of discounting needed to bring this
        history element current.
        """
        result = discountUntilMillis - self.lastDiscountedHistoryValues
        result = int(result / discountIntervalMillis)
        return max(result, 0)

    @property
    def weightedFractionalUptime(self):
        """Weighted Fractional Uptime (WFU), scaled so that 10000 == 100%."""
        if self.weightedTime < 0.0001:
            return long(0)
        return long(10000) * self.weightedUptime / self.weightedTime

    @property
    def tosa(self):
        """The Time On Same Address (TOSA), in seconds."""
        return (self.lastSeenWithThisAddressAndPort -
                self.lastSeenWithDifferentAddressAndPort) / 1000

    @property
    def familiar(self):
        """A bridge is 'familiar' if 1/8 of all active bridges have appeared
        more recently than it, or if it has been around for a weighted time
        of at least 8 days.
        """
        # If this bridge has been around longer than 8 days, it is familiar.
        if self.weightedTime >= long(8 * 24 * 60 * 60):
            return True
        # Otherwise, return True if self.weightedTime is greater than or
        # equal to the weightedTime of the bottom 1/8 of all bridges,
        # sorted by weightedTime.
        with bridgedb.Storage.getDB() as db:
            allWeightedTimes = [bh.weightedTime
                                for bh in db.getAllBridgeHistory()]
            numBridges = len(allWeightedTimes)
            logging.debug("Got %d weightedTimes", numBridges)
            allWeightedTimes.sort()
            if self.weightedTime >= allWeightedTimes[numBridges / 8]:
                return True
            return False

    @property
    def wmtbac(self):
        """Weighted Mean Time Between Address Change (WMTBAC), in seconds."""
        totalRunLength = self.weightedRunLength + \
            ((self.lastSeenWithThisAddressAndPort -
              self.lastSeenWithDifferentAddressAndPort) / long(1000))
        totalWeights = self.totalRunWeights + 1.0
        if totalWeights < 0.0001:
            return long(0)
        return totalRunLength / totalWeights
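
# Worked example (illustrative values only): a BridgeHistory with a
# weightedUptime of 43200 seconds out of a weightedTime of 86400 seconds has
# a weightedFractionalUptime of 10000 * 43200 / 86400 == 5000 (i.e. 50.00%
# uptime), and if it was last seen on a different address and port
# 172800000 milliseconds before it was last seen on its current ones, its
# tosa is 172800 seconds (two days on the same address).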

def addOrUpdateBridgeHistory(bridge, timestamp):
    with bridgedb.Storage.getDB() as db:
        bhe = db.getBridgeHistory(bridge.fingerprint)
        if not bhe:
            # This is the first status; assume 60 minutes.
            secondsSinceLastStatusPublication = long(60*60)
            lastSeenWithDifferentAddressAndPort = timestamp * long(1000)
            lastSeenWithThisAddressAndPort = timestamp * long(1000)

            bhe = BridgeHistory(
                bridge.fingerprint, bridge.address, bridge.orPort,
                0,  # weightedUptime
                0,  # weightedTime
                0,  # weightedRunLength
                0,  # totalRunWeights
                lastSeenWithDifferentAddressAndPort,  # first timestamp
                lastSeenWithThisAddressAndPort,
                0,  # lastDiscountedHistoryValues
                0,  # lastUpdatedWeightedTime
                )
            # This is the first time we have seen this descriptor.
            db.updateIntoBridgeHistory(bhe)

        # Calculate the seconds since the last parsed status. If this is
        # the first status, or we haven't seen a status for more than 60
        # minutes, assume 60 minutes.
        statusPublicationMillis = long(timestamp * 1000)
        if (statusPublicationMillis - bhe.lastSeenWithThisAddressAndPort) > 60*60*1000:
            secondsSinceLastStatusPublication = long(60*60)
            logging.debug("Capping secondsSinceLastStatusPublication to 1 hour")
        # Otherwise, roll with it.
        else:
            secondsSinceLastStatusPublication = \
                (statusPublicationMillis - bhe.lastSeenWithThisAddressAndPort)/1000

        if secondsSinceLastStatusPublication <= 0 and bhe.weightedTime > 0:
            # Old descriptor; bail.
            logging.warn("Received old descriptor for bridge %s with timestamp %d",
                         bhe.fingerprint, statusPublicationMillis/1000)
            return bhe

        # Iterate over all known bridges and apply the weighting factor.
        discountAndPruneBridgeHistories(statusPublicationMillis)

        # Update the weighted times of bridges.
        updateWeightedTime(statusPublicationMillis)

        # For running bridges only: compare the stored history against the
        # descriptor and see if the bridge has changed its address or port.
        bhe = db.getBridgeHistory(bridge.fingerprint)
        if not bridge.running:
            logging.info("%s is not running" % bridge.fingerprint)
            return bhe

        # Parse the descriptor and see if the address or port changed.
        # If so, store the weighted run time.
        if bridge.orPort != bhe.port or bridge.address != bhe.ip:
            bhe.totalRunWeights += 1.0
            bhe.weightedRunLength += bhe.tosa
            bhe.lastSeenWithDifferentAddressAndPort = \
                bhe.lastSeenWithThisAddressAndPort

        # Regardless of whether the bridge is new, kept, or changed its
        # address and port, raise its WFU times, and note its current
        # address and port and that we saw it using them.
        bhe.weightedUptime += secondsSinceLastStatusPublication
        bhe.lastSeenWithThisAddressAndPort = statusPublicationMillis
        bhe.ip = str(bridge.address)
        bhe.port = bridge.orPort
        return db.updateIntoBridgeHistory(bhe)

def discountAndPruneBridgeHistories(discountUntilMillis):
    with bridgedb.Storage.getDB() as db:
        bhToRemove = []
        bhToUpdate = []
        for bh in db.getAllBridgeHistory():
            # Discount previous values by a factor of 0.95 every 12 hours.
            bh.discountWeightedFractionalUptimeAndWeightedTime(discountUntilMillis)
            # Give the entry at least 24 hours before pruning it.
            if bh.weightedFractionalUptime < 1 and bh.weightedTime > 60*60*24:
                logging.debug("Removing bridge from history: %s" % bh.fingerprint)
                bhToRemove.append(bh.fingerprint)
            else:
                bhToUpdate.append(bh)
        for k in bhToUpdate:
            db.updateIntoBridgeHistory(k)
        for k in bhToRemove:
            db.delBridgeHistory(k)

def updateWeightedTime(statusPublicationMillis):
    bhToUpdate = []
    with bridgedb.Storage.getDB() as db:
        for bh in db.getBridgesLastUpdatedBefore(statusPublicationMillis):
            interval = (statusPublicationMillis - bh.lastUpdatedWeightedTime)/1000
            if interval > 0:
                bh.weightedTime += min(3600, interval)  # cap to 1 hour
                bh.lastUpdatedWeightedTime = statusPublicationMillis
                #db.updateIntoBridgeHistory(bh)
                bhToUpdate.append(bh)
        for bh in bhToUpdate:
            db.updateIntoBridgeHistory(bh)

def updateBridgeHistory(bridges, timestamps):
    """Process all of the timestamps and update the bridge stability
    statistics in the database.

    .. warning:: This function is extremely expensive, and will keep getting
        more and more expensive, on a linearithmic scale, every time it is
        called. Blame the :mod:`bridgedb.Stability` module.

    :param dict bridges: All bridges from the descriptors, parsed into
        :class:`bridgedb.bridges.Bridge` objects and keyed by fingerprint.
    :param dict timestamps: A dictionary whose keys are bridge fingerprints,
        and whose values are lists of timestamps, each one the time at which
        a descriptor for that bridge was published.
    :rtype: dict
    :returns: The original **timestamps**, but with each list of timestamps
        (re)sorted.
    """
    logging.debug("Beginning bridge stability calculations")

    sortedTimestamps = {}
    for fingerprint, stamps in timestamps.items()[:]:
        stamps.sort()
        bridge = bridges[fingerprint]
        for timestamp in stamps:
            logging.debug(("Adding/updating timestamps in BridgeHistory for "
                           "%s in database: %s") % (fingerprint, timestamp))
            timestamp = toUnixSeconds(timestamp.timetuple())
            addOrUpdateBridgeHistory(bridge, timestamp)
        # Replace the timestamps so that the next sort is (hopefully) less
        # expensive:
        sortedTimestamps[fingerprint] = stamps

    logging.debug("Stability calculations complete")
    return sortedTimestamps
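
# Example usage (a minimal sketch only; ``bridges`` and ``stamps`` are
# hypothetical variables, assumed to have been built elsewhere from parsed
# bridge networkstatus documents and descriptors):
#
#     from bridgedb import Stability
#
#     # ``bridges`` maps each bridge fingerprint to its parsed Bridge
#     # object; ``stamps`` maps each fingerprint to the list of publication
#     # times of that bridge's descriptors.
#     stamps = Stability.updateBridgeHistory(bridges, stamps)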