Source code for libpgm.sampleaggregator

# Copyright (c) 2012, CyberPoint International, LLC
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the CyberPoint International, LLC nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL CYBERPOINT INTERNATIONAL, LLC BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
This module provides tools for collecting and managing sets of samples generated by the library's sampling functions. By averaging a series of samples, the progam can approximate a joint probability distribution without having to do the exact calculations, which may be useful in large networks. 

'''


[docs]class SampleAggregator(object): ''' This class is a machine for aggregating data from sample sequences. It contains the method *aggregate*. ''' def __init__(self): self.seq = None '''The sequence inputted.''' self.avg = None '''The average of all the entries in *seq*, represented as a dict where each vertex has an entry whose value is a dict of {key, value} pairs, where each key is a possible outcome of that vertex and its value is the approximate frequency.'''
[docs] def aggregate(self, samplerstatement): ''' Generate a sequence of samples using *samplerstatement* and return the average of its results. Arguments: 1. *samplerstatement* -- The statement of a function (with inputs) that would output a sequence of samples. For example: ``bn.randomsample(50)`` where ``bn`` is an instance of the :doc:`DiscreteBayesianNetwork <discretebayesiannetwork>` class. This function stores the output of *samplerstatement* in the attribute *seq*, and then averages *seq* and stores the approximate distribution found in the attribute *avg*. It then returns *avg*. Usage example: this would print the average of 10 data points:: import json from libpgm.nodedata import NodeData from libpgm.graphskeleton import GraphSkeleton from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork from libpgm.sampleaggregator import SampleAggregator # load nodedata and graphskeleton nd = NodeData() skel = GraphSkeleton() nd.load("../tests/unittestdict.txt") skel.load("../tests/unittestdict.txt") # topologically order graphskeleton skel.toporder() # load bayesian network bn = DiscreteBayesianNetwork(skel, nd) # build aggregator agg = SampleAggregator() # average samples result = agg.aggregate(bn.randomsample(10)) # output print json.dumps(result, indent=2) ''' # get sequence seq = samplerstatement # denominator denom = len(seq) output = dict() for key in seq[0].keys(): output[key] = dict() for trial in seq: keyss = output[key].keys() vall = trial[key] if (keyss.count(vall) > 0): output[key][trial[key]] += 1 else: output[key][trial[key]] = 1 # normalize for entry in output[key].keys(): output[key][entry] = output[key][entry] / float(denom) self.seq = seq self.avg = output return output