Package prest :: Module drvr
[hide private]
[frames] | no frames]

Source Code for Module prest.drvr

  1  #!/usr/bin/env python3 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """The Driver class holds everything the program needs to perform prediction. 
  5   
  6  It gathers instances of the needed classes, implements some wrapper methods and 
  7  the very important predict() method, which actually computes the suggested words. 
  8  """ 
  9   
 10  try: 
 11      import configparser 
 12  except ImportError: 
 13      import ConfigParser as configparser 
 14  try: 
 15      from StringIO import StringIO 
 16  except ImportError: 
 17      from io import StringIO 
 18  import clbk 
 19  import slct 
 20  import prdct 
 21  import cntxt 
 22  import sys 
 23  import re 
 24  from lg import lg 
 25   
 26   
class UnknownTypeCastError(Exception):
    """Error raised when a requested cast type is not recognised.

    @ivar value: The cast type name that was requested.
    """

    def __init__(self, value):
        """Store the offending cast type name.

        @param value:
            The unrecognised cast type name.
        """
        # Forward the value to Exception so that ``args`` is populated;
        # without it repr() loses the value and copying/pickling the
        # exception fails because it is rebuilt from ``args``.
        super().__init__(value)
        self.value = value

    def __str__(self):
        """Return the quoted (repr) error message naming the cast type."""
        return repr('Unknown cast type "%s"' % self.value)
33 34
class MissingConfigurationSection(Exception):
    """Error raised when a configuration section cannot be found.

    @ivar value: The name of the missing section.
    """

    def __init__(self, value):
        """Store the name of the missing section.

        @param value:
            The name of the section that was looked up.
        """
        # Forward the value to Exception so that ``args`` is populated;
        # without it repr() loses the value and copying/pickling the
        # exception fails because it is rebuilt from ``args``.
        super().__init__(value)
        self.value = value

    def __str__(self):
        """Return the quoted (repr) error message naming the section."""
        return repr('Section "%s" is missing from the configuration'
                    % self.value)
42 43
class MissingConfigurationOption(Exception):
    """Error raised when a configuration option cannot be found.

    @ivar value: The name of the missing option.
    """

    def __init__(self, value):
        """Store the name of the missing option.

        @param value:
            The name of the option that was looked up.
        """
        # Forward the value to Exception so that ``args`` is populated;
        # without it repr() loses the value and copying/pickling the
        # exception fails because it is rebuilt from ``args``.
        super().__init__(value)
        self.value = value

    def __str__(self):
        """Return the quoted (repr) error message naming the option."""
        # The message used to say 'Section "..." is missing', a copy-paste
        # slip that made a missing option look like a missing section.
        return repr('Option "%s" is missing from the configuration'
                    % self.value)
51 52
class Configuration(configparser.ConfigParser):
    """In-memory copy of an ini-style configuration file.

    This is a thin subclass of the standard ConfigParser, which read()s an
    ini file into sections and options. It adds a single method, getas(),
    which checks that the requested section and option exist before
    retrieving the value and optionally converting it to another type.
    The configuration file may have been edited by the user, hence the
    explicit checks.

    G{classtree Configuration}
    """

    def __init__(self):
        """Create an empty configuration."""
        super().__init__()

    def getas(self, section, option, typeCast=None):
        """Retrieve an option after checking that it exists, optionally cast.

        Supported values for typeCast:
            - bool: cast to bool
            - int: cast to int
            - float: cast to float
            - list: split on whitespace into a list of strings
            - intlist: split on whitespace and cast each element to int
            - floatlist: split on whitespace and cast each element to float

        A value that cannot be converted makes the underlying conversion
        raise its own error (typically ValueError).

        @param section:
            The section from which to retrieve the option.
        @type section: str
        @param option:
            The option to retrieve inside the section.
        @type option: str
        @param typeCast:
            How to cast the result. When omitted (or "str") the raw string
            is returned.

        @return:
            The value of the option, converted according to typeCast.
        @rtype:
            str or int or bool or float or list (depends on "typeCast" param)

        @raise MissingConfigurationSection:
            If the given section is not in the configuration.
        @raise MissingConfigurationOption:
            If the given option is not in the given section.
        @raise UnknownTypeCastError:
            If typeCast is not "str", "bool", "int", "float", "list",
            "intlist" or "floatlist".
        """
        # Existence checks come first: section, then option.
        if not self.has_section(section):
            raise MissingConfigurationSection(section)
        if not self.has_option(section, option):
            raise MissingConfigurationOption(option)

        def raw():
            return self.get(section, option)

        def words():
            return raw().split()

        # No cast requested: hand back the plain string.
        if not typeCast or typeCast == 'str':
            return raw()

        # Ordered (name, converter) pairs, matched with == exactly like a
        # chain of if statements would.
        converters = (
            ('bool', lambda: self.getboolean(section, option)),
            ('int', lambda: self.getint(section, option)),
            ('float', lambda: self.getfloat(section, option)),
            ('list', words),
            ('intlist', lambda: [int(item) for item in words()]),
            ('floatlist', lambda: [float(item) for item in words()]),
        )
        for castName, convert in converters:
            if typeCast == castName:
                return convert()
        raise UnknownTypeCastError(typeCast)
129 130
class Driver:
    """The Driver class gathers the class instances and variables of the
    program.

    G{classtree Driver}
    """

    def __init__(self, callback, configFile=''):
        """Build every object needed for the prediction.

        The configuration is loaded first, then the predictor registry, the
        context monitor, the predictor activator and the selector are
        created from it, in that order.

        @param callback:
            The callback is used to access the input buffers from anywhere.
        @type callback: L{clbk.Callback}
        @param configFile:
            Path of the configuration file.
        @type configFile: str
        """
        self.configFile = configFile
        self.configuration = self.make_config()
        self.callback = callback
        self.predictorRegistry = prdct.PredictorRegistry(self.configuration)
        self.contextMonitor = cntxt.ContextMonitor(
            self.configuration, self.predictorRegistry, callback)
        self.predictorActivator = prdct.PredictorActivator(
            self.configuration, self.predictorRegistry)
        self.selector = slct.Selector(self.configuration, self.contextMonitor)

    def predict(self):
        """Request suggested words from the predictors.

        This method:
            - Does the next two steps until it cannot get more suggestions:
                - Calls PredictorActivator.predict(), which:
                    - Calls the predict() method of each predictor in the
                      predictorRegistry. Each predict() method should
                      return a Prediction instance containing the suggested
                      words computed by the predictor (it may be empty).
                    - Merges the Prediction instances into a single
                      Prediction instance.
                - Selects the best suggestions in the Prediction instance
                  and removes the excess.
            - Learns from what the user has typed.
            - Returns the selected suggestions.

        @return:
            The suggested words list.
        @rtype: list
        """
        factor = 1
        predictions = self.predictorActivator.predict(factor)
        result = self.selector.select(predictions)

        previousPredictions = predictions
        while len(result) < self.selector.suggestions:
            # Raise the factor BEFORE asking again. Re-running the
            # activator with an unchanged factor just repeats the previous
            # request, so the loop could never obtain more predictions.
            # NOTE(review): assumes a larger factor makes the activator
            # return more candidates -- confirm against PredictorActivator.
            factor += 1
            predictions = self.predictorActivator.predict(factor)
            if len(predictions) <= len(previousPredictions):
                # Nothing new came back: give up rather than loop forever.
                lg.warning('WARNING: Expected number of suggestions cannot be '
                           'reached.')
                break
            result = self.selector.select(predictions)
            previousPredictions = predictions
        self.learn_from_buffers()
        return result

    def learn_from_buffers(self):
        """Simple ContextMonitor.update() wrapper for comprehension sake."""
        self.contextMonitor.update()

    def make_completion(self, suggestion):
        """Simple ContextMonitor.make_completion() wrapper.

        @param suggestion:
            Passed through unchanged to ContextMonitor.make_completion().

        @return:
            Whatever ContextMonitor.make_completion() returns.
        """
        return self.contextMonitor.make_completion(suggestion)

    def close_databases(self):
        """Close the predictors databases (PredictorRegistry.close_database()
        wrapper).
        """
        self.predictorRegistry.close_database()

    def make_config(self):
        """Initialize the configuration.

        This method first tries to read the configuration file and parse it
        into a Configuration instance. If no file could be read (read()
        returns an empty list), the built-in default configuration below is
        loaded instead.

        @return:
            The Configuration instance holding every setting.
        @rtype: L{drvr.Configuration}
        """
        config = Configuration()
        if not config.read(self.configFile):
            # NOTE(review): the defaults contain absolute paths under
            # /home/mathieu; they are kept verbatim here.
            defaults = StringIO(
                """
[Global]
language = en

[GUI]
font_size = 10

[MinerRegistry]
miners = CorpusMiner FbMiner

[CorpusMiner]
class = CorpusMiner
texts = /home/mathieu/Documents/data_mining/pyprest_clean/brown.txt
dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/database_brown.db
lowercase = False
n = 3

[FbMiner]
class = FacebookMiner
accesstoken =
dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/fb.db
lowercase = False
n = 3
last_update = 1433879088

[TwitterMiner]

[PredictorRegistry]
predictors = CorpusNgramPredictor DictionaryPredictor InputNgramPredictor

[ContextMonitor]
live_learning = True
monitored_scope = 80
lowercase = True

[Selector]
suggestions = 6
repeat_suggestions = False
greedy_suggestion_threshold = 0

[PredictorActivator]
predict_time = 1000
max_partial_prediction_size = 50
combination_policy = Probabilistic
stoplist = ./insanities_en.stoplist

[CorpusNgramPredictor]
class = WeightNgramPredictor
dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/database_brown.db
deltas = 0.01 0.1 0.89
learn = False

[InputNgramPredictor]
class = WeightNgramPredictor
dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/lm.db
deltas = 0.01 0.1 0.89
learn = True

[FbNgramPredictor]
class = WeightNgramPredictor
dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/fb.db
deltas = 0.01 0.1 0.89
learn = False

[LateOccurPredictor]
class = LastOccurPredictor
lambda = 1
n_0 = 1
cutoff_threshold = 20

[MemorizePredictor]
class = MemorizePredictor
memory = /home/mathieu/Documents/data_mining/pyprest_clean/txt/memory.txt
trigger = 3
learn = True

[DictionaryPredictor]
class = DictionaryPredictor
dictionary = /usr/share/dict/words
probability = 0.000001
""")
            # ConfigParser.readfp() was deprecated in Python 3.2 and removed
            # in 3.12; prefer read_file() and only fall back to readfp() on
            # parsers that do not provide it.
            load = getattr(config, 'read_file', None) or config.readfp
            load(defaults)
        return config
306