1
2
3
4 """The Driver class holds everything the program needs to perform prediction.
5
6 It gathers instances of the needed classes and implements some wrapper methods and the
7 very important predict() method, which actually computes the suggested words.
8 """
9
10 try:
11 import configparser
12 except ImportError:
13 import ConfigParser as configparser
14 try:
15 from StringIO import StringIO
16 except ImportError:
17 from io import StringIO
18 import clbk
19 import slct
20 import prdct
21 import cntxt
22 import sys
23 import re
24 from lg import lg
25
26
30
32 return repr('Unknown cast type "%s"' % self.value)
33
34
38
40 return repr('Section "%s" is missing from the configuration'
41 % self.value)
42
43
47
49 return repr('Section "%s" is missing from the configuration'
50 % self.value)
51
52
54 """Copy a configuration file (ini format) in memory.
55
56 This class subclasses the Configparser class. Configparser is used to read() a
57 configuration file (ini format) into memory in the form of a dictionary
58 associating sections and options.
59 This class implements a new method which retrieves and casts a
60 configuration option and asserts that the option exists and can be cast.
61 The config file can be edited by the user and, you know... never trust
62 user input.
63
64 G{classtree Configuration}
65 """
66
68 """Configuration creator."""
69 super().__init__()
70
def getas(self, section, option, typeCast=None):
    """Retrieve a configuration option, optionally converted to a type.

    The section and the option are both checked for existence before the
    value is read. The raw value is then converted according to
    "typeCast". Supported conversions are:
        - str (or no value): return the raw string
        - bool: convert to bool
        - int: convert to int
        - float: convert to float
        - list: split the raw string on whitespace
        - intlist: split on whitespace and convert each element to int
        - floatlist: split on whitespace and convert each element to float
    A value that cannot be converted is not handled here: the error
    raised by the underlying conversion propagates to the caller.

    @param section:
        The section from which to retrieve the option.
    @type section: str
    @param option:
        The option to retrieve inside the section.
    @type option: str
    @param typeCast:
        Name of the conversion to apply. If it is omitted (or falsy) the
        result is returned as a string.
    @type typeCast: str

    @return:
        The converted value of the given option found inside the given
        section.
    @rtype:
        str or int or bool or float or list (depends on "typeCast" param)

    @raise MissingConfigurationSection:
        If the given section cannot be found in the configuration.
    @raise MissingConfigurationOption:
        If the given option cannot be found in the given section.
    @raise UnknownTypeCastError:
        If the given typeCast value is not "str", "bool", "int", "float",
        "list", "intlist" or "floatlist".
    """
    if not self.has_section(section):
        raise MissingConfigurationSection(section)
    if not self.has_option(section, option):
        raise MissingConfigurationOption(option)

    def raw():
        return self.get(section, option)

    def tokens():
        return raw().split()

    # A falsy typeCast (None, '') means "no conversion": plain string.
    if not typeCast:
        return raw()

    # Ordered (name, converter) pairs; names are matched with "==" so any
    # typeCast object is compared exactly as a chain of equality tests.
    converters = (
        ('str', raw),
        ('bool', lambda: self.getboolean(section, option)),
        ('int', lambda: self.getint(section, option)),
        ('float', lambda: self.getfloat(section, option)),
        ('list', tokens),
        ('intlist', lambda: [int(item) for item in tokens()]),
        ('floatlist', lambda: [float(item) for item in tokens()]),
    )
    for name, convert in converters:
        if typeCast == name:
            return convert()
    raise UnknownTypeCastError(typeCast)
129
130
132 """The Driver class gathers the class instances and variables of the program.
133
134 G{classtree Driver}
135 """
136
def __init__(self, callback, configFile=''):
    """Create the driver and every component needed for the prediction.

    The configuration is built first, through make_config(), because each
    component created afterwards receives it as its first argument.

    @param callback:
        The callback is used to access the input buffers from anywhere.
        It is stored on the driver and handed to the context monitor.
    @type callback: L{clbk.Callback}
    @param configFile:
        Path of the configuration file.
    @type configFile: str
    """
    self.configFile = configFile
    # make_config() reads self.configFile, so it must be set beforehand.
    settings = self.make_config()
    self.configuration = settings
    self.callback = callback

    # The registry is shared by the context monitor and the activator.
    registry = prdct.PredictorRegistry(settings)
    self.predictorRegistry = registry

    monitor = cntxt.ContextMonitor(settings, registry, callback)
    self.contextMonitor = monitor

    self.predictorActivator = prdct.PredictorActivator(settings, registry)
    # The selector works from the context monitor created above.
    self.selector = slct.Selector(settings, monitor)
156
158 """Request suggested words to predictors.
159
160 This method:
161 - Do the next two steps until it cannot get more suggestions.
162 - Call the PredictorActivator.predict() which:
163 - Call the predict() method of each predictor in the
164 predictorRegistry. Each predict() method should return a
165 Prediction instance containing the suggested words computed by
166 the predictor (it may be empty).
167 - Merge the Prediction instances into a single Prediction
168 instance.
169 - Select the best suggestions in the Prediction instance and remove
170 the excess.
171 - Learn from what the user has typed.
172 - Return the selected suggestions.
173
174 @return:
175 The suggested words list.
176 @rtype: list
177 """
178 factor = 1
179 predictions = self.predictorActivator.predict(factor)
180 result = self.selector.select(predictions)
181
182 previousPredictions = predictions
183 while len(result) < self.selector.suggestions:
184 predictions = self.predictorActivator.predict(factor)
185 if len(predictions) > len(previousPredictions):
186 factor += 1
187 result = self.selector.select(predictions)
188 previousPredictions = predictions
189 else:
190 lg.warning('WARNING: Expected number of suggestions cannot be '
191 'reached.')
192 break
193 self.learn_from_buffers()
194 return result
195
197 """Simple ContextMonitor.update() wrapper for comprehension sake."""
198 self.contextMonitor.update()
199
203
205 """Close every opened predictors database."""
206 self.predictorRegistry.close_database()
207
209 """Initialize the config dictionary.
210
211 This method first tries to read the configuration file and parse it into
212 a Configuration instance (dictionary).
213 If the config file is empty or doesn't exist, then a default config
214 dictionary is created.
215
216 @return:
217 The Configuration instance holding every settings (dictionary
218 style).
219 @rtype: L{drvr.Configuration}
220 """
221 config = Configuration()
222 if config.read(self.configFile) == []:
223 config.readfp(StringIO(
224 """
225 [Global]
226 language = en
227
228 [GUI]
229 font_size = 10
230
231 [MinerRegistry]
232 miners = CorpusMiner FbMiner
233
234 [CorpusMiner]
235 class = CorpusMiner
236 texts = /home/mathieu/Documents/data_mining/pyprest_clean/brown.txt
237 dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/database_brown.db
238 lowercase = False
239 n = 3
240
241 [FbMiner]
242 class = FacebookMiner
243 accesstoken =
244 dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/fb.db
245 lowercase = False
246 n = 3
247 last_update = 1433879088
248
249 [TwitterMiner]
250
251 [PredictorRegistry]
252 predictors = CorpusNgramPredictor DictionaryPredictor InputNgramPredictor
253
254 [ContextMonitor]
255 live_learning = True
256 monitored_scope = 80
257 lowercase = True
258
259 [Selector]
260 suggestions = 6
261 repeat_suggestions = False
262 greedy_suggestion_threshold = 0
263
264 [PredictorActivator]
265 predict_time = 1000
266 max_partial_prediction_size = 50
267 combination_policy = Probabilistic
268 stoplist = ./insanities_en.stoplist
269
270 [CorpusNgramPredictor]
271 class = WeightNgramPredictor
272 dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/database_brown.db
273 deltas = 0.01 0.1 0.89
274 learn = False
275
276 [InputNgramPredictor]
277 class = WeightNgramPredictor
278 dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/lm.db
279 deltas = 0.01 0.1 0.89
280 learn = True
281
282 [FbNgramPredictor]
283 class = WeightNgramPredictor
284 dbfilename = /home/mathieu/Documents/data_mining/pyprest_clean/db/fb.db
285 deltas = 0.01 0.1 0.89
286 learn = False
287
288 [LateOccurPredictor]
289 class = LastOccurPredictor
290 lambda = 1
291 n_0 = 1
292 cutoff_threshold = 20
293
294 [MemorizePredictor]
295 class = MemorizePredictor
296 memory = /home/mathieu/Documents/data_mining/pyprest_clean/txt/memory.txt
297 trigger = 3
298 learn = True
299
300 [DictionaryPredictor]
301 class = DictionaryPredictor
302 dictionary = /usr/share/dict/words
303 probability = 0.000001
304 """))
305 return config
306