1
2
3
4 """Classes for context monitoring and context change detection.
5
6 The context is determined by the input buffers contained in the Callback class.
7 It is necessary to monitor the context in order to know what the user is typing
8 and compute accurate predictive suggestions.
9 """
10
11 from __future__ import absolute_import, unicode_literals
12 import copy
13 import char
14 import tknz
15 try:
16 from StringIO import StringIO
17 except ImportError:
18 from io import StringIO
19
20
class ContextChangeDetector(object):
    """Detect context changes.

    A context change can occur when some special characters appear in the
    buffers:
        - Word characters indicate that the current token is a (partial)
          word.
        - Blankspaces indicate the separation between two words. They mark
          the end of the current token.
        - Separators indicate a separation between two words. The dot
          separator marks the end of the sentence (and of the token) so the
          next token and suggestions should begin with an uppercase letter.
        - Special characters are non-printable characters such as backspace
          and arrow keys which are used to modify the input buffers.

    It is important to detect context changes because some operations such
    as n-gram learning from input or suggested words suppression have to be
    carried out upon context changes.

    G{classtree ContextChangeDetector}
    """

    def __init__(self, lowercase, config):
        """ContextChangeDetector creator.

        @param lowercase:
            Indicate if the tokens should be converted to lowercase.
        @type lowercase: boolean
        @param config:
            It is used to retrieve the ContextMonitor settings from the
            configuration file.
        @type config: L{drvr.Configuration}
        """
        self.lowercase = lowercase
        self.config = config
        # Maximum number of trailing characters of the left buffer that are
        # remembered between two calls.
        self.monitoredScopeSize = self.config.getas(
            'ContextMonitor', 'monitored_scope', 'int')
        self.monitoredScope = ''

    def update_monitored_scope(self, string):
        """Move the monitored scope to the end of the given string.

        The monitored scope is the trailing part of the string, at most
        C{self.monitoredScopeSize} characters long.

        @param string:
            Every character inputted in the monitored buffer.
        @type string: str
        """
        size = self.monitoredScopeSize
        if len(string) <= size:
            self.monitoredScope = string
        else:
            # Keep the LAST 'size' characters. An explicit start index is
            # used instead of string[-size:] so that a size of 0 yields an
            # empty scope rather than the whole string.
            self.monitoredScope = string[len(string) - size:]

    def context_change(self, leftBuffer):
        """Check if the context has changed.

        To determine if a context change occurred it is necessary to scan
        the input left buffer and the monitored scope. A change occurred if:
            - The monitored scope is empty and the left buffer is not.
            - The monitored scope is not part of the left buffer.
            - The monitored scope is part of the left buffer and the text
              typed after it does not end with a word character (unless
              that text holds no word character at all and the scope itself
              did not end with one).

        @param leftBuffer:
            The input left buffer.
        @type leftBuffer: str

        @return:
            True or False whether the context has changed or not.
        @rtype: boolean
        """
        prevContext = self.monitoredScope
        currContext = leftBuffer
        if not prevContext:
            # Nothing was monitored yet: any input is a first change.
            return bool(currContext)
        scopeIdx = currContext.rfind(prevContext)
        if scopeIdx == -1:
            # The previously monitored text has been edited or removed.
            return True
        scopeEnd = scopeIdx + len(prevContext)
        rest = currContext[scopeEnd:]
        if not rest:
            # Nothing was typed after the monitored scope. An empty
            # remainder cannot contain a word character, so this is the
            # same outcome as the "no word character found" case below.
            return False
        idx = char.last_word_char(rest)
        if idx == -1:
            # Only non-word characters were typed: the context changed only
            # if they terminate a word, i.e. the scope ended with a word
            # character.
            return bool(char.is_word_char(currContext[scopeEnd - 1]))
        # The context is unchanged only while the remainder still ends with
        # a word character (the user keeps typing the current word).
        return idx != len(rest) - 1

    def change(self, leftBuffer):
        """Return the (part of the) token(s) appearing after a change.

        When a change occurs it is necessary to retrieve the characters
        forming (partial) tokens which have been inputted AFTER the change
        and this is what this method does.
        Whether a change occurred is determined by self.context_change().

        @note: If no change has been registered yet, or if the monitored
            scope cannot be found in the left buffer, then the leftBuffer is
            returned.

        @param leftBuffer:
            The input left buffer.
        @type leftBuffer: str

        @return:
            (Part of) tokens inputted after the last change.
        @rtype: str
        """
        prevContext = self.monitoredScope
        if not prevContext:
            return leftBuffer
        scopeIdx = leftBuffer.rfind(prevContext)
        if scopeIdx == -1:
            return leftBuffer
        result = leftBuffer[scopeIdx + len(prevContext):]
        if self.context_change(leftBuffer):
            # On a context change, prepend the first token read backward
            # from the end of the monitored scope to the newly typed text.
            tokenizer = tknz.ReverseTokenizer(prevContext, self.lowercase)
            firstToken = tokenizer.next_token()
            if firstToken:
                result = firstToken + result
        return result
146
147
class ContextMonitor(object):
    """Monitor the user's current context.

    This class monitors the input buffers in order to:
        - Tokenize the input and use the tokens for prediction.
        - Identify context changes.

    G{classtree ContextMonitor}
    """

    def __init__(self, config, predictorRegistry, callback):
        """ContextMonitor creator.

        @param config:
            It is used to retrieve the ContextMonitor settings from the
            configuration file.
        @type config: L{drvr.Configuration}
        @param predictorRegistry:
            It is used to access the predictors' learn() methods. Also, the
            ContextMonitor is used by the predictors to access the input
            buffers.
        @type predictorRegistry: L{prdct.PredictorRegistry}
        @param callback:
            As the callback holds the input buffers and the ContextMonitor
            operates on these buffers, it is used to access the input
            buffers from inside the ContextMonitor.
        @type callback: L{clbk.Callback}
        """
        self.config = config
        self.lowercase = self.config.getas(
            'ContextMonitor', 'lowercase', 'bool')
        self.liveLearning = self.config.getas(
            'ContextMonitor', 'live_learning', 'bool')
        self.predictorRegistry = predictorRegistry
        self.callback = callback
        self.contextChangeDetector = ContextChangeDetector(
            self.lowercase, self.config)
        # Give the registry a back-reference to this monitor.
        self.predictorRegistry.contextMonitor = self

    def context_change(self):
        """Check if a context change occurred.

        @return:
            Return True or False whether a context change occurred.
        @rtype: bool
        """
        return self.contextChangeDetector.context_change(self.left_buffer())

    # NOTE(review): the 'def' line of this method is missing from the
    # listing this file was recovered from; the name 'update' is restored
    # from its docstring -- confirm against the caller (Driver.predict()).
    def update(self):
        """Check if context changes occurred and learn what needs learning.

        This method is called by Driver.predict() after the predictions have
        been computed. It retrieves the text inputted since the last change
        and, if live learning is ON and that text is not empty, learns it.
        Finally, it updates the monitored scope.
        """
        leftBuffer = self.left_buffer()
        change = self.contextChangeDetector.change(leftBuffer)
        if self.liveLearning and change:
            self.learn(change)
        self.contextChangeDetector.update_monitored_scope(leftBuffer)

    def learn(self, string):
        """Learn n-grams from the input buffers.

        Trigger the learn() method of each predictor of the registry. The
        string is tokenized and every token but the last one is handed to
        the predictors so that the program learns from the user input.

        @param string:
            The string to learn.
        @type string: str
        """
        tok = tknz.ForwardTokenizer(
            string, self.lowercase, char.blankspaces, char.separators)
        tokens = []
        while tok.has_more_tokens():
            tokens.append(tok.next_token())
        # The last token is not learnt -- presumably because it may still
        # be a partial word being typed (TODO confirm).
        tokens = tokens[:-1]
        for predictor in self.predictorRegistry:
            predictor.learn(tokens)

    def prefix(self):
        """Return the token just before the cursor.

        @return:
            The token just before the cursor or an empty string if there is
            none.
        @rtype: str
        """
        return self.left_token(0)

    # NOTE(review): the 'def' line of this method is missing from the
    # listing this file was recovered from; the name 'suffix' is inferred
    # as the counterpart of prefix() -- confirm against the callers.
    def suffix(self):
        """Return the token just after the cursor.

        @return:
            The token just after the cursor or the empty string if there is
            none.
        @rtype: str
        """
        return self.right_token(0)

    @staticmethod
    def _token_at(tokenizer, index):
        """Return the token number 'index' yielded by 'tokenizer'.

        @param tokenizer:
            An object exposing has_more_tokens() and next_token().
        @param index:
            Zero-based position of the wanted token.
        @type index: int

        @return:
            The requested token, or an empty string if the tokenizer runs
            out of tokens first (or if 'index' is negative).
        @rtype: str
        """
        token = ''
        count = 0
        while tokenizer.has_more_tokens() and count <= index:
            token = tokenizer.next_token()
            count += 1
        if count <= index:
            # The tokenizer was exhausted before reaching 'index'.
            token = ''
        return token

    def left_token(self, index):
        """Return the token at a given index in the left input buffer.

        The left buffer is read with a reverse tokenizer, so index 0 is the
        first token that tokenizer yields.

        @param index:
            The index of the token to retrieve in the left input buffer.
        @type index: int

        @return:
            The token at index 'index' in the left input buffer or an empty
            string if the token doesn't exist.
        @rtype: str
        """
        tok = tknz.ReverseTokenizer(self.left_buffer(), self.lowercase)
        return self._token_at(tok, index)

    def right_token(self, index):
        """Return the token at a given index in the right input buffer.

        @param index:
            The index of the token to retrieve in the right input buffer.
        @type index: int

        @return:
            The token at index 'index' in the right input buffer or an empty
            string if the token doesn't exist.
        @rtype: str
        """
        # The right buffer lives on the callback, not on this object.
        # NOTE(review): learn() passes explicit blankspaces/separators to
        # ForwardTokenizer; this call relies on the tokenizer's defaults --
        # confirm they exist.
        tok = tknz.ForwardTokenizer(self.right_buffer(), self.lowercase)
        return self._token_at(tok, index)

    def previous_tokens(self, index, change):
        """Return the token just before the change token (if any).

        This method is called in some predictors' learn() method. It
        retrieves the token that appears just before the change token and
        has already been learnt before (or should have). The previous token
        is used to fill the n-grams.

        @param index:
            Index of the previous token.
        @type index: int
        @param change:
            The change token.
        @type change: str

        @return:
            The token just before the change token or an empty string if
            there is none.
        @rtype: str
        """
        # The left-buffer index is offset by len(change).
        return self.left_token(index + len(change))

    def left_buffer(self):
        """Use the callback to get the value of the left buffer.

        @return:
            The left input buffer.
        @rtype: str
        """
        return self.callback.left

    def right_buffer(self):
        """Use the callback to get the value of the right buffer.

        @return:
            The right input buffer.
        @rtype: str
        """
        return self.callback.right

    def make_completion(self, suggestion):
        """Compute the completion string given a suggested word.

        This method computes and returns the completion string using the
        token just before the cursor (prefix) and the suggested word
        (suggestion). The suggestion should be the word that the user chose
        from the suggested words list.

        For instance, if the prefix is::
            "wor"
        And the suggestion is::
            "world"
        Then this method will compute the completion::
            "ld"

        If the character before the cursor is a blankspace or a separator
        then the prefix should be empty::
            ""
        Then if the suggestion is::
            "guilty"
        This method will compute the completion::
            "guilty"

        If the suggestion and the prefix don't match then False is returned.
        This should never happen as suggestions completing an input word
        should always match it. Still, it is checked at the cost of some
        lower() and startswith() calls.

        @param suggestion:
            The suggested word from which to compute the completion.
        @type suggestion: str

        @return:
            The part of the suggestion following the prefix, or False if
            the suggestion does not start with the prefix (the comparison
            ignores case).
        @rtype: str or bool
        """
        prefix = self.prefix()
        if suggestion.lower().startswith(prefix.lower()):
            return suggestion[len(prefix):]
        return False
372