1
2
3
4 """Classes for context monitoring and context change detection.
5
6 The context is determined by the input buffers contained in the Callback class.
7 It is necessary to monitor the context in order to know what the user is typing
8 and compute accurate predictive suggestions.
9 """
10
11 from tipy.char import blankspaces, separators, is_word_char, last_word_char
12 from tipy.tknz import ReverseTokenizer, ForwardTokenizer
13 try:
14 from StringIO import StringIO
15 except ImportError:
16 from io import StringIO
17
18
20 """Detect context change.
21
22 A context change can occur when some special characters appear in the
23 buffers:
24 - Word characters indicate that the current token is a (partial) word.
25 - Blankspaces indicate the separations between two words. They mark
26 the end of the current token.
27 - Separators indicate a separation between two words. The dot separator
28 marks the end of the sentence (and of the token) so the next token and
29 suggestions should begin with an uppercase letter.
30 - Special characters are non-printable characters such as backspace and
31 arrow keys which are used to modify the input buffers.
32
33 It is important to detect context change because some operations such as
34 n-gram learning from input or suggested words suppression have to be carried
35 out upon context changes.
36
37 G{classtree ContextChangeDetector}
38 """
39
def __init__(self, lowercase, config):
    """Build a detector whose monitored scope starts out empty.

    @param lowercase:
        Whether the tokens extracted by the detector should be converted
        to lowercase.
    @type lowercase: boolean
    @param config:
        Configuration object. It is queried for the 'monitored_scope'
        option of the 'ContextMonitor' section, read as an int.
    @type config: L{drvr.Configuration}
    """
    scopeSize = config.getas('ContextMonitor', 'monitored_scope', 'int')
    self.config = config
    self.lowercase = lowercase
    # Maximum number of characters kept in the monitored scope.
    self.monitoredScopeSize = scopeSize
    # Nothing has been observed yet.
    self.monitoredScope = ''
56
def update_monitored_scope(self, string):
    """Slide the monitored scope to the end of the given string.

    The monitored scope holds at most self.monitoredScopeSize characters.
    When the string is longer than that, only its trailing characters are
    kept, so the scope always ends where the string ends.

    @param string:
        Every character inputted in the monitored buffer.
    @type string: str
    """
    size = self.monitoredScopeSize
    if len(string) <= size:
        self.monitoredScope = string
    else:
        # Keep the LAST `size` characters. The start index is computed
        # explicitly (instead of string[-size:]) so that a size of 0
        # yields an empty scope rather than the whole string.
        self.monitoredScope = string[len(string) - size:]
68
def context_change(self, leftBuffer):
    """Tell whether the context changed since the scope was last updated.

    The left buffer is compared with the monitored scope. A change is
    reported when:
        - The monitored scope is empty and the left buffer is not.
        - The monitored scope cannot be found in the left buffer.
        - The text following the last occurrence of the monitored scope
          contains a word character that is not its final character.
        - That text is non-empty, has no word character at all, and the
          monitored scope itself ends with a word character.

    @param leftBuffer:
        The input left buffer.
    @type leftBuffer: str

    @return:
        True if the context has changed, False otherwise.
    @rtype: boolean
    """
    previous = self.monitoredScope
    if len(previous) == 0:
        # Without a previous scope, any input counts as a change.
        return len(leftBuffer) != 0
    start = leftBuffer.rfind(previous)
    if start == -1:
        return True
    boundary = start + len(previous)
    tail = leftBuffer[boundary:]
    lastWordIdx = last_word_char(tail)
    if lastWordIdx != -1:
        # No change only while the tail still ends with a word character.
        return lastWordIdx != len(tail) - 1
    if len(tail) == 0:
        return False
    # The tail holds no word character: it is a change only if it closes
    # a word that ended the monitored scope.
    return bool(is_word_char(leftBuffer[boundary - 1]))
110
def change(self, leftBuffer):
    """Return the text inputted after the monitored scope.

    The result is the part of the left buffer that follows the last
    occurrence of the monitored scope. When self.context_change() reports
    a change, the first token yielded by a ReverseTokenizer built on the
    monitored scope is prepended to that text, provided it is not empty.

    @note: If the monitored scope is empty or cannot be found in the left
           buffer, the whole left buffer is returned.

    @param leftBuffer:
        The input left buffer.
    @type leftBuffer: str

    @return:
        The characters (partial tokens included) inputted after the last
        change.
    @rtype: str
    """
    previous = self.monitoredScope
    if len(previous) == 0:
        return leftBuffer
    start = leftBuffer.rfind(previous)
    if start == -1:
        return leftBuffer
    typedSince = leftBuffer[start + len(previous):]
    if not self.context_change(leftBuffer):
        return typedSince
    lastToken = ReverseTokenizer(previous, self.lowercase).next_token()
    if len(lastToken) == 0:
        return typedSince
    return lastToken + typedSince
144
145
146 -class ContextMonitor(object):
147 """Monitor the user's current context.
148
149 This class monitors the input buffers in order to:
150 - Tokenize the input and use the tokens for prediction.
151 - Identify context changes.
152
153 G{classtree ContextMonitor}
154 """
155
def __init__(self, config, predictorRegistry, callback):
    """Build a monitor and register it on the predictor registry.

    @param config:
        Configuration object. The 'lowercase' and 'live_learning'
        options of the 'ContextMonitor' section are read from it as
        booleans, and it is handed over to the ContextChangeDetector.
    @type config: L{drvr.Configuration}
    @param predictorRegistry:
        Registry of predictors. The monitor iterates over it to call each
        predictor's learn() method, and stores itself on the registry's
        contextMonitor attribute.
    @type predictorRegistry: L{PredictorRegistry}
    @param callback:
        Object holding the input buffers; the monitor reads its left and
        right attributes.
    @type callback: L{Callback}
    """
    getSetting = config.getas
    self.config = config
    self.lowercase = getSetting('ContextMonitor', 'lowercase', 'bool')
    self.liveLearning = getSetting(
        'ContextMonitor', 'live_learning', 'bool')
    self.callback = callback
    self.predictorRegistry = predictorRegistry
    self.contextChangeDetector = ContextChangeDetector(
        self.lowercase, config)
    # Make the monitor reachable from the registry.
    predictorRegistry.contextMonitor = self
184
def context_change(self):
    """Ask the change detector whether the context has changed.

    The current left buffer is handed to the ContextChangeDetector.

    @return:
        True if a context change occurred, False otherwise.
    @rtype: bool
    """
    detector = self.contextChangeDetector
    leftInput = self.left_buffer()
    return detector.context_change(leftInput)
193
195 """Check if a context change occurred and learn what needs to be learnt.
196
197 This method is called by Driver.predict() after the predictions have
198 been computed. It asks the change detector for the text inputted since
199 the last update and, if that text is not empty, learns from it when
200 live learning is enabled. Finally, it updates the monitored scope.
201 """
202 change = self.contextChangeDetector.change(self.left_buffer())
203 if self.liveLearning and change:
204 self.learn(change)
205 self.contextChangeDetector.update_monitored_scope(self.left_buffer())
206
def learn(self, string):
    """Tokenize a string and pass the tokens to every predictor.

    The string is split with a ForwardTokenizer, the last token is
    discarded, and the remaining tokens are given to the learn() method
    of each predictor of the registry.

    @param string:
        The string to learn.
    @type string: str
    """
    tokenizer = ForwardTokenizer(
        string, self.lowercase, blankspaces, separators)
    tokens = []
    while tokenizer.has_more_tokens():
        tokens.append(tokenizer.next_token())
    # Drop the final token (no-op on an empty list).
    # NOTE(review): presumably the token still being typed -- confirm.
    del tokens[-1:]
    for predictor in self.predictorRegistry:
        predictor.learn(tokens)
228
230 """Return the token just before the cursor (left token of index 0).
231
232 @return:
233 The token just before the cursor or an empty string if there is
234 none.
235 @rtype: str
236 """
237 return self.left_token(0)
238
240 """Return the token just after the cursor (right token of index 0).
241
242 @return:
243 The token just after the cursor or an empty string if there is
244 none.
245 @rtype: str
246 """
247 return self.right_token(0)
248
def left_token(self, index):
    """Return the token at a given index in the left input buffer.

    Tokens are read with a ReverseTokenizer built on the left buffer;
    index 0 is the first token it yields.

    @param index:
        The index of the token to retrieve in the left input buffer.
    @type index: int

    @return:
        The token at index 'index' in the left input buffer, or an empty
        string if the token doesn't exist (which includes negative
        indexes).
    @rtype: str
    """
    tokenizer = ReverseTokenizer(self.left_buffer(), self.lowercase)
    # Bound up front so the method returns '' instead of raising
    # UnboundLocalError when the loop body never runs.
    token = ''
    consumed = 0
    while tokenizer.has_more_tokens() and consumed <= index:
        token = tokenizer.next_token()
        consumed += 1
    if consumed <= index:
        # The tokenizer ran dry before reaching the requested index.
        token = ''
    return token
270
def right_token(self, index):
    """Return the token at a given index in the right input buffer.

    Tokens are read with a ForwardTokenizer built on the right buffer;
    index 0 is the first token it yields.

    @param index:
        The index of the token to retrieve in the right input buffer.
    @type index: int

    @return:
        The token at index 'index' in the right input buffer, or an empty
        string if the token doesn't exist (which includes negative
        indexes).
    @rtype: str
    """
    # The right buffer lives on the callback: go through right_buffer(),
    # like left_token() goes through left_buffer(), instead of reading a
    # 'right' attribute that nothing visible in this class assigns.
    tokenizer = ForwardTokenizer(self.right_buffer(), self.lowercase)
    # Bound up front so the method returns '' instead of raising
    # UnboundLocalError when the loop body never runs.
    token = ''
    consumed = 0
    while tokenizer.has_more_tokens() and consumed <= index:
        token = tokenizer.next_token()
        consumed += 1
    if consumed <= index:
        # The tokenizer ran dry before reaching the requested index.
        token = ''
    return token
291
def previous_tokens(self, index, change):
    """Return a token located before the change in the left buffer.

    This method is called in some predictors' learn() methods to fill
    the n-grams. The token is looked up in the left buffer at position
    index + len(change).

    @param index:
        Index of the previous token.
    @type index: int
    @param change:
        The change; only its length is used, as an offset to skip.
    @type change: str

    @return:
        The token found at that position or an empty string if there is
        none.
    @rtype: str
    """
    offset = len(change)
    return self.left_token(offset + index)
313
def left_buffer(self):
    """Read the left input buffer from the callback.

    @return:
        The value of the callback's left attribute.
    @rtype: str
    """
    source = self.callback
    return source.left
322
def right_buffer(self):
    """Read the right input buffer from the callback.

    @return:
        The value of the callback's right attribute.
    @rtype: str
    """
    source = self.callback
    return source.right
332
def make_completion(self, suggestion):
    """Compute the characters needed to complete the current prefix.

    The prefix is the token just before the cursor, as returned by
    self.prefix(). The completion is what remains of the suggestion once
    the first len(prefix) characters are removed.

    For instance, if the prefix is::
        "wor"
    And the suggestion is::
        "world"
    Then the completion is::
        "ld"

    With an empty prefix::
        ""
    And the suggestion::
        "guilty"
    The completion is the whole suggestion::
        "guilty"

    The prefix and the suggestion are compared case-insensitively. If the
    suggestion does not start with the prefix, False is returned.

    @param suggestion:
        The suggested word from which to compute the completion.
    @type suggestion: str

    @return:
        The completion string, or False if the suggestion does not start
        with the prefix.
    @rtype: str or bool
    """
    typed = self.prefix()
    matches = suggestion.lower().startswith(typed.lower())
    if not matches:
        return False
    return suggestion[len(typed):]
369