prest.cntxt

1 #!/usr/bin/env python3 2 # -*- coding: utf-8 -*- 3 4 """Classes for context monitoring and context change detection. 5 6 The context is determined by the input buffers contained in the Callback class. 7 It is necessary to monitor the context in order to know what the user is typing 8 and compute accurate predictive suggestions. 9 """ 10 11 from __future__ import absolute_import, unicode_literals 12 import copy 13 import char 14 import tknz 15 try: 16 from StringIO import StringIO 17 except ImportError: 18 from io import StringIO 19 20

class ContextChangeDetector(object):
    """Detect context changes in the left input buffer.

    A context change can occur when some special characters appear in the
    buffers:
        - Word characters indicate that the current token is a (partial) word.
        - Blankspaces indicate the separation between two words. They mark
          the end of the current token.
        - Separators indicate a separation between two words. The dot
          separator marks the end of the sentence (and of the token) so the
          next token and suggestions should begin with an uppercase letter.
        - Special characters are non-printable characters such as backspace
          and arrow keys which are used to modify the input buffers.

    It is important to detect context changes because some operations such as
    n-gram learning from input or suggested words suppression have to be
    carried out upon context changes.

    The detector remembers the tail of the left buffer (the "monitored
    scope") as it was at the previous update and compares the current left
    buffer against it.

    G{classtree ContextChangeDetector}
    """

    def __init__(self, lowercase, config):
        """ContextChangeDetector creator.

        @param lowercase:
            Indicate if the tokens should be converted to lowercase.
        @type lowercase: boolean
        @param config:
            It is used to retrieve the ContextMonitor settings from the
            configuration file.
        @type config: L{drvr.Configuration}
        """
        self.lowercase = lowercase
        self.config = config
        # Maximum number of trailing characters of the left buffer that are
        # remembered between two updates.
        self.monitoredScopeSize = self.config.getas(
            'ContextMonitor', 'monitored_scope', 'int')
        self.monitoredScope = ''

    def update_monitored_scope(self, string):
        """Move the monitored scope to the end of the given string.

        The monitored scope is the whole string when it is short enough,
        otherwise its last C{monitoredScopeSize} characters.

        @param string:
            Every character inputted in the monitored buffer.
        @type string: str
        """
        size = self.monitoredScopeSize
        if len(string) <= size:
            self.monitoredScope = string
        else:
            # Keep the TAIL of the buffer: the most recently typed characters
            # are the ones the next comparison has to find again. Slicing
            # from an explicit start index (rather than string[-size:]) also
            # yields an empty scope when size is 0.
            self.monitoredScope = string[len(string) - size:]

    def context_change(self, leftBuffer):
        """Check if the context has changed.

        To determine if a context change occurred it is necessary to scan
        the input left buffer and the monitored scope. A change occurred if:
            - The monitored scope is not part of the left buffer.
            - The monitored scope is part of the left buffer and a separator
              character appears in the part of the left buffer which follows
              the monitored scope.

        @param leftBuffer:
            The input left buffer.
        @type leftBuffer: str

        @return:
            True or False whether the context has changed or not.
        @rtype: boolean
        """
        prevContext = self.monitoredScope
        currContext = leftBuffer
        if len(prevContext) == 0:
            # Nothing monitored yet: any input at all is a change.
            return len(currContext) != 0
        scopeStart = currContext.rfind(prevContext)
        if scopeStart == -1:
            # The previously seen text has been edited or removed.
            return True
        scopeEnd = scopeStart + len(prevContext)
        rest = currContext[scopeEnd:]
        lastWordIdx = char.last_word_char(rest)
        if lastWordIdx == -1:
            if len(rest) == 0:
                # Nothing was typed since the last update.
                return False
            # Only non-word characters were typed: this is a change only if
            # they terminate a word, i.e. the monitored scope ended on a
            # word character.
            return bool(char.is_word_char(currContext[scopeEnd - 1]))
        # Word characters were typed. If the buffer still ends on one the
        # user is in the middle of a token, otherwise a token was completed.
        return lastWordIdx != len(rest) - 1

    def change(self, leftBuffer):
        """Return the (part of the) token(s) appearing after a change.

        When a change occurs it is necessary to retrieve the characters
        forming (partial) tokens which have been inputted AFTER the change
        and this is what this method does.
        Whether a change occurred or not is determined by
        self.context_change().

        @note: If no change has been registered yet, or if the monitored
            scope cannot be found in the left buffer, then the whole
            leftBuffer is returned.

        @param leftBuffer:
            The input left buffer.
        @type leftBuffer: str

        @return:
            (Part of) tokens inputted after the last change.
        @rtype: str
        """
        prevContext = self.monitoredScope
        if len(prevContext) == 0:
            return leftBuffer
        scopeStart = leftBuffer.rfind(prevContext)
        if scopeStart == -1:
            return leftBuffer
        result = leftBuffer[scopeStart + len(prevContext):]
        if self.context_change(leftBuffer):
            # The text typed since the last update may only hold the end of
            # a token whose beginning lies inside the monitored scope:
            # prepend the last token of the scope to rebuild it.
            tokenizer = tknz.ReverseTokenizer(prevContext, self.lowercase)
            firstToken = tokenizer.next_token()
            if firstToken:
                result = firstToken + result
        return result

146 147

class ContextMonitor(object):  # observer.Observer
    """Monitor the user's current context.

    This class monitors the input buffers in order to:
        - Tokenize the input and use the tokens for prediction.
        - Identify context changes.

    G{classtree ContextMonitor}
    """

    def __init__(self, config, predictorRegistry, callback):
        """ContextMonitor creator.

        @param config:
            It is used to retrieve the ContextMonitor settings from the
            configuration file.
        @type config: L{drvr.Configuration}
        @param predictorRegistry:
            It is used to access the predictors' learn() methods. Also, the
            ContextMonitor is used by the predictors to access the input
            buffers.
        @type predictorRegistry: L{prdct.PredictorRegistry}
        @param callback:
            As the callback holds the input buffers and the ContextMonitor
            operates on these buffers, it is used to access the input buffers
            from inside the ContextMonitor.
        @type callback: L{clbk.Callback}
        """
        self.config = config
        self.lowercase = self.config.getas(
            'ContextMonitor', 'lowercase', 'bool')
        self.liveLearning = self.config.getas(
            'ContextMonitor', 'live_learning', 'bool')
        self.predictorRegistry = predictorRegistry
        self.callback = callback
        self.contextChangeDetector = ContextChangeDetector(
            self.lowercase, self.config)
        # Give the registry a back-reference to this monitor.
        self.predictorRegistry.contextMonitor = self

    @staticmethod
    def _nth_token(tokenizer, index):
        """Return the index-th token produced by a tokenizer, or ''."""
        token = ''
        count = 0
        while tokenizer.has_more_tokens() and count <= index:
            token = tokenizer.next_token()
            count += 1
        # The tokenizer ran dry (or index was negative) before reaching the
        # requested position: there is no such token.
        if count <= index or count == 0:
            return ''
        return token

    def context_change(self):
        """Check if a context change occurred.

        @return:
            Return True or False whether a context change occurred.
        @rtype: bool
        """
        return self.contextChangeDetector.context_change(self.left_buffer())

    def update(self):
        """Check if context changes occurred and learn what needs to be learnt.

        This method is called by Driver.predict() after the predictions have
        been computed. It retrieves what has been inputted since the last
        update and, if there is something and live learning is ON, learns
        from it. Finally, it updates the monitored scope.
        """
        leftBuffer = self.left_buffer()
        change = self.contextChangeDetector.change(leftBuffer)
        if self.liveLearning and change:
            self.learn(change)
        self.contextChangeDetector.update_monitored_scope(self.left_buffer())

    def learn(self, string):
        """Learn n-grams from the input buffers.

        Trigger the learn() method of each predictor of the registry. This
        method tokenizes the given string and hands the tokens to the
        predictors so that the program learns from the user input.

        @param string:
            The string to learn.
        @type string: str
        """
        tokenizer = tknz.ForwardTokenizer(
            string, self.lowercase, char.blankspaces, char.separators)
        tokens = []
        while tokenizer.has_more_tokens():
            tokens.append(tokenizer.next_token())
        # The last token is dropped before learning.
        # NOTE(review): presumably because it is the still incomplete (or
        # empty) token under the cursor -- confirm against the tokenizer.
        tokens = tokens[:-1]
        for predictor in self.predictorRegistry:
            predictor.learn(tokens)

    def prefix(self):
        """Return the token just before the cursor.

        @return:
            The token just before the cursor or an empty string if there is
            none.
        @rtype: str
        """
        return self.left_token(0)

    def suffix(self):
        """Return the token just after the cursor.

        @return:
            The token just after the cursor or the empty string if there is
            none.
        @rtype: str
        """
        return self.right_token(0)

    def left_token(self, index):
        """Return the token at a given index in the left input buffer.

        The left buffer is tokenized backward, so index 0 is the token
        closest to the cursor.

        @param index:
            The index of the token to retrieve in the left input buffer.
        @type index: int

        @return:
            The token at index 'index' in the left input buffer or an empty
            string if the token doesn't exist.
        @rtype: str
        """
        tokenizer = tknz.ReverseTokenizer(self.left_buffer(), self.lowercase)
        return self._nth_token(tokenizer, index)

    def right_token(self, index):
        """Return the token at a given index in the right input buffer.

        The right buffer is tokenized forward, so index 0 is the token
        closest to the cursor.

        @param index:
            The index of the token to retrieve in the right input buffer.
        @type index: int

        @return:
            The token at index 'index' in the right input buffer or an empty
            string if the token doesn't exist.
        @rtype: str
        """
        # The right buffer lives on the callback, not on the monitor itself.
        tokenizer = tknz.ForwardTokenizer(self.right_buffer(), self.lowercase)
        return self._nth_token(tokenizer, index)

    def previous_tokens(self, index, change):
        """Return the token just before the change token (if any).

        This method is called in some predictors' learn() method. It
        retrieves the token that appears just before the change token and
        has already been learnt before (or should have). The previous token
        is used to fill the n-grams.

        @param index:
            Index of the previous token.
        @type index: int
        @param change:
            The change token.
        @type change: str

        @return:
            The token just before the change token or an empty string if
            there is none.
        @rtype: str
        """
        # NOTE(review): the offset is len(change); this skips one token per
        # element of 'change', which looks intended for a sequence of tokens
        # rather than a single string -- confirm with the callers.
        return self.left_token(index + len(change))

    def left_buffer(self):
        """Use the callback to get the value of the left buffer.

        @return:
            The left input buffer.
        @rtype: str
        """
        return self.callback.left

    def right_buffer(self):
        """Use the callback to get the value of the right buffer.

        @return:
            The right input buffer.
        @rtype: str
        """
        return self.callback.right

    def make_completion(self, suggestion):
        """Compute the completion string given a suggested word.

        This method computes and returns the completion string using the
        token just before the cursor (prefix) and the suggested word
        (suggestion). The suggestion should be the word that the user chose
        from the suggested words list.

        For instance, if the prefix is::
            "wor"
        And the suggestion is::
            "world"
        Then this method will compute the completion::
            "ld"

        If the character before the cursor is a blankspace or a separator
        then the prefix should be empty::
            ""
        Then if the suggestion is::
            "guilty"
        This method will compute the completion::
            "guilty"

        If the suggestion and the prefix don't match then False is returned.
        This should never happen as suggestions completing an input word
        should always match it. Still, it is checked at the cost of some
        lower() and startswith() calls.

        @param suggestion:
            The suggested word from which to compute the completion.
        @type suggestion: str

        @return:
            The part of the suggestion following the prefix, or False if the
            suggestion doesn't start with the prefix (case-insensitively).
        @rtype: str or bool
        """
        prefix = self.prefix()
        if not suggestion.lower().startswith(prefix.lower()):
            return False
        return suggestion[len(prefix):]

372

Source Code for Module prest.cntxt