tipy.cntxt

1 #!/usr/bin/env python3 2 # -*- coding: utf-8 -*- 3 4 """Classes for context monitoring and context change detection. 5 6 The context is determined by the input buffers contained in the Callback class. 7 It is necessary to monitor the context in order to know what the user is typing 8 and compute accurate predictive suggestions. 9 """ 10 11 from tipy.char import blankspaces, separators, is_word_char, last_word_char 12 from tipy.tknz import ReverseTokenizer, ForwardTokenizer 13 try: 14 from StringIO import StringIO 15 except ImportError: 16 from io import StringIO 17 18

class ContextChangeDetector(object):
    """Detect context changes in the left input buffer.

    A context change can occur when some special characters appear in the
    buffers:
        - Word characters indicate that the current token is a (partial)
          word.
        - Blankspaces indicate the separation between two words. They mark
          the end of the current token.
        - Separators indicate a separation between two words. The dot
          separator marks the end of the sentence (and of the token) so the
          next token and suggestions should begin with an uppercase letter.
        - Special characters are non-printable characters such as backspace
          and arrow keys which are used to modify the input buffers.

    It is important to detect context changes because some operations such
    as n-gram learning from input or suggested words suppression have to be
    carried out upon context changes.

    The detector remembers the tail of the left buffer (the "monitored
    scope") and compares it with the current left buffer.

    G{classtree ContextChangeDetector}
    """

    def __init__(self, lowercase, config):
        """ContextChangeDetector creator.

        @param lowercase:
            Indicate if the tokens should be converted to lowercase. It is
            forwarded to the tokenizer used in L{change}.
        @type lowercase: boolean
        @param config:
            It is used to retrieve the ContextMonitor settings from the
            configuration file (the 'monitored_scope' option of the
            'ContextMonitor' section is read as an int).
        @type config: L{drvr.Configuration}
        """
        self.lowercase = lowercase
        self.config = config
        # Maximum number of characters kept in the monitored scope.
        self.monitoredScopeSize = self.config.getas(
            'ContextMonitor', 'monitored_scope', 'int')
        # Nothing has been monitored yet.
        self.monitoredScope = ''

    def update_monitored_scope(self, string):
        """Move the monitored scope to the end of the given string.

        The monitored scope becomes the whole string when it is not longer
        than the configured scope size, otherwise it becomes the LAST
        C{monitoredScopeSize} characters of the string.

        @param string:
            Every character inputted in the monitored buffer.
        @type string: str
        """
        size = self.monitoredScopeSize
        if len(string) <= size:
            self.monitoredScope = string
        else:
            # Keep the tail of the string. An explicit start index is used
            # instead of string[-size:] so that a size of 0 gives '' and
            # not the whole string.
            self.monitoredScope = string[len(string) - size:]

    def context_change(self, leftBuffer):
        """Check if the context has changed.

        To determine if a context change occurred it is necessary to scan
        the input left buffer and the monitored scope. With an empty
        monitored scope, a change occurred if the left buffer is not empty.
        Otherwise a change occurred if:
            - The monitored scope is not part of the left buffer.
            - The monitored scope is part of the left buffer and the text
              following its last occurrence contains no word character
              while the scope itself ends with a word character.
            - The monitored scope is part of the left buffer and the text
              following its last occurrence contains a word character but
              does not end with one.

        @param leftBuffer:
            The input left buffer.
        @type leftBuffer: str

        @return:
            True or False whether the context has changed or not.
        @rtype: boolean
        """
        prevContext = self.monitoredScope
        if not prevContext:
            # Nothing was monitored: any input at all is a change.
            return bool(leftBuffer)
        start = leftBuffer.rfind(prevContext)
        if start == -1:
            return True
        end = start + len(prevContext)
        rest = leftBuffer[end:]
        if not rest:
            # Nothing was typed after the monitored scope.
            return False
        # last_word_char() is expected to give the index of the last word
        # character of its argument, or -1 when there is none.
        idx = last_word_char(rest)
        if idx == -1:
            # Only non-word characters were typed: the context changed if
            # they terminate a word, i.e. if the scope ends with a word
            # character.
            return bool(is_word_char(leftBuffer[end - 1]))
        # No change while the user is still typing a word, i.e. while the
        # last word character is the very last character of the rest.
        return idx != len(rest) - 1

    def change(self, leftBuffer):
        """Return the (part of the) token(s) appearing after a change.

        When a change occurs it is necessary to retrieve the characters
        forming (partial) tokens which have been inputted AFTER the change
        and this is what this method does.
        Whether a change occurred or not is determined by
        self.context_change().

        @note: If the monitored scope is empty or cannot be found in the
            left buffer then the whole leftBuffer is returned.

        @param leftBuffer:
            The input left buffer.
        @type leftBuffer: str

        @return:
            The characters following the last occurrence of the monitored
            scope in the left buffer. If a context change occurred, they
            are prefixed with the first token returned by a
            ReverseTokenizer built on the monitored scope (when that token
            is not empty).
        @rtype: str
        """
        prevContext = self.monitoredScope
        if not prevContext:
            return leftBuffer
        start = leftBuffer.rfind(prevContext)
        if start == -1:
            return leftBuffer
        result = leftBuffer[start + len(prevContext):]
        if self.context_change(leftBuffer):
            # The token may have been started inside the monitored scope,
            # in which case the remainder only holds its end.
            tokenizer = ReverseTokenizer(prevContext, self.lowercase)
            firstToken = tokenizer.next_token()
            if firstToken:
                result = firstToken + result
        return result

144 145

class ContextMonitor(object):  # observer.Observer
    """Monitor the user's current context.

    This class monitors the input buffers in order to:
        - Tokenize the input and use the tokens for prediction.
        - Identify context changes.

    G{classtree ContextMonitor}
    """

    def __init__(self, config, predictorRegistry, callback):
        """ContextMonitor creator.

        The new instance registers itself as the C{contextMonitor}
        attribute of the predictor registry.

        @param config:
            It is used to retrieve the ContextMonitor settings from the
            configuration file (the 'lowercase' and 'live_learning' options
            of the 'ContextMonitor' section are read as booleans).
        @type config: L{drvr.Configuration}
        @param predictorRegistry:
            It is used to access the predictors' learn() methods. Also, the
            ContextMonitor is used by the predictors to access the input
            buffers.
        @type predictorRegistry: L{PredictorRegistry}
        @param callback:
            As the callback holds the input buffers and the ContextMonitor
            operates on these buffers, it is used to access the input
            buffers from inside the ContextMonitor.
        @type callback: L{Callback}
        """
        self.config = config
        self.lowercase = self.config.getas(
            'ContextMonitor', 'lowercase', 'bool')
        self.liveLearning = self.config.getas(
            'ContextMonitor', 'live_learning', 'bool')
        self.predictorRegistry = predictorRegistry
        self.callback = callback
        self.contextChangeDetector = ContextChangeDetector(
            self.lowercase, self.config)
        self.predictorRegistry.contextMonitor = self

    def context_change(self):
        """Check if a context change occurred in the left buffer.

        @return:
            Return True or False whether a context change occurred.
        @rtype: bool
        """
        return self.contextChangeDetector.context_change(self.left_buffer())

    def update(self):
        """Check if context changes occurred and learn what has to be learnt.

        This method is called by Driver.predict() after the predictions
        have been computed. It retrieves what has been inputted since the
        last update and, if there is something and live learning is ON, it
        learns it. Finally, it updates the monitored scope.
        """
        change = self.contextChangeDetector.change(self.left_buffer())
        if self.liveLearning and change:
            self.learn(change)
        self.contextChangeDetector.update_monitored_scope(self.left_buffer())

    def learn(self, string):
        """Learn n-grams from the input buffers.

        Trigger the learn() method of each predictor of the registry. The
        string is tokenized and every token but the last one is handed to
        the predictors so that the program learns from the user input.

        @param string:
            The string to learn.
        @type string: str
        """
        tok = ForwardTokenizer(string, self.lowercase, blankspaces, separators)
        tokens = []
        while tok.has_more_tokens():
            tokens.append(tok.next_token())
        # The last token is not learnt (presumably because it can still be
        # a partially typed word). Slicing an empty list is harmless.
        tokens = tokens[:-1]
        for predictor in self.predictorRegistry:
            predictor.learn(tokens)

    def prefix(self):
        """Return the token just before the cursor.

        @return:
            The token just before the cursor or an empty string if there is
            none.
        @rtype: str
        """
        return self.left_token(0)

    def suffix(self):
        """Return the token just after the cursor.

        @return:
            The token just after the cursor or the empty string if there is
            none.
        @rtype: str
        """
        return self.right_token(0)

    @staticmethod
    def _token_at(tokenizer, index):
        """Return the index-th token produced by a tokenizer.

        @param tokenizer:
            Any object offering has_more_tokens() and next_token().
        @param index:
            The 0-based position of the wanted token.
        @type index: int

        @return:
            The requested token or an empty string if the tokenizer runs
            out of tokens first (or if the index is negative).
        @rtype: str
        """
        token = ''
        count = 0
        while tokenizer.has_more_tokens() and count <= index:
            token = tokenizer.next_token()
            count += 1
        if count <= index:
            # Not enough tokens: the last one read is not the wanted one.
            return ''
        return token

    def left_token(self, index):
        """Return the token at a given index in the left input buffer.

        The left buffer is read with a ReverseTokenizer, index 0 being the
        first token it returns.

        @param index:
            The index of the token to retrieve in the left input buffer.
        @type index: int

        @return:
            The token at index 'index' in the left input buffer or an empty
            string if the token doesn't exist.
        @rtype: str
        """
        tok = ReverseTokenizer(self.left_buffer(), self.lowercase)
        return self._token_at(tok, index)

    def right_token(self, index):
        """Return the token at a given index in the right input buffer.

        The right buffer is read with a ForwardTokenizer, index 0 being the
        first token it returns.

        @param index:
            The index of the token to retrieve in the right input buffer.
        @type index: int

        @return:
            The token at index 'index' in the right input buffer or an
            empty string if the token doesn't exist.
        @rtype: str
        """
        # The right buffer lives in the callback, not in the monitor.
        tok = ForwardTokenizer(self.right_buffer(), self.lowercase)
        return self._token_at(tok, index)

    def previous_tokens(self, index, change):
        """Return the token just before the change token (if any).

        This method is called in some predictors' learn() method. It
        retrieves the token that appears just before the change token and
        has already been learnt before (or should have). The previous token
        is used to fill the n-grams.

        @param index:
            Index of the previous token.
        @type index: int
        @param change:
            The change token.
        @type change: str

        @return:
            The token just before the change token or an empty string if
            there is none.
        @rtype: str
        """
        # NOTE(review): the left buffer is skipped by len(change) tokens;
        # if 'change' really is a str this is its number of characters --
        # confirm what the callers pass.
        return self.left_token(index + len(change))

    def left_buffer(self):
        """Use the callback to get the value of the left buffer.

        @return:
            The left input buffer.
        @rtype: str
        """
        return self.callback.left

    def right_buffer(self):
        """Use the callback to get the value of the right buffer.

        @return:
            The right input buffer.
        @rtype: str
        """
        return self.callback.right

    def make_completion(self, suggestion):
        """Compute the completion string given a suggested word.

        This method computes and returns the completion string using the
        token just before the cursor (prefix) and the suggested word
        (suggestion). The suggestion should be the word that the user chose
        from the suggested words list.

        For instance, if the prefix is::
            "wor"
        And the suggestion is::
            "world"
        Then this method will compute the completion::
            "ld"

        If the character before the cursor is a blankspace or a separator
        then the prefix should be empty::
            ""
        Then if the suggestion is::
            "guilty"
        This method will compute the completion::
            "guilty"

        If the suggestion and the prefix don't match (the comparison is
        case-insensitive) then False is returned. This should never happen
        as suggestions completing an input word should always match it.
        Still, it is checked at the cost of some lower() and startswith()
        calls.

        @param suggestion:
            The suggested word from which to compute the completion.
        @type suggestion: str

        @return:
            The part of the suggestion following the prefix, or False if
            the suggestion doesn't start with the prefix.
        @rtype: str or bool
        """
        prefix = self.prefix()
        if suggestion.lower().startswith(prefix.lower()):
            return suggestion[len(prefix):]
        return False

369

Source Code for Module tipy.cntxt