Module header_data
[hide private]
[frames | no frames]

Source Code for Module header_data

  1  """ 
  2  This is a parser for the header section of KAF/NAF 
  3  """ 
  4   
  5  from lxml import etree 
  6  import time 
  7  import platform  
  8   
class CfileDesc:
    """
    This class encapsulates the file description element in the header.

    Every value is stored as an XML attribute of the wrapped C{fileDesc}
    node (title, author, publisher, section, location, magazine,
    creationtime, filename, filetype and pages).

    Example of usage:

    ######################################################
    obj = KafNafParser('examples/entity_example.naf')

    header = obj.get_header()

    my_file_desc = header.get_fileDesc()
    if my_file_desc is None:
        #Create a new one
        my_file_desc = CfileDesc()
        header.set_fileDesc(my_file_desc)

    #Modify the attributes
    my_file_desc.set_title('my new title')

    #Dump the object to a new file (or the changes will not be saved)
    obj.dump()
    ######################################################
    """

    def __init__(self, node=None):
        """
        Constructor of the object
        @type node: xml Element or None (to create an empty one)
        @param node: this is the node of the element. If it is None it will create a new object
        """
        self.type = 'KAF/NAF'
        if node is None:
            self.node = etree.Element('fileDesc')
        else:
            self.node = node

    def get_node(self):
        """
        Returns the node of the element
        @rtype: xml Element
        @return: the node of the element
        """
        return self.node

    def set_title(self, t):
        """
        Sets the title
        @param t: title
        @type t: string
        """
        self.node.set('title', t)

    def get_title(self):
        """
        Returns the title
        @return: title, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('title')

    def set_author(self, a):
        """
        Sets the author
        @param a: author
        @type a: string
        """
        self.node.set('author', a)

    def get_author(self):
        """
        Returns the author
        @return: author, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('author')

    def set_publisher(self, p):
        """
        Sets the publisher
        @param p: publisher
        @type p: string
        """
        self.node.set('publisher', p)

    def get_publisher(self):
        """
        Returns the publisher
        @return: publisher, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('publisher')

    def set_section(self, s):
        """
        Sets the section
        @param s: section
        @type s: string
        """
        self.node.set('section', s)

    def get_section(self):
        """
        Returns the section
        @return: section, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('section')

    def set_location(self, l):
        """
        Sets the location
        @param l: location
        @type l: string
        """
        self.node.set('location', l)

    def get_location(self):
        """
        Returns the location
        @return: location, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('location')

    def set_magazine(self, m):
        """
        Sets the magazine
        @param m: magazine
        @type m: string
        """
        self.node.set('magazine', m)

    def get_magazine(self):
        """
        Returns the magazine
        @return: magazine, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('magazine')

    def set_creationtime(self, t):
        """
        Sets the creation time
        @param t: creation time
        @type t: string
        """
        self.node.set('creationtime', t)

    def get_creationtime(self):
        """
        Returns the creation time
        @return: creation time, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('creationtime')

    def set_filename(self, f):
        """
        Sets the filename
        @param f: filename
        @type f: string
        """
        self.node.set('filename', f)

    def get_filename(self):
        """
        Returns the filename
        @return: filename, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('filename')

    def set_filetype(self, f):
        """
        Sets the filetype
        @param f: filetype
        @type f: string
        """
        self.node.set('filetype', f)

    def get_filetype(self):
        """
        Returns the filetype
        @return: filetype, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('filetype')

    def set_pages(self, p):
        """
        Sets the pages
        @param p: pages
        @type p: string
        """
        self.node.set('pages', p)

    def get_pages(self, p=None):
        """
        Returns the pages
        @param p: ignored; accepted only so that callers written against the
            old signature (which required this argument) keep working
        @return: pages, or None if the attribute is not set
        @rtype: string
        """
        return self.node.get('pages')
210 211
class Cpublic:
    """
    Wrapper around the public element of the header.

    The element carries two attributes, C{publicId} and C{uri}, which are
    read and written directly on the wrapped node.
    """

    def __init__(self, node=None):
        """
        Builds the wrapper
        @type node: xml Element or None
        @param node: existing element to wrap; when None a fresh, empty
            C{public} element is created
        """
        # Reuse the caller's node when given, otherwise start from scratch
        self.node = etree.Element('public') if node is None else node
        self.type = 'KAF/NAF'

    def get_node(self):
        """
        Gives access to the wrapped element
        @rtype: xml Element
        @return: the underlying node
        """
        return self.node

    def set_uri(self, t):
        """
        Stores the uri attribute
        @type t: string
        @param t: the uri
        """
        self.node.set('uri', t)

    def get_uri(self):
        """
        Reads the uri attribute
        @rtype: string
        @return: the uri, or None when the attribute is absent
        """
        uri = self.node.get('uri')
        return uri

    def set_publicid(self, a):
        """
        Stores the publicId attribute
        @type a: string
        @param a: the public identifier
        """
        self.node.set('publicId', a)

    def get_publicid(self):
        """
        Reads the publicId attribute
        @rtype: string
        @return: the public identifier, or None when the attribute is absent
        """
        public_id = self.node.get('publicId')
        return public_id
264 265
class Clp:
    """
    Wrapper around a single linguistic processor (C{lp}) element of the header.
    """

    def __init__(self, node=None, name="", version="", timestamp=None, btimestamp=None, etimestamp=None):
        """
        Builds the wrapper
        @type node: xml Element or None (to create an empty one)
        @param node: existing element to wrap. When it is given, it is used as is
            and all the other arguments are ignored
        @type name: string
        @param name: the name of the linguistic processor
        @type version: string
        @param version: the version of the linguistic processor
        @type timestamp: string
        @param timestamp: the timestamp, or None to set it to the current time
        @type btimestamp: string
        @param btimestamp: the begin timestamp, or None to set it to the current time
            (NOTE: only use None if the header is created at the beginning of the process!)
        @type etimestamp: string
        @param etimestamp: the end timestamp, or None to set it to the current time
            (NOTE: only use None if the header is created at the end of the process!)
        """
        self.type = 'KAF/NAF'
        if node is not None:
            self.node = node
            return

        self.node = etree.Element('lp')
        self.set_name(name)
        self.set_version(version)
        self.set_timestamp(timestamp)
        self.set_beginTimestamp(btimestamp)
        self.set_endTimestamp(etimestamp)
        # Record the machine on which the processor ran
        self.node.set('hostname', platform.node())

    def _current_timestamp(self):
        """
        Formats the current local time the way the timestamp attributes store it
        @rtype: string
        @return: the current local time as produced by time.strftime
        """
        return time.strftime('%Y-%m-%dT%H:%M:%S%Z')

    def set_name(self, name):
        """
        Stores the name of the linguistic processor
        @type name: string
        @param name: name of the linguistic processor
        """
        self.node.set('name', name)

    def set_version(self, version):
        """
        Stores the version of the linguistic processor
        @type version: string
        @param version: version of the linguistic processor
        """
        self.node.set('version', version)

    def set_timestamp(self, timestamp=None):
        """
        Stores the timestamp of the linguistic processor
        @type timestamp: string
        @param timestamp: the timestamp, or None for the current time
        """
        value = self._current_timestamp() if timestamp is None else timestamp
        self.node.set('timestamp', value)

    def set_beginTimestamp(self, btimestamp=None):
        """
        Stores the begin timestamp of the linguistic processor
        @type btimestamp: string
        @param btimestamp: the begin timestamp, or None for the current time
        """
        value = self._current_timestamp() if btimestamp is None else btimestamp
        self.node.set('beginTimestamp', value)

    def set_endTimestamp(self, etimestamp=None):
        """
        Stores the end timestamp of the linguistic processor
        @type etimestamp: string
        @param etimestamp: the end timestamp, or None for the current time
        """
        value = self._current_timestamp() if etimestamp is None else etimestamp
        self.node.set('endTimestamp', value)

    def get_node(self):
        """
        Gives access to the wrapped element
        @rtype: xml Element
        @return: the node of the element
        """
        return self.node
359 360
class ClinguisticProcessors:
    """
    Wrapper around a linguisticProcessors element of the header: the group of
    linguistic processors attached to one layer.
    """

    def __init__(self, node=None):
        """
        Builds the wrapper
        @type node: xml Element or None
        @param node: existing element to wrap; when None a fresh, empty
            C{linguisticProcessors} element is created
        """
        # Reuse the caller's node when given, otherwise start from scratch
        self.node = etree.Element('linguisticProcessors') if node is None else node
        self.type = 'KAF/NAF'

    def get_layer(self):
        """
        Reads the layer attribute
        @rtype: string
        @return: the layer of the element, or None when the attribute is absent
        """
        layer = self.node.get('layer')
        return layer

    def set_layer(self, layer):
        """
        Stores the layer attribute
        @type layer: string
        @param layer: name of the layer
        """
        self.node.set('layer', layer)

    def add_linguistic_processor(self, my_lp):
        """
        Appends a linguistic processor as the last child of this element
        @type my_lp: L{Clp}
        @param my_lp: linguistic processor object
        """
        lp_node = my_lp.get_node()
        self.node.append(lp_node)

    def get_node(self):
        """
        Gives access to the wrapped element
        @rtype: xml Element
        @return: the node of the element
        """
        return self.node
408 409
class CHeader:
    """
    This class encapsulates the header (C{nafHeader} or C{kafHeader} element)
    """

    def __init__(self, node=None, type='NAF'):
        """
        Constructor of the object
        @type node: xml Element or None (to create an empty one)
        @param node: this is the node of the element. If it is None it will create a new object
        @type type: string
        @param type: the type of the object (KAF or NAF)
        @raise ValueError: if no node is given and type is neither 'NAF' nor 'KAF'
        """
        self.type = type
        if node is None:
            if self.type == 'NAF':
                self.node = etree.Element('nafHeader')
            elif self.type == 'KAF':
                self.node = etree.Element('kafHeader')
            else:
                # Fail here instead of leaving self.node undefined, which would
                # only surface later as an AttributeError far from the cause
                raise ValueError("Unknown header type %r (expected 'NAF' or 'KAF')" % (type,))
        else:
            self.node = node

    def _replace_child(self, tag, new_node):
        """
        Makes new_node the first child of the header, dropping any child with
        the given tag first so that the header never holds two of them
        @type tag: string
        @param tag: tag of the children to be replaced
        @type new_node: xml Element
        @param new_node: the element to insert
        """
        for old_node in self.node.findall(tag):
            self.node.remove(old_node)
        self.node.insert(0, new_node)

    def get_node(self):
        """
        Returns the node of the element
        @rtype: xml Element
        @return: the node of the element
        """
        return self.node

    def to_kaf(self):
        """
        Converts the header element to KAF (no effect unless the type is NAF)
        """
        if self.type == 'NAF':
            self.node.tag = 'kafHeader'
            self.type = 'KAF'

    def to_naf(self):
        """
        Converts the header element to NAF (no effect unless the type is KAF)
        """
        if self.type == 'KAF':
            self.node.tag = 'nafHeader'
            self.type = 'NAF'

    def get_dct(self):
        """
        Returns the document creation time defined in the header
        @rtype: string
        @return: the creationtime attribute of the fileDesc element, or None
            if there is no fileDesc element or the attribute is not set
        """
        file_desc_node = self.node.find('fileDesc')
        if file_desc_node is None:
            return None
        return file_desc_node.get('creationtime')

    def get_publicId(self):
        """
        Returns the public Id defined in the header
        @rtype: string
        @return: the publicId attribute of the public element, or None
            if there is no public element or the attribute is not set
        """
        public_node = self.node.find('public')
        if public_node is None:
            return None
        return public_node.get('publicId')

    def set_publicId(self, publicId):
        """
        Sets the public object, replacing the existing public element if any
        @param publicId: a public object
        @type publicId: L{Cpublic}
        """
        self._replace_child('public', publicId.get_node())

    def add_linguistic_processors(self, linpro):
        """
        Adds a linguistic processors element as the last child of the header
        @type linpro: L{ClinguisticProcessors}
        @param linpro: linguistic processors element
        """
        self.node.append(linpro.get_node())

    def remove_lp(self, layer):
        """
        Removes the linguistic processors for a given layer
        (only the first linguisticProcessors element for that layer is removed)
        @type layer: string
        @param layer: the name of the layer
        """
        for this_node in self.node.findall('linguisticProcessors'):
            if this_node.get('layer') == layer:
                self.node.remove(this_node)
                break

    def add_linguistic_processor(self, layer, my_lp):
        """
        Adds a linguistic processor to a certain layer. The linguisticProcessors
        element for the layer is created if the header does not have one yet
        @type layer: string
        @param layer: the name of the layer
        @type my_lp: L{Clp}
        @param my_lp: the linguistic processor
        """
        # Locate the linguisticProcessors element for that layer
        found_lp_obj = None
        for this_lp in self.node.findall('linguisticProcessors'):
            lp_obj = ClinguisticProcessors(this_lp)
            if lp_obj.get_layer() == layer:
                found_lp_obj = lp_obj
                break

        if found_lp_obj is None:
            # Not found: create the container for this layer first
            found_lp_obj = ClinguisticProcessors()
            found_lp_obj.set_layer(layer)
            self.add_linguistic_processors(found_lp_obj)

        found_lp_obj.add_linguistic_processor(my_lp)

    def get_fileDesc(self):
        """
        Returns the fileDesc object or None if there is no such element
        @return: the fileDesc object
        @rtype: L{CfileDesc}
        """
        node = self.node.find('fileDesc')
        if node is None:
            return None
        return CfileDesc(node=node)

    def set_fileDesc(self, fileDesc):
        """
        Sets the fileDesc object, replacing the existing fileDesc element if any
        @param fileDesc: a fileDesc object
        @type fileDesc: L{CfileDesc}
        """
        self._replace_child('fileDesc', fileDesc.get_node())
550