1 """"
2 This is a parser for the header section of KAF/NAF
3 """
4
5 from lxml import etree
6 import time
7 import platform
8
10 """
11 This class encapsulates the file description element in the header
12
13 Example of usage:
14
15 ######################################################
16 obj = KafNafParser('examples/entity_example.naf')
17
18 header = obj.get_header()
19
20 my_file_desc = header.get_fileDesc()
21 if my_file_desc is None:
22 #Create a new one
23 my_file_desc = CfileDesc()
24 header.set_fileDesc(my_file_desc)
25
26 #Modify the attributes
27 my_file_desc.set_title('my new title')
28
29 #Dump the object to a new file (or the changes will not be saved)
30 obj.dump()
31 ######################################################
32 """
34 """
35 Constructor of the object
36 @type node: xml Element or None (to create an empty one)
37 @param node: this is the node of the element. If it is None it will create a new object
38 """
39 self.type = 'KAF/NAF'
40 if node is None:
41 self.node = etree.Element('fileDesc')
42 else:
43 self.node = node
44
45
48
49
50
52 '''
53 Sets the title
54 @param t: title
55 @type t: string
56 '''
57 self.node.set('title',t)
58
60 '''
61 Returns the title
62 @return: title
63 @rtype: string
64 '''
65 return self.node.get('title')
66
68 '''
69 Sets the author
70 @param a: author
71 @type a: string
72 '''
73 self.node.set('author',a)
74
76 '''
77 Returns the author
78 @return: author
79 @rtype: string
80 '''
81 return self.node.get('author')
82
84 '''
85 Sets the publisher
86 @param p: publisher
87 @type p: string
88 '''
89 self.node.set('publisher',p)
90
92 '''
93 Returns the publisher
94 @return: publisher
95 @rtype: string
96 '''
97 return self.node.get('publisher')
98
100 '''
101 Sets the section
102 @param s: section
103 @type s: string
104 '''
105 self.node.set('section',s)
106
108 '''
109 Returns the section
110 @return: section
111 @rtype: string
112 '''
113 return self.node.get('section')
114
116 '''
117 Sets the location
118 @param l: location
119 @type l: string
120 '''
121 self.node.set('location',l)
122
124 '''
125 Returns the location
126 @return: location
127 @rtype: string
128 '''
129 return self.node.get('location')
130
132 '''
133 Sets the magazine
134 @param m: magazine
135 @type m: string
136 '''
137 self.node.set('magazine',m)
138
140 '''
141 Returns the magazine
142 @return: magazine
143 @rtype: string
144 '''
145 return self.node.get('magazine')
146
148 '''
149 Sets the creation time
150 @param t: creation time
151 @type t: string
152 '''
153 self.node.set('creationtime',t)
154
156 '''
157 Returns the creation time
158 @return: creation time
159 @rtype: string
160 '''
161 return self.node.get('creationtime')
162
164 '''
165 Sets the filename
166 @param f: filename
167 @type f: string
168 '''
169 self.node.set('filename',f)
170
172 '''
173 Returns the filename
174 @return: filename
175 @rtype: string
176 '''
177 return self.node.get('filename')
178
180 '''
181 Sets the filetype
182 @param f: filetype
183 @type f: string
184 '''
185 self.node.set('filetype',f)
186
188 '''
189 Returns the filetype
190 @return: filetype
191 @rtype: string
192 '''
193 return self.node.get('filetype')
194
def set_pages(self, p):
    '''
    Stores the pages value on the underlying XML node
    @param p: value written to the 'pages' attribute
    @type p: string
    '''
    target_node = self.node
    target_node.set('pages', p)
202
def get_pages(self, p=None):
    '''
    Returns the pages
    @param p: unused; accepted only so that existing calls passing an argument keep working
    @type p: any
    @return: the value of the 'pages' attribute of the node
    @rtype: string
    '''
    # The original signature required an argument that was never read, so a
    # plain obj.get_pages() call raised TypeError. The parameter is optional now.
    return self.node.get('pages')
210
211
213 """
214 This class encapsulates the public element in the header
215 """
217 """
218 Constructor of the object
219 @type node: xml Element or None (to create an empty one)
220 @param node: this is the node of the element. If it is None it will create a new object
221 """
222 self.type = 'KAF/NAF'
223 if node is None:
224 self.node = etree.Element('public')
225 else:
226 self.node = node
227
228
229
232
234 '''
235 Sets the uri
236 @param t: uri
237 @type t: string
238 '''
239 self.node.set('uri',t)
240
242 '''
243 Returns the uri
244 @return: uri
245 @rtype: string
246 '''
247 return self.node.get('uri')
248
250 '''
251 Sets the publicId
252 @param a: publicId
253 @type a: string
254 '''
255 self.node.set('publicId',a)
256
258 '''
259 Returns the publicId
260 @return: publicId
261 @rtype: string
262 '''
263 return self.node.get('publicId')
264
265
267 """
268 This class encapsulates the linguistic processor element in the header
269 """
def __init__(self, node=None, name="", version="", timestamp=None, btimestamp=None, etimestamp=None):
    """
    Wraps an existing linguistic processor node, or builds a new 'lp' element
    @type node: xml Element or None (to create an empty one)
    @param node: the node to wrap. If given, it is used as-is and the remaining arguments are ignored
    @type name: string
    @param name: the name of the linguistic processor
    @type version: string
    @param version: the version of the linguistic processor
    @type timestamp: string
    @param timestamp: the timestamp, or None to set it to the current time
    @type btimestamp: string
    @param btimestamp: the begin timestamp, or None to set it to the current time (NOTE: only use None if the header is created at the beginning of the process!)
    @type etimestamp: string
    @param etimestamp: the end timestamp, or None to set it to the current time (NOTE: only use None if the header is created at the end of the process!)
    """
    self.type = 'KAF/NAF'

    # An existing node is adopted untouched: no attribute is overwritten.
    if node is not None:
        self.node = node
        return

    # Fresh element: fill in the attributes through the regular setters,
    # which substitute the current time for any timestamp passed as None.
    self.node = etree.Element('lp')
    self.set_name(name)
    self.set_version(version)
    self.set_timestamp(timestamp)
    self.set_beginTimestamp(btimestamp)
    self.set_endTimestamp(etimestamp)

    # Record the machine name reported by the platform module.
    self.node.set('hostname', platform.node())
298
300 """
301 Set the name of the linguistic processor
302 @type name:string
303 @param name: name of the linguistic processor
304 """
305 self.node.set('name',name)
306
308 """
309 Set the version of the linguistic processor
310 @type version:string
311 @param version: version of the linguistic processor
312 """
313 self.node.set('version',version)
314
316 """
317 Set the timestamp of the linguistic processor, set to None for the current time
318 @type timestamp:string
319 @param timestamp: the timestamp to store, or None to use the current time
320 """
321 if timestamp is None:
322 import time
323 timestamp = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
324 self.node.set('timestamp',timestamp)
325
327 """
328 Set the begin timestamp of the linguistic processor, set to None for the current time
329 @type btimestamp: string
330 @param btimestamp: the begin timestamp to store, or None to use the current time
331 """
332 if btimestamp is None:
333 import time
334 btimestamp = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
335 self.node.set('beginTimestamp',btimestamp)
336
338 """
339 Set the end timestamp of the linguistic processor, set to None for the current time
340 @type etimestamp: string
341 @param etimestamp: the end timestamp to store, or None to use the current time
342 """
343 if etimestamp is None:
344 import time
345 etimestamp = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
346 self.node.set('endTimestamp',etimestamp)
347
348
349
350
351
353 """
354 Returns the node of the element
355 @rtype: xml Element
356 @return: the node of the element
357 """
358 return self.node
359
360
362 """
363 This class encapsulates the linguistic processors element in the header
364 """
366 """
367 Constructor of the object
368 @type node: xml Element or None (to create an empty one)
369 @param node: this is the node of the element. If it is None it will create a new object
370 """
371 self.type = 'KAF/NAF'
372 if node is None:
373 self.node = etree.Element('linguisticProcessors')
374 else:
375 self.node = node
376
378 """
379 Returns the layer of the element
380 @rtype: string
381 @return: the layer of the element
382 """
383 return self.node.get('layer')
384
386 """
387 Set the layer of the element
388 @type layer: string
389 @param layer: layer
390 """
391 self.node.set('layer',layer)
392
394 """
395 Add a linguistic processor object to the layer
396 @type my_lp: L{Clp}
397 @param my_lp: linguistic processor object
398 """
399 self.node.append(my_lp.get_node())
400
402 """
403 Returns the node of the element
404 @rtype: xml Element
405 @return: the node of the element
406 """
407 return self.node
408
409
411 """
412 This class encapsulates the header
413 """
415 """
416 Constructor of the object
417 @type node: xml Element or None (to create an empty one)
418 @param node: this is the node of the element. If it is None it will create a new object
419 @type type: string
420 @param type: the type of the object (KAF or NAF)
421 """
422 self.type = type
423 if node is None:
424 if self.type == 'NAF':
425 self.node = etree.Element('nafHeader')
426 elif self.type == 'KAF':
427 self.node = etree.Element('kafHeader')
428 else:
429 self.node = node
430
432 """
433 Returns the node of the element
434 @rtype: xml Element
435 @return: the node of the element
436 """
437 return self.node
438
440 """
441 Converts the header element to KAF
442 """
443 if self.type == 'NAF':
444 self.node.tag = 'kafHeader'
445 self.type = 'KAF'
446
448 """
449 Converts the header element to NAF
450 """
451 if self.type == 'KAF':
452 self.node.tag = 'nafHeader'
453 self.type = 'NAF'
454
455
457 """
458 Returns the document creation time defined in the header
459 @rtype: String
460 @return: the document creation time defined in fileDesc of header
461 """
462 fileDescObj = self.node.find('fileDesc')
463 if fileDescObj is not None:
464 return fileDescObj.get('creationtime')
465 else:
466 return None
467
468
470 """
471 Returns the public Id defined in the header
472 @rtype: String
473 @return: the publicId defined in public of header
474 """
475 publicObj = self.node.find('public')
476 if publicObj is not None:
477 return publicObj.get('publicId')
478 else:
479 return None
480
482 '''
483 Sets the publicId object
484 @param publicId: a publicId object
485 @type publicId: L{CpublicId}
486 '''
487 self.node.insert(0,publicId.get_node())
488
490 """Adds a linguistic processors element
491 @type linpro: ClinguisticProcessors
492 @param linpro: linguistic processors element
493 """
494 self.node.append(linpro.get_node())
495
497 """
498 Removes the linguistic processors for a given layer
499 @type layer: string
500 @param layer: the name of the layer
501 """
502 for this_node in self.node.findall('linguisticProcessors'):
503 if this_node.get('layer') == layer:
504 self.node.remove(this_node)
505 break
506
507
509 """
510 Adds a linguistic processor to a certain layer
511 @type layer: string
512 @param layer: the name of the layer
513 @type my_lp: L{Clp}
514 @param my_lp: the linguistic processor
515 """
516
517 found_lp_obj = None
518 for this_lp in self.node.findall('linguisticProcessors'):
519 lp_obj = ClinguisticProcessors(this_lp)
520 if lp_obj.get_layer() == layer:
521 found_lp_obj = lp_obj
522 break
523
524 if found_lp_obj is None:
525 found_lp_obj = ClinguisticProcessors()
526 found_lp_obj.set_layer(layer)
527 self.add_linguistic_processors(found_lp_obj)
528
529 found_lp_obj.add_linguistic_processor(my_lp)
530
532 '''
533 Returns the fileDesc object or None if there is no such element
534 @return: the fileDesc object
535 @rtype: L{CfileDesc}
536 '''
537 node = self.node.find('fileDesc')
538 if node is not None:
539 return CfileDesc(node=node)
540 else:
541 return None
542
544 '''
545 Sets the fileDesc object
546 @param fileDesc: a fileDesc object
547 @type fileDesc: L{CfileDesc}
548 '''
549 self.node.insert(0,fileDesc.get_node())
550