1 """
2 This module implements a parser for KAF or NAF files. It parses an input KAF/NAF file and exposes the information from the
3 different layers as Python objects. It can also create a new KAF/NAF file or add new information to an existing one.
4
5 @author: U{Ruben Izquierdo Bevia<rubenizquierdobevia.com>}
6 @version: 1.3
7 @contact: U{ruben.izquierdobevia@vu.nl<mailto:ruben.izquierdobevia@vu.nl>}
8 @contact: U{rubensanvi@gmail.com<mailto:rubensanvi@gmail.com>}
9 @contact: U{rubenizquierdobevia.com}
10 @since: 28-Jan-2015
11 """
12 from markable_data import Cmarkables
13
14
15
16
17
18
19
20
21
22
23 __last_modified__ = '2September2015'
24 __version__ = '1.3.1'
25 __author__ = 'Ruben Izquierdo Bevia'
26
27 from lxml import etree
28 from header_data import *
29 from text_data import *
30 from term_data import *
31 from entity_data import *
32 from features_data import *
33 from opinion_data import *
34 from constituency_data import *
35 from dependency_data import *
36 from feature_extractor import Cdependency_extractor, Cconstituency_extractor
37 from coreference_data import *
38 from srl_data import *
39 from external_references_data import *
40 from time_data import *
41 from causal_data import *
42 from temporal_data import *
43 from factuality_data import *
44 from markable_data import *
45
46
47 import sys
48
49
50
def __init__(self,filename=None,type=None):
    """
    The constructor for the parser. It parses the given file (or creates an empty document) and wraps
    every layer element found directly under the root in its corresponding layer object
    @type filename: string
    @param filename: KAF/NAF filename. Set it to None to create an empty file
    @type type: string
    @param type: to indicate if the file will be a NAF or a KAF file, in case of new files
        (it is used as the tag of the new root element and it is ignored when a filename is given)
    """
    # Always defined, so the attribute can be read safely on objects created from scratch too
    self.filename = filename

    if filename is not None:
        self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
    else:
        self.tree = etree.ElementTree(etree.Element(type))
    self.root = self.tree.getroot()
    # The type is always read back from the root tag, also for parsed files
    self.type = self.root.tag

    # Layer objects: each one stays as None when the document has no such layer
    self.header = None
    self.text_layer = None
    self.term_layer = None
    self.entity_layer = None
    self.features_layer = None
    self.opinion_layer = None
    self.constituency_layer = None
    self.dependency_layer = None
    self.coreference_layer = None
    self.srl_layer = None
    self.raw = None
    self.timex_layer = None
    self.causalRelations_layer = None
    self.temporalRelations_layer = None
    self.factuality_layer = None
    self.markable_layer = None

    # Helper objects and lookup tables, not created by the constructor
    self.my_dependency_extractor = None
    self.my_constituency_extractor = None
    self.dict_tokens_for_tid = None
    self.terms_for_token = None

    # xml:lang and version attributes of the root element (None when they are not present)
    self.lang = self.root.get('{http://www.w3.org/XML/1998/namespace}lang')
    self.version = self.root.get('version')

    # Only the header element matching the document type is looked up. Any other root tag
    # leaves the object without header instead of failing on an unbound variable
    node_header = None
    if self.type == 'NAF':
        node_header = self.root.find('nafHeader')
    elif self.type == 'KAF':
        node_header = self.root.find('kafHeader')
    if node_header is not None:
        self.header = CHeader(node_header,self.type)

    node_text = self.root.find('text')
    if node_text is not None:
        self.text_layer = Ctext(node=node_text,type=self.type)

    node_term = self.root.find('terms')
    if node_term is not None:
        self.term_layer = Cterms(node=node_term,type=self.type)

    node_entity = self.root.find('entities')
    if node_entity is not None:
        self.entity_layer = Centities(node_entity,type=self.type)

    node_features = self.root.find('features')
    if node_features is not None:
        self.features_layer = Cfeatures(node_features,type=self.type)

    node_opinions = self.root.find('opinions')
    if node_opinions is not None:
        self.opinion_layer = Copinions(node_opinions,type=self.type)

    node_constituency = self.root.find('constituency')
    if node_constituency is not None:
        self.constituency_layer = Cconstituency(node_constituency)

    node_dependency = self.root.find('deps')
    if node_dependency is not None:
        self.dependency_layer = Cdependencies(node_dependency)

    node_coreferences = self.root.find('coreferences')
    if node_coreferences is not None:
        self.coreference_layer = Ccoreferences(node_coreferences,type=self.type)

    node_srl = self.root.find('srl')
    if node_srl is not None:
        self.srl_layer = Csrl(node_srl)

    node_timex = self.root.find('timeExpressions')
    if node_timex is not None:
        self.timex_layer = CtimeExpressions(node_timex)

    node_temporalRelations = self.root.find('temporalRelations')
    if node_temporalRelations is not None:
        self.temporalRelations_layer = CtemporalRelations(node_temporalRelations)

    node_causalRelations = self.root.find('causalRelations')
    if node_causalRelations is not None:
        self.causalRelations_layer = CcausalRelations(node_causalRelations)

    # Two element names feed the same attribute: 'factualitylayer' and 'factualities'.
    # When both are present, the object built from 'factualities' is the one that is kept
    node_factualitylayer = self.root.find('factualitylayer')
    if node_factualitylayer is not None:
        self.factuality_layer = Cfactualitylayer(node_factualitylayer)

    node_factualities = self.root.find('factualities')
    if node_factualities is not None:
        self.factuality_layer = Cfactualities(node_factualities)

    # The raw layer is kept as plain text, not as a layer object
    node_raw = self.root.find('raw')
    if node_raw is not None:
        self.raw = node_raw.text

    node_markables = self.root.find('markables')
    if node_markables is not None:
        self.markable_layer = Cmarkables(node_markables)
175
177 '''
178 Returns the header object
179 @return: the header object
180 @rtype: L{CHeader}
181 '''
182 return self.header
183
185 """
186 Sets the language to the KAF root element
187 @param l: the language code
188 @type l: string
189 """
190 self.root.set('{http://www.w3.org/XML/1998/namespace}lang',l)
191
193 """
194 Sets the version attribute of the KAF/NAF root element
195 @param v: the version value
196 @type v: string
197 """
198 self.root.set('version',v)
199
201 """
202 Returns the type (NAF/KAF) of the object
203 @rtype: string
204 @return: the type of the file
205 """
206
207 return self.type
208
210 """
211 Returns the name of the filename
212 @rtype: string
213 @return: the filename of the KAF/NAF object
214 """
215 return self.filename
216
218 """
219 Converts a NAF object to KAF (in memory). You will have to use the method dump later to save it as a new KAF file
220 """
221
222 if self.type == 'NAF':
223 self.root.tag = 'KAF'
224 self.type = 'KAF'
225
226
227 if self.header is not None:
228 self.header.to_kaf()
229
230
231 if self.text_layer is not None:
232 self.text_layer.to_kaf()
233
234
235 if self.term_layer is not None:
236 self.term_layer.to_kaf()
237
238
239 if self.entity_layer is not None:
240 self.entity_layer.to_kaf()
241
242
243
244
245 if self.features_layer is not None:
246 self.features_layer.to_kaf()
247
248
249
250 if self.opinion_layer is not None:
251 self.opinion_layer.to_kaf()
252
253
254
255 if self.constituency_layer is not None:
256 self.constituency_layer.to_kaf()
257
258
259
260
261 if self.dependency_layer is not None:
262 self.dependency_layer.to_kaf()
263
264 if self.coreference_layer is not None:
265 self.coreference_layer.to_kaf()
266
267
268
269
270 if self.temporalRelations_layer is not None:
271 self.temporalRelations_layer.to_kaf()
272
273
274
275 if self.causalRelations_layer is not None:
276 self.causalRelations_layer.to_kaf()
277
278
279
280 if self.factuality_layer is not None:
281 self.factuality_layer.to_kaf()
282
284 """
285 Converts a KAF object to NAF (in memory). You will have to use the method dump later to save it as a new NAF file
286 """
287 if self.type == 'KAF':
288 self.root.tag = self.type = 'NAF'
289
290
291 if self.header is not None:
292 self.header.to_naf()
293
294
295 if self.text_layer is not None:
296 self.text_layer.to_naf()
297
298
299
300 if self.term_layer is not None:
301 self.term_layer.to_naf()
302
303
304
305 if self.entity_layer is not None:
306 self.entity_layer.to_naf()
307
308
309
310
311 if self.features_layer is not None:
312 self.features_layer.to_naf()
313
314
315
316 if self.opinion_layer is not None:
317 self.opinion_layer.to_naf()
318
319
320
321
322 if self.constituency_layer is not None:
323 self.constituency_layer.to_naf()
324
325
326
327
328 if self.dependency_layer is not None:
329 self.dependency_layer.to_naf()
330
331 if self.coreference_layer is not None:
332 self.coreference_layer.to_naf()
333
334
335
336
337
338 if self.temporalRelations_layer is not None:
339 self.temporalRelations_layer.to_naf()
340
341
342
343 if self.causalRelations_layer is not None:
344 self.causalRelations_layer.to_naf()
345
346
347
348 if self.factuality_layer is not None:
349 self.factuality_layer.to_naf()
350
351
352
353
354 if self.markable_layer is not None:
355 self.markable_layer.to_naf()
356
357
359 """
360 Prints the constituency layer
361 """
362 print self.constituency_layer
363
365 """
366 Iterator that returns the constituency trees
367 @rtype: L{Ctree}
368 @return: iterator to all the constituency trees
369 """
370
371 if self.constituency_layer is not None:
372 for tree in self.constituency_layer.get_trees():
373 yield tree
374
376 """
377 Returns the constituency trees as a list
378 @rtype: list of L{Ctree}
379 @return: list with all the constituency trees
380 """
381 mytrees = []
382 if self.constituency_layer is not None:
383 for tree in self.constituency_layer.get_trees():
384 mytrees.append(tree)
385 return mytrees
386
def get_dependencies(self):
    """
    Generator over the dependencies stored in the dependency layer. Use it as:
    for my_dep in my_obj.get_dependencies():
    Nothing is yielded when the object has no dependency layer
    @rtype: L{Cdependency}
    @returns: iterator to get all the dependencies
    """
    layer = self.dependency_layer
    if layer is None:
        return
    for this_dep in layer.get_dependencies():
        yield this_dep
397
def get_tlinks(self):
    """
    Generator over the tlinks stored in the temporalRelations layer. Use it as:
    for my_tlink in my_obj.get_tlinks():
    Nothing is yielded when the object has no temporalRelations layer
    @rtype: L{Ctlink}
    @returns: iterator to get all the tlinks
    """
    layer = self.temporalRelations_layer
    if layer is None:
        return
    for this_tlink in layer.get_tlinks():
        yield this_tlink
408
def get_clinks(self):
    """
    Generator over the clinks stored in the causalRelations layer. Use it as:
    for my_clink in my_obj.get_clinks():
    Nothing is yielded when the object has no causalRelations layer
    @rtype: L{Cclink}
    @returns: iterator to get all the clinks
    """
    layer = self.causalRelations_layer
    if layer is None:
        return
    for this_clink in layer.get_clinks():
        yield this_clink
419
def get_factvalues(self):
    """
    Generator over the factvalues stored in the factuality layer. Use it as:
    for my_fact in my_obj.get_factvalues():
    Nothing is yielded when the object has no factuality layer
    @rtype: L{Cfactvalue}
    @returns: iterator to get all the factvalues
    """
    layer = self.factuality_layer
    if layer is None:
        return
    for this_fact in layer.get_factvalues():
        yield this_fact
430
432 """
433 Iterator that returns the corefs from the coreferences layer.
434 @rtype: L{Ccoreference}
435 @returns: iterator to get all the coreferences
436 """
437 if self.coreference_layer is not None:
438 for coref in self.coreference_layer.get_corefs():
439 yield coref
440
442 """
443 Returns the code language of the file
444 @rtype: string
445 @returns: language code of the file
446 """
447 return self.lang
448
449
451 """Iterator that returns all the tokens from the text layer
452 @rtype: L{Cwf}
453 @return: list of token objects
454 """
455 for token in self.text_layer:
456 yield token
457
459 """Iterator that returns all the terms from the term layer
460 @rtype: L{Cterm}
461 @return: list of term objects
462 """
463 if self.term_layer is not None:
464 for term in self.term_layer:
465 yield term
466
468 """Iterator that returns all the markables from the markable layer
469 @rtype: L{Cmarkable}
470 @return: list of markable objects
471 """
472 if self.markable_layer is not None:
473 for markable in self.markable_layer:
474 yield markable
475
477 """
478 Returns a markable object for the specified markable_id
479 @type markable_id:string
480 @param markable_id: markable identifier
481 @rtype: L{Cmarkable}
482 @return: markable object
483 """
484 if self.markable_layer is not None:
485 return self.markable_layer.get_markable(markable_id)
486 else:
487 return None
488
490 """
491 Returns a token object for the specified token_id
492 @type token_id:string
493 @param token_id: token identifier
494 @rtype: L{Cwf}
495 @return: token object
496 """
497 if self.text_layer is not None:
498 return self.text_layer.get_wf(token_id)
499 else:
500 return None
501
502
504 """
505 Returns a term object for the specified term_id
506 @type term_id:string
507 @param term_id: term identifier
508 @rtype: L{Cterm}
509 @return: term object
510 """
511 if self.term_layer is not None:
512 return self.term_layer.get_term(term_id)
513 else:
514 return None
515
517 """
518 Returns all the properties of the features layer (iterator)
519 @rtype: L{Cproperty}
520 @return: list of properties
521 """
522 if self.features_layer is not None:
523 for property in self.features_layer.get_properties():
524 yield property
525
527 """
528 Returns a list of all the entities in the object
529 @rtype: L{Centity}
530 @return: list of entities (iterator)
531 """
532 if self.entity_layer is not None:
533 for entity in self.entity_layer:
534 yield entity
535
536
538 """
539 Returns an entity object for the specified entity_id
540 @type entity_id:string
541 @param entity_id: entity identifier
542 @rtype: L{Centity}
543 @return: entity object
544 """
545 if self.entity_layer is not None:
546 return self.entity_layer.get_entity(entity_id)
547 else:
548 return None
549
550
552 """
553 Returns a list of all the opinions in the object
554 @rtype: L{Copinion}
555 @return: list of opinions (iterator)
556 """
557 if self.opinion_layer is not None:
558 for opinion in self.opinion_layer.get_opinions():
559 yield opinion
560
562 """
563 Returns a list of all the predicates in the object
564 @rtype: L{Cpredicate}
565 @return: list of predicates (iterator)
566 """
567 if self.srl_layer is not None:
568 for pred in self.srl_layer.get_predicates():
569 yield pred
570
572 """
573 Returns the raw text as a string
574 @rtype: string
575 @return: the raw text
576 """
577 if self.raw is not None:
578 return self.raw
579
581 """
582 Sets the text of the raw element (or creates the layer if it does not exist)
583 @param text: text of the raw layer
584 @type text: string
585 """
586 node_raw = self.root.find('raw')
587 if node_raw is None:
588 node_raw = etree.Element('raw')
589 self.root.insert(0,node_raw)
590 node_raw.text = etree.CDATA(text)
591
593 """
594 Returns a list of all the timeexpressions in the text
595 @rtype: L{Ctime}
596 @return: list of time expressions (iterator)
597 """
598 if self.timex_layer is not None:
599 for time in self.timex_layer.get_timeExpressions():
600 yield time
601
def dump(self,filename=sys.stdout):
    """
    Dumps the object to an output filename (or open file descriptor). The filename
    parameter is optional, and if it is not provided, the standard output will be used.
    The document is written UTF-8 encoded, pretty printed and with the XML declaration
    @type filename: string or file descriptor
    @param filename: file where to dump the object (default standard output)
    """
    target = filename
    if target is sys.stdout:
        # The serialiser writes encoded bytes. A text-mode standard output only accepts
        # strings, so its underlying binary buffer is used when there is one; streams
        # without a 'buffer' attribute are used unchanged
        target = getattr(sys.stdout,'buffer',sys.stdout)

    self.tree.write(target,encoding='UTF-8',pretty_print=True,xml_declaration=True)
611
612
614 """
615 Removes the entity layer (if exists) of the object (in memory)
616 """
617 if self.entity_layer is not None:
618 this_node = self.entity_layer.get_node()
619 self.root.remove(this_node)
620 self.entity_layer = None
621 if self.header is not None:
622 self.header.remove_lp('entities')
623
625 """
626 Removes the dependency layer (if exists) of the object (in memory)
627 """
628 if self.dependency_layer is not None:
629 this_node = self.dependency_layer.get_node()
630 self.root.remove(this_node)
631 self.dependency_layer = self.my_dependency_extractor = None
632
633 if self.header is not None:
634 self.header.remove_lp('deps')
635
636
638 """
639 Removes the temporalRelations layer (if exists) of the object (in memory)
640 """
641 if self.temporalRelations_layer is not None:
642 this_node = self.temporalRelations_layer.get_node()
643 self.root.remove(this_node)
644 self.temporalRelations_layer = None
645
646 if self.header is not None:
647 self.header.remove_lp('temporalRelations')
648
650 """
651 Removes the causalRelations layer (if exists) of the object (in memory)
652 """
653 if self.causalRelations_layer is not None:
654 this_node = self.causalRelations_layer.get_node()
655 self.root.remove(this_node)
656 self.causalRelations_layer = None
657
658 if self.header is not None:
659 self.header.remove_lp('causalRelations')
660
662 """
663 Removes the factualitylayer layer (the old version) (if exists) of the object (in memory)
664 """
665 if self.factuality_layer is not None:
666 this_node = self.factuality_layer.get_node()
667 self.root.remove(this_node)
668 self.factuality_layer = None
669
670 if self.header is not None:
671 self.header.remove_lp('factualitylayer')
672
674 """
675 Removes the constituency layer (if exists) of the object (in memory)
676 """
677 if self.constituency_layer is not None:
678 this_node = self.constituency_layer.get_node()
679 self.root.remove(this_node)
680 if self.header is not None:
681 self.header.remove_lp('constituents')
682
683
685 """
686 Removes the opinion with the provided opinion identifier
687 @type opinion_id: string
688 @param opinion_id: the opinion identifier of the opinion to remove
689 """
690 if self.opinion_layer is not None:
691 self.opinion_layer.remove_this_opinion(opinion_id)
692
694 """
695 Removes the opinion layer (if exists) of the object (in memory)
696 """
697 if self.opinion_layer is not None:
698 this_node = self.opinion_layer.get_node()
699 self.root.remove(this_node)
700 self.opinion_layer = None
701
702 if self.header is not None:
703 self.header.remove_lp('opinions')
704
706 """
707 Removes the property layer (if exists) of the object (in memory)
708 """
709 if self.features_layer is not None:
710 self.features_layer.remove_properties()
711
712 if self.header is not None:
713 self.header.remove_lp('features')
714
715
717 """
718 Removes the term layer (if exists) of the object (in memory)
719 """
720 if self.term_layer is not None:
721 this_node = self.term_layer.get_node()
722 self.root.remove(this_node)
723 self.term_layer = None
724
725 if self.header is not None:
726 self.header.remove_lp('terms')
727
728
729
731 """
732 Removes the text layer (if exists) of the object (in memory)
733 """
734 if self.text_layer is not None:
735 this_node = self.text_layer.get_node()
736 self.root.remove(this_node)
737 self.text_layer = None
738
739 if self.header is not None:
740 self.header.remove_lp('text')
741
742
744 """
745 Removes the coreference layer (if exists) of the object (in memory)
746 """
747 if self.coreference_layer is not None:
748 this_node = self.coreference_layer.get_node()
749 self.root.remove(this_node)
750 if self.header is not None:
751 self.header.remove_lp('coreferences')
752
753
792
793
795 """
796 Returns a constituency extractor object
797 @rtype: L{Cconstituency_extractor}
798 @return: a constituency extractor object
799 """
800
801 if self.constituency_layer is not None:
802 if self.my_constituency_extractor is None:
803 self.my_constituency_extractor = Cconstituency_extractor(self)
804 return self.my_constituency_extractor
805 else:
806 return None
807
809 """
810 Returns a dependency extractor object
811 @rtype: L{Cdependency_extractor}
812 @return: a dependency extractor object
813 """
814 if self.dependency_layer is not None:
815 if self.my_dependency_extractor is None:
816 self.my_dependency_extractor = Cdependency_extractor(self)
817 return self.my_dependency_extractor
818 else:
819 return None
820
821
823 """
824 Adds a token to the text layer
825 @type wf_obj: L{Cwf}
826 @param wf_obj: the token object
827 """
828 if self.text_layer is None:
829 self.text_layer = Ctext(type=self.type)
830 self.root.append(self.text_layer.get_node())
831 self.text_layer.add_wf(wf_obj)
832
834 """
835 Adds a term to the term layer
836 @type term_obj: L{Cterm}
837 @param term_obj: the term object
838 """
839 if self.term_layer is None:
840 self.term_layer = Cterms(type=self.type)
841 self.root.append(self.term_layer.get_node())
842 self.term_layer.add_term(term_obj)
843
845 """
846 Adds a markable to the markable layer
847 @type markable_obj: L{Cmarkable}
848 @param markable_obj: the markable object
849 """
850 if self.markable_layer is None:
851 self.markable_layer = Cmarkables(type=self.type)
852 self.root.append(self.markable_layer.get_node())
853 self.markable_layer.add_markable(markable_obj)
854
855
857 """
858 Adds an opinion to the opinion layer
859 @type opinion_obj: L{Copinion}
860 @param opinion_obj: the opinion object
861 """
862 if self.opinion_layer is None:
863 self.opinion_layer = Copinions()
864 self.root.append(self.opinion_layer.get_node())
865 self.opinion_layer.add_opinion(opinion_obj)
866
867
869 """
870 Adds a predicate to the semantic layer
871 @type predicate_obj: L{Cpredicate}
872 @param predicate_obj: the predicate object
873 """
874 if self.srl_layer is None:
875 self.srl_layer = Csrl()
876 self.root.append(self.srl_layer.get_node())
877 self.srl_layer.add_predicate(predicate_obj)
878
880 """
881 Adds a timex entry to the time layer
882 @type time_obj: L{Ctime}
883 @param time_obj: the time object
884 """
885 if self.timex_layer is None:
886 self.timex_layer = CtimeExpressions()
887 self.root.append(self.timex_layer.get_node())
888 self.timex_layer.add_timex(time_obj)
889
890
892 """
893 Sets the header of the object
894 @type header: L{CHeader}
895 @param header: the header object
896 """
897 self.root.insert(0,header.get_node())
898
900 """
901 Adds a linguistic processor to the header
902 @type my_lp: L{Clp}
903 @param my_lp: linguistic processor object
904 @type layer: string
905 @param layer: the layer to which the processor is related
906 """
907 if self.header is None:
908 self.header = CHeader(type=self.type)
909 self.root.insert(0,self.header.get_node())
910 self.header.add_linguistic_processor(layer,my_lp)
911
912
914 """
915 Adds a dependency to the dependency layer
916 @type my_dep: L{Cdependency}
917 @param my_dep: dependency object
918 """
919 if self.dependency_layer is None:
920 self.dependency_layer = Cdependencies()
921 self.root.append(self.dependency_layer.get_node())
922 self.dependency_layer.add_dependency(my_dep)
923
925 """
926 Adds a tlink to the temporalRelations layer
927 @type my_tlink: L{Ctlink}
928 @param my_tlink: tlink object
929 """
930 if self.temporalRelations_layer is None:
931 self.temporalRelations_layer = CtemporalRelations()
932 self.root.append(self.temporalRelations_layer.get_node())
933 self.temporalRelations_layer.add_tlink(my_tlink)
934
936 """
937 Adds a predAnch to the temporalRelations layer
938 @type my_predAnch: L{CpredicateAnchor}
939 @param my_predAnch: predicateAnchor object
940 """
941 if self.temporalRelations_layer is None:
942 self.temporalRelations_layer = CtemporalRelations()
943 self.root.append(self.temporalRelations_layer.get_node())
944 self.temporalRelations_layer.add_predicateAnchor(my_predAnch)
945
947 """
948 Adds a clink to the causalRelations layer
949 @type my_clink: L{Cclink}
950 @param my_clink: clink object
951 """
952 if self.causalRelations_layer is None:
953 self.causalRelations_layer = CcausalRelations()
954 self.root.append(self.causalRelations_layer.get_node())
955 self.causalRelations_layer.add_clink(my_clink)
956
958 """
959 Adds a factvalue to the factuality layer
960 @type my_fact: L{Cfactvalue}
961 @param my_fact: factvalue object
962 """
963 if self.factuality_layer is None:
964 self.factuality_layer = Cfactualitylayer()
965 self.root.append(self.factuality_layer.get_node())
966 self.factuality_layer.add_factvalue(my_fact)
967
969 """
970 Adds an entity to the entity layer
971 @type entity: L{Centity}
972 @param entity: the entity object
973 """
974 if self.entity_layer is None:
975 self.entity_layer = Centities(type=self.type)
976 self.root.append(self.entity_layer.get_node())
977 self.entity_layer.add_entity(entity)
978
979
981 """
982 Adds a coreference to the coreference layer
983 @type coreference: L{Ccoreference}
984 @param coreference: the coreference object
985 """
986 if self.coreference_layer is None:
987 self.coreference_layer = Ccoreferences(type=self.type)
988 self.root.append(self.coreference_layer.get_node())
989 self.coreference_layer.add_coreference(coreference)
990
991
993 """
994 Adds a constituency tree to the constituency layer
995 @type my_tree: L{Ctree}
996 @param my_tree: the constituency tree object
997 """
998 if self.constituency_layer is None:
999 self.constituency_layer = Cconstituency()
1000 self.root.append(self.constituency_layer.get_node())
1001 self.constituency_layer.add_tree(my_tree)
1002
1003
1005 """
1006 Adds a property to the property layer
1007 @type label: string
1008 @param label: the type of property
1009 @type term_span: list
1010 @param term_span: list of term ids
1011 @type pid: string
1012 @param pid: the identifier for the property (use None to automatically generate one)
1013 """
1014 if self.features_layer is None:
1015 self.features_layer = Cfeatures(type=self.type)
1016 self.root.append(self.features_layer.get_node())
1017 self.features_layer.add_property(pid, label,term_span)
1018
1019
1020
1022 """
1023 Returns the tokens ids that are the span of the term specified
1024 @type term_id: string
1025 @param term_id: the term identifier
1026 @rtype: list
1027 @return: list of token ids that are the span of the term
1028 """
1029 if self.dict_tokens_for_tid is None:
1030 self.dict_tokens_for_tid = {}
1031 for term in self.get_terms():
1032 self.dict_tokens_for_tid[term.get_id()] = term.get_span().get_span_ids()
1033
1034 return self.dict_tokens_for_tid.get(term_id,[])
1035
1036
1038 """
1039 Maps a list of token ids to the corresponding term ids
1040 @type list_tokens: list
1041 @param list_tokens: list of token identifiers
1042 @rtype: list
1043 @return: list of term identifiers
1044 """
1045 if self.terms_for_token is None:
1046 self.terms_for_token = {}
1047 for term in self.get_terms():
1048 termid = term.get_id()
1049 token_ids = term.get_span().get_span_ids()
1050 for tokid in token_ids:
1051 if tokid not in self.terms_for_token:
1052 self.terms_for_token[tokid] = [termid]
1053 else:
1054 self.terms_for_token[tokid].append(termid)
1055
1056 ret = set()
1057 for my_id in list_tokens:
1058 term_ids = self.terms_for_token.get(my_id,[])
1059 ret |= set(term_ids)
1060 return sorted(list(ret))
1061
1063 """
1064 Removes the tokens belonging to the supplied sentence
1065 @type sentence_id: string
1066 @param sentence_id: a sentence identifier
1067 """
1068 self.text_layer.remove_tokens_of_sentence(sentence_id)
1069
1071 """
1072 Removes the list of terms specified
1073 @type list_term_ids: list
1074 @param list_term_ids: list of term identifiers
1075 """
1076 self.term_layer.remove_terms(list_term_ids)
1077
1080
1081
1082
1084 """
1085 Adds an external reference to the given term identifier
1086 @type term_id: string
1087 @param term_id: the term identifier
1088 @param external_ref: an external reference object
1089 @type external_ref: L{CexternalReference}
1090 """
1091 if self.term_layer is not None:
1092 self.term_layer.add_external_reference(term_id, external_ref)
1093
1094
1096 """
1097 Removes all external references present in the term layer
1098 """
1099 if self.term_layer is not None:
1100 for term in self.term_layer:
1101 term.remove_external_references()
1102
1104 """
1105 Adds an external reference to the given role identifier in the SRL layer
1106 @type role_id: string
1107 @param role_id: the role identifier
1108 @param external_ref: an external reference object
1109 @type external_ref: L{CexternalReference}
1110 """
1111 if self.srl_layer is not None:
1112 self.srl_layer.add_external_reference_to_role(role_id,external_ref)
1113
1114
1115
1124
1126 """
1127 Adds an external reference to the given entity identifier in the entity layer
1128 @type entity_id: string
1129 @param entity_id: the entity identifier
1130 @param external_ref: an external reference object
1131 @type external_ref: L{CexternalReference}
1132 """
1133 if self.entity_layer is not None:
1134 self.entity_layer.add_external_reference_to_entity(entity_id,external_ref)
1135