Module KafNafParserMod
[hide private]
[frames] | no frames]

Source Code for Module KafNafParserMod

   1  """ 
   2  This module implements a parser for KAF or NAF files. It parses an input KAF/NAF file and exposes the information of the 
   3  different layers as Python objects. It can also create a new KAF/NAF file or add new information to an existing one. 
   4   
   5  @author: U{Ruben Izquierdo Bevia<rubenizquierdobevia.com>} 
   6  @version: 1.3 
   7  @contact: U{ruben.izquierdobevia@vu.nl<mailto:ruben.izquierdobevia@vu.nl>}  
   8  @contact: U{rubensanvi@gmail.com<mailto:rubensanvi@gmail.com>} 
   9  @contact: U{rubenizquierdobevia.com} 
  10  @since: 28-Jan-2015 
  11  """ 
  12  from markable_data import Cmarkables 
  13           
  14  ############### Changes   ##################### 
  15  # v1.1 --> added functions to add external refs to entities and to read them 
  16  # v1.2 --> added functions to add new entities to the NAF/KAF file 
  17  # v1.3 --> added set_raw(text) 
  18  # v1.3.1 --> added functions to set and get fileDesc attributes 
  19  # v1.3.2 --> added markable layer and main accompanying functions 
  20  ################################################ 
  21   
  22   
# Module metadata.
# NOTE(review): the changelog above lists v1.3.2 while __version__ still says 1.3.1 - confirm which one is right
__last_modified__  = '2September2015'
__version__ = '1.3.1'
__author__ = 'Ruben Izquierdo Bevia'
  26   
  27  from lxml import etree 
  28  from header_data import * 
  29  from text_data import * 
  30  from term_data import * 
  31  from entity_data import * 
  32  from features_data import * 
  33  from opinion_data import * 
  34  from constituency_data import * 
  35  from dependency_data import * 
  36  from feature_extractor import Cdependency_extractor, Cconstituency_extractor 
  37  from coreference_data import * 
  38  from srl_data import * 
  39  from external_references_data import * 
  40  from time_data import * 
  41  from causal_data import * 
  42  from temporal_data import * 
  43  from factuality_data import * 
  44  from markable_data import * 
  45   
  46   
  47  import sys 
  48   
  49   
  50   
51 -class KafNafParser:
def __init__(self,filename=None,type=None):
    """
    The constructor for the parser
    @type filename: string
    @param filename: KAF/NAF filename. Set it to None to create an empty file
    @type type: string
    @param type: to indicate if the file will be a NAF or a KAF file, in case of new files.
    """
    # Always defined (None for objects created in memory) so that get_filename() cannot fail
    self.filename = filename

    self.tree = None
    if filename is not None:
        self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
    else:
        self.tree = etree.ElementTree(etree.Element(type))
    self.root = self.tree.getroot()
    self.type = self.root.tag    # KAF NAF

    self.header = None
    self.text_layer = None
    self.term_layer = None
    self.entity_layer = None
    self.features_layer = None
    self.opinion_layer = None
    self.constituency_layer = None
    self.dependency_layer = None
    self.coreference_layer = None
    self.srl_layer = None
    self.raw = None
    self.timex_layer = None
    self.causalRelations_layer = None
    self.temporalRelations_layer = None
    self.factuality_layer = None
    self.markable_layer = None

    ## Specific feature extractor for complicated layers
    self.my_dependency_extractor = None
    self.my_constituency_extractor = None
    ##################################################

    ## Lazily built lookup tables (see get_dict_tokens_for_termid and map_tokens_to_terms)
    self.dict_tokens_for_tid = None
    self.terms_for_token = None
    ##

    self.lang = self.root.get('{http://www.w3.org/XML/1998/namespace}lang')
    self.version = self.root.get('version')

    # Start from None: with a root tag other than NAF/KAF the name used to be unbound below
    node_header = None
    if self.type == 'NAF':
        node_header = self.root.find('nafHeader')
    elif self.type == 'KAF':
        node_header = self.root.find('kafHeader')

    if node_header is not None:
        self.header = CHeader(node_header,self.type)

    # Text layer adapted to naf/kaf
    node_text = self.root.find('text')
    if node_text is not None:
        self.text_layer = Ctext(node=node_text,type=self.type)

    node_term = self.root.find('terms')
    if node_term is not None:
        self.term_layer = Cterms(node=node_term,type=self.type)

    node_entity = self.root.find('entities')
    if node_entity is not None:
        self.entity_layer = Centities(node_entity,type=self.type)

    node_features = self.root.find('features')
    if node_features is not None:
        self.features_layer = Cfeatures(node_features,type=self.type)

    node_opinions = self.root.find('opinions')
    if node_opinions is not None:
        self.opinion_layer = Copinions(node_opinions,type=self.type)

    # Definition KAF/NAF is the same
    node_constituency = self.root.find('constituency')
    if node_constituency is not None:
        self.constituency_layer = Cconstituency(node_constituency)

    # Definition KAF/NAF is the same
    node_dependency = self.root.find('deps')
    if node_dependency is not None:
        self.dependency_layer = Cdependencies(node_dependency)

    node_coreferences = self.root.find('coreferences')
    if node_coreferences is not None:
        self.coreference_layer = Ccoreferences(node_coreferences,type=self.type)

    node_srl = self.root.find('srl')
    if node_srl is not None:
        self.srl_layer = Csrl(node_srl)

    node_timex = self.root.find('timeExpressions')
    if node_timex is not None:
        self.timex_layer = CtimeExpressions(node_timex)

    node_temporalRelations = self.root.find('temporalRelations')
    if node_temporalRelations is not None:
        self.temporalRelations_layer = CtemporalRelations(node_temporalRelations)

    node_causalRelations = self.root.find('causalRelations')
    if node_causalRelations is not None:
        self.causalRelations_layer = CcausalRelations(node_causalRelations)

    # Old style factuality element...
    node_factualitylayer = self.root.find('factualitylayer')
    if node_factualitylayer is not None:
        self.factuality_layer = Cfactualitylayer(node_factualitylayer)

    # ...and the new one, which takes precedence when both elements are present
    node_factualities = self.root.find('factualities')
    if node_factualities is not None:
        self.factuality_layer = Cfactualities(node_factualities)

    node_raw = self.root.find('raw')
    if node_raw is not None:
        self.raw = node_raw.text

    node_markables = self.root.find('markables')
    if node_markables is not None:
        self.markable_layer = Cmarkables(node_markables)
175
def get_header(self):
    """
    Gives access to the header of the document
    @rtype: L{CHeader}
    @return: the header object (None when there is no header)
    """
    header_obj = self.header
    return header_obj
183
def set_language(self,l):
    """
    Sets the language on the root element of the document
    @param l: the language code
    @type l: string
    """
    self.root.set('{http://www.w3.org/XML/1998/namespace}lang',l)
    # Keep the value read at construction time in sync, otherwise get_language() keeps
    # answering with the previous language
    self.lang = l
191
def set_version(self,v):
    """
    Sets the version on the root element of the document
    @param v: the version
    @type v: string
    """
    self.root.set('version',v)
    # Mirror the new value in the attribute that was read at construction time
    self.version = v
199
def get_type(self):
    """
    Tells which kind of document (NAF/KAF) this object holds
    @rtype: string
    @return: the type of the file
    """
    file_type = self.type
    return file_type
208
def get_filename(self):
    """
    Returns the name of the file this object was read from
    @rtype: string
    @return: the filename of the KAF/NAF object, or None for objects that were not read from a file
    """
    # The attribute may be missing on objects created in memory (no input file)
    return getattr(self,'filename',None)
216
def to_kaf(self):
    """
    Converts a NAF object to KAF (in memory). Call the method dump afterwards to save it as a new KAF file
    """
    if self.type == 'NAF':
        self.root.tag = 'KAF'
        self.type = 'KAF'

    # Every layer that is present converts itself, always in this order
    layer_attributes = (
        'header',
        'text_layer',
        'term_layer',
        'entity_layer',
        'features_layer',           # not defined in NAF, assumed to follow the same rules
        'opinion_layer',
        'constituency_layer',       # same definition in KAF and NAF
        'dependency_layer',         # not defined in KAF, assumed to be similar
        'coreference_layer',
        'temporalRelations_layer',  # not defined in KAF, assumed to be similar
        'causalRelations_layer',    # not defined in KAF, assumed to be similar
        'factuality_layer',         # not defined in KAF, assumed to be similar
    )
    for attribute in layer_attributes:
        layer = getattr(self,attribute)
        if layer is not None:
            layer.to_kaf()
282
def to_naf(self):
    """
    Converts a KAF object to NAF (in memory). Call the method dump afterwards to save it as a new NAF file
    """
    if self.type == 'KAF':
        self.root.tag = 'NAF'
        self.type = 'NAF'

    # Every layer that is present converts itself, always in this order
    layer_attributes = (
        'header',
        'text_layer',
        'term_layer',
        'entity_layer',
        'features_layer',           # not defined in NAF, assumed to follow the same rules
        'opinion_layer',
        'constituency_layer',       # same definition in KAF and NAF
        'dependency_layer',         # not defined in KAF, assumed to be similar
        'coreference_layer',
        'temporalRelations_layer',  # not defined in KAF, assumed to be similar
        'causalRelations_layer',    # not defined in KAF, assumed to be similar
        'factuality_layer',         # not defined in KAF, assumed to be similar
        'markable_layer',           # not defined in KAF, assumed to be similar
    )
    for attribute in layer_attributes:
        layer = getattr(self,attribute)
        if layer is not None:
            layer.to_naf()
356 357
def print_constituency(self):
    """
    Prints the constituency layer to the standard output
    """
    # Call form instead of the print statement: with a single argument it behaves the
    # same on Python 2 and is the only valid spelling on Python 3
    print(self.constituency_layer)
363
def get_trees(self):
    """
    Generator over the trees of the constituency layer (yields nothing when there is no such layer)
    @rtype: L{Ctree}
    @return: iterator to all the constituency trees
    """
    layer = self.constituency_layer
    if layer is None:
        return
    for one_tree in layer.get_trees():
        yield one_tree
374
def get_trees_as_list(self):
    """
    Returns the constituency trees as a list
    @rtype: list
    @return: list with all the constituency trees (L{Ctree}), empty when there is no constituency layer
    """
    if self.constituency_layer is None:
        return []
    return list(self.constituency_layer.get_trees())
386
def get_dependencies(self):
    """
    Generator over the dependencies of the dependency layer. Use it as:
    for my_dep in my_obj.get_dependencies():
    @rtype: L{Cdependency}
    @returns: iterator to get all the dependencies
    """
    layer = self.dependency_layer
    if layer is None:
        return
    for dependency in layer.get_dependencies():
        yield dependency
397 408 419
def get_factvalues(self):
    """
    Generator over the factvalues of the factuality layer. Use it as:
    for my_fact in my_obj.get_factvalues():
    @rtype: L{Cfactvalue}
    @returns: iterator to get all the factvalues
    """
    layer = self.factuality_layer
    if layer is None:
        return
    for factvalue in layer.get_factvalues():
        yield factvalue
430
def get_corefs(self):
    """
    Generator over the corefs of the coreferences layer.
    @rtype: L{Ccoreference}
    @returns: iterator to get all the coreferences
    """
    layer = self.coreference_layer
    if layer is None:
        return
    for coreference in layer.get_corefs():
        yield coreference
440
def get_language(self):
    """
    Gives the language code of the file
    @rtype: string
    @returns: language code of the file
    """
    language_code = self.lang
    return language_code
448 449
def get_tokens(self):
    """Iterator that returns all the tokens from the text layer
    @rtype: L{Cwf}
    @return: iterator over the token objects (empty when there is no text layer)
    """
    # Same guard as the other layer iterators: without it a document lacking a text
    # layer made this generator fail while iterating over None
    if self.text_layer is not None:
        for token in self.text_layer:
            yield token
457
def get_terms(self):
    """Generator over all the terms of the term layer (yields nothing when there is no term layer)
    @rtype: L{Cterm}
    @return: iterator over the term objects
    """
    layer = self.term_layer
    if layer is None:
        return
    for one_term in layer:
        yield one_term
466
def get_markables(self):
    """Generator over all the markables of the markable layer (yields nothing when there is no such layer)
    @rtype: L{Cmarkable}
    @return: iterator over the markable objects
    """
    layer = self.markable_layer
    if layer is None:
        return
    for one_markable in layer:
        yield one_markable
475
def get_markable(self,markable_id):
    """
    Looks up a markable in the markable layer
    @type markable_id: string
    @param markable_id: markable identifier
    @rtype: L{Cmarkable}
    @return: what the markable layer returns for that identifier, or None when there is no markable layer
    """
    layer = self.markable_layer
    if layer is None:
        return None
    return layer.get_markable(markable_id)
488
def get_token(self,token_id):
    """
    Looks up a token in the text layer
    @type token_id: string
    @param token_id: token identifier
    @rtype: L{Cwf}
    @return: what the text layer returns for that identifier, or None when there is no text layer
    """
    layer = self.text_layer
    if layer is None:
        return None
    return layer.get_wf(token_id)
501 502
def get_term(self,term_id):
    """
    Looks up a term in the term layer
    @type term_id: string
    @param term_id: term identifier
    @rtype: L{Cterm}
    @return: what the term layer returns for that identifier, or None when there is no term layer
    """
    layer = self.term_layer
    if layer is None:
        return None
    return layer.get_term(term_id)
515
def get_properties(self):
    """
    Generator over all the properties of the features layer (yields nothing when there is no such layer)
    @rtype: L{Cproperty}
    @return: iterator over the properties
    """
    layer = self.features_layer
    if layer is None:
        return
    for one_property in layer.get_properties():
        yield one_property
525
def get_entities(self):
    """
    Generator over all the entities of the entity layer (yields nothing when there is no such layer)
    @rtype: L{Centity}
    @return: iterator over the entities
    """
    layer = self.entity_layer
    if layer is None:
        return
    for one_entity in layer:
        yield one_entity
535 536
def get_entity(self,entity_id):
    """
    Looks up an entity in the entity layer
    @type entity_id: string
    @param entity_id: entity identifier
    @rtype: L{Centity}
    @return: what the entity layer returns for that identifier, or None when there is no entity layer
    """
    layer = self.entity_layer
    if layer is None:
        return None
    return layer.get_entity(entity_id)
549 550
def get_opinions(self):
    """
    Generator over all the opinions of the opinion layer (yields nothing when there is no such layer)
    @rtype: L{Copinion}
    @return: iterator over the opinions
    """
    layer = self.opinion_layer
    if layer is None:
        return
    for one_opinion in layer.get_opinions():
        yield one_opinion
560
def get_predicates(self):
    """
    Generator over all the predicates of the SRL layer (yields nothing when there is no such layer)
    @rtype: L{Cpredicate}
    @return: iterator over the predicates
    """
    layer = self.srl_layer
    if layer is None:
        return
    for predicate in layer.get_predicates():
        yield predicate
570
def get_raw(self):
    """
    Gives the raw text stored in the object
    @rtype: string
    @return: the raw text (None when no raw text is stored)
    """
    # When nothing is stored the attribute itself is None, so it can be handed back directly
    return self.raw
579
def set_raw(self,text):
    """
    Sets the text of the raw element (or creates the layer if does not exist)
    @param text: text of the raw layer
    @type text: string
    """
    node_raw = self.root.find('raw')
    if node_raw is None:
        # No raw element yet: create it as the first child of the root
        node_raw = etree.Element('raw')
        self.root.insert(0,node_raw)
    node_raw.text = etree.CDATA(text)
    # Keep the cached copy in sync so that get_raw() returns the text that was just set
    self.raw = text
591
def get_timeExpressions(self):
    """
    Generator over all the time expressions of the time layer (yields nothing when there is no such layer)
    @rtype: L{Ctime}
    @return: iterator over the time expressions
    """
    layer = self.timex_layer
    if layer is None:
        return
    for time_expression in layer.get_timeExpressions():
        yield time_expression
601
def dump(self,filename=None):
    """
    Dumps the object to an output filename (or open file descriptor). The filename
    parameter is optional, and if it is not provided, the standard output will be used
    @type filename: string or file descriptor
    @param filename: file where to dump the object (default standard output)
    """
    if filename is None:
        # Look the standard output up at call time (a default argument is bound once, when
        # the function is defined) and prefer its binary buffer when it has one, because
        # the document is serialised as UTF-8 encoded bytes
        filename = getattr(sys.stdout,'buffer',sys.stdout)
    self.tree.write(filename,encoding='UTF-8',pretty_print=True,xml_declaration=True)
611 612
def remove_entity_layer(self):
    """
    Drops the entity layer (if there is one) from the object in memory, and the
    'entities' entry from the header (if there is a header)
    """
    layer = self.entity_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.entity_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('entities')
623
def remove_dependency_layer(self):
    """
    Drops the dependency layer (if there is one) from the object in memory, together with the
    cached dependency extractor, and the 'deps' entry from the header (if there is a header)
    """
    layer = self.dependency_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.dependency_layer = None
        self.my_dependency_extractor = None

    header = self.header
    if header is not None:
        header.remove_lp('deps')
635 636
638 """ 639 Removes the temporalRelations layer (if exists) of the object (in memory) 640 """ 641 if self.temporalRelations_layer is not None: 642 this_node = self.temporalRelations_layer.get_node() 643 self.root.remove(this_node) 644 self.temporalRelations_layer = None 645 646 if self.header is not None: 647 self.header.remove_lp('temporalRelations')
648
650 """ 651 Removes the causalRelations layer (if exists) of the object (in memory) 652 """ 653 if self.causalRelations_layer is not None: 654 this_node = self.causalRelations_layer.get_node() 655 self.root.remove(this_node) 656 self.causalRelations_layer = None 657 658 if self.header is not None: 659 self.header.remove_lp('causalRelations')
660
662 """ 663 Removes the factualitylayer layer (the old version) (if exists) of the object (in memory) 664 """ 665 if self.factuality_layer is not None: 666 this_node = self.factuality_layer.get_node() 667 self.root.remove(this_node) 668 self.factuality_layer = None 669 670 if self.header is not None: 671 self.header.remove_lp('factualitylayer')
672
def remove_constituency_layer(self):
    """
    Removes the constituency layer (if exists) of the object (in memory)
    """
    if self.constituency_layer is not None:
        this_node = self.constituency_layer.get_node()
        self.root.remove(this_node)
        # Forget the layer and the extractor built on top of it, as remove_dependency_layer
        # does for its layer: otherwise a second call tries to remove the same node again
        # and get_constituency_extractor() keeps serving an extractor for a removed layer
        self.constituency_layer = None
        self.my_constituency_extractor = None

    if self.header is not None:
        self.header.remove_lp('constituents')
682 683
def remove_this_opinion(self,opinion_id):
    """
    Asks the opinion layer to remove one opinion (nothing happens when there is no opinion layer)
    @type opinion_id: string
    @param opinion_id: the opinion identifier of the opinion to remove
    """
    layer = self.opinion_layer
    if layer is None:
        return
    layer.remove_this_opinion(opinion_id)
692
def remove_opinion_layer(self):
    """
    Drops the opinion layer (if there is one) from the object in memory, and the
    'opinions' entry from the header (if there is a header)
    """
    layer = self.opinion_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.opinion_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('opinions')
704
def remove_properties(self):
    """
    Asks the features layer (if there is one) to remove its properties, and drops the
    'features' entry from the header (if there is a header)
    """
    layer = self.features_layer
    if layer is not None:
        layer.remove_properties()

    header = self.header
    if header is not None:
        header.remove_lp('features')
714 715
def remove_term_layer(self):
    """
    Removes the term layer (if exists) of the object (in memory)
    """
    if self.term_layer is not None:
        this_node = self.term_layer.get_node()
        self.root.remove(this_node)
        self.term_layer = None
        # The lazily built lookup tables were computed from the terms that are now gone;
        # drop them so that they are rebuilt on the next query
        self.dict_tokens_for_tid = None
        self.terms_for_token = None

    if self.header is not None:
        self.header.remove_lp('terms')
727 728 729
def remove_text_layer(self):
    """
    Drops the text layer (if there is one) from the object in memory, and the
    'text' entry from the header (if there is a header)
    """
    layer = self.text_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.text_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('text')
741 742
def remove_coreference_layer(self):
    """
    Removes the coreference layer (if exists) of the object (in memory)
    """
    if self.coreference_layer is not None:
        this_node = self.coreference_layer.get_node()
        self.root.remove(this_node)
        # Forget the layer, like the other remove_*_layer methods do: otherwise get_corefs()
        # keeps iterating a layer that is no longer part of the document and a second call
        # tries to remove the same node again
        self.coreference_layer = None

    if self.header is not None:
        self.header.remove_lp('coreferences')
752 753
755 """ 756 Takes information from factuality layer in old representation 757 Creates new factuality representation and removes the old layer 758 """ 759 if self.factuality_layer is not None: 760 this_node = self.factuality_layer.get_node() 761 if this_node.tag == 'factualitylayer': 762 new_node = Cfactualities() 763 #create dictionary from token ids to the term ids 764 token2term = {} 765 for t in self.get_terms(): 766 s = t.get_span() 767 for w in s.get_span_ids(): 768 token2term[w] = t.get_id() 769 fnr = 0 770 for fv in self.get_factvalues(): 771 fnr += 1 772 conf = fv.get_confidence() 773 wid = fv.get_id() 774 tid = token2term.get(wid) 775 fnode = Cfactuality() 776 #set span with tid as element 777 fspan = Cspan() 778 fspan.add_target_id(tid) 779 fnode.set_span(fspan) 780 #add factVal element with val, resource = factbank, + confidence if present 781 fVal = Cfactval() 782 fVal.set_resource('factbank') 783 fVal.set_value(fv.get_prediction()) 784 if conf: 785 fVal.set_confidence(conf) 786 fnode.set_id('f' + str(fnr)) 787 fnode.add_factval(fVal) 788 new_node.add_factuality(fnode) 789 self.root.remove(this_node) 790 self.root.append(new_node.get_node()) 791 self.factuality_layer = new_node
792 793
def get_constituency_extractor(self):
    """
    Gives the constituency extractor of this object, creating it the first time it is requested
    @rtype: L{Cconstituency_extractor}
    @return: a constituency extractor object, or None when there is no constituency layer
    """
    if self.constituency_layer is None:
        # Without the layer there are no constituents to extract
        return None

    if self.my_constituency_extractor is None:
        self.my_constituency_extractor = Cconstituency_extractor(self)
    return self.my_constituency_extractor
807
def get_dependency_extractor(self):
    """
    Gives the dependency extractor of this object, creating it the first time it is requested
    @rtype: L{Cdependency_extractor}
    @return: a dependency extractor object, or None when there is no dependency layer
    """
    if self.dependency_layer is None:
        # Without the layer there are no dependencies to extract
        return None

    if self.my_dependency_extractor is None:
        self.my_dependency_extractor = Cdependency_extractor(self)
    return self.my_dependency_extractor
820 821 ## ADDING METHODS
def add_wf(self,wf_obj):
    """
    Appends a token to the text layer; the layer is created and attached to the root when missing
    @type wf_obj: L{Cwf}
    @param wf_obj: the token object
    """
    layer = self.text_layer
    if layer is None:
        layer = Ctext(type=self.type)
        self.text_layer = layer
        self.root.append(layer.get_node())
    layer.add_wf(wf_obj)
832
def add_term(self,term_obj):
    """
    Adds a term to the term layer
    @type term_obj: L{Cterm}
    @param term_obj: the term object
    """
    if self.term_layer is None:
        self.term_layer = Cterms(type=self.type)
        self.root.append(self.term_layer.get_node())
    self.term_layer.add_term(term_obj)
    # The lazily built lookup tables do not know about the new term; drop them so that
    # they are rebuilt on the next query
    self.dict_tokens_for_tid = None
    self.terms_for_token = None
843
def add_markable(self,markable_obj):
    """
    Appends a markable to the markable layer; the layer is created and attached to the root when missing
    @type markable_obj: L{Cmarkable}
    @param markable_obj: the markable object
    """
    layer = self.markable_layer
    if layer is None:
        layer = Cmarkables(type=self.type)
        self.markable_layer = layer
        self.root.append(layer.get_node())
    layer.add_markable(markable_obj)
854 855
def add_opinion(self,opinion_obj):
    """
    Appends an opinion to the opinion layer; the layer is created and attached to the root when missing
    @type opinion_obj: L{Copinion}
    @param opinion_obj: the opinion object
    """
    layer = self.opinion_layer
    if layer is None:
        layer = Copinions()
        self.opinion_layer = layer
        self.root.append(layer.get_node())
    layer.add_opinion(opinion_obj)
866 867
def add_predicate(self, predicate_obj):
    """
    Appends a predicate to the SRL layer; the layer is created and attached to the root when missing
    @type predicate_obj: L{Cpredicate}
    @param predicate_obj: the predicate object
    """
    layer = self.srl_layer
    if layer is None:
        layer = Csrl()
        self.srl_layer = layer
        self.root.append(layer.get_node())
    layer.add_predicate(predicate_obj)
878
def add_timex(self, time_obj):
    """
    Appends a timex entry to the time layer; the layer is created and attached to the root when missing
    @type time_obj: L{Ctime}
    @param time_obj: the time object
    """
    layer = self.timex_layer
    if layer is None:
        layer = CtimeExpressions()
        self.timex_layer = layer
        self.root.append(layer.get_node())
    layer.add_timex(time_obj)
889 890
def set_header(self,header):
    """
    Sets the header of the object
    @type header: L{CHeader}
    @param header: the header object
    """
    # Remember the object as well as inserting its node: otherwise get_header() keeps
    # returning the previous value and add_linguistic_processor() creates a second header
    self.header = header
    self.root.insert(0,header.get_node())
898
def add_linguistic_processor(self, layer ,my_lp):
    """
    Registers a linguistic processor in the header; the header is created and inserted
    as first child of the root when missing
    @type my_lp: L{Clp}
    @param my_lp: linguistic processor object
    @type layer: string
    @param layer: the layer to which the processor is related to
    """
    header = self.header
    if header is None:
        header = CHeader(type=self.type)
        self.header = header
        self.root.insert(0,header.get_node())
    header.add_linguistic_processor(layer,my_lp)
911 912
def add_dependency(self,my_dep):
    """
    Appends a dependency to the dependency layer; the layer is created and attached to the root when missing
    @type my_dep: L{Cdependency}
    @param my_dep: dependency object
    """
    layer = self.dependency_layer
    if layer is None:
        layer = Cdependencies()
        self.dependency_layer = layer
        self.root.append(layer.get_node())
    layer.add_dependency(my_dep)
923 934
def add_predicateAnchor(self,my_predAnch):
    """
    Appends a predicateAnchor to the temporalRelations layer; the layer is created and
    attached to the root when missing
    @type my_predAnch: L{CpredicateAnchor}
    @param my_predAnch: predicateAnchor object
    """
    layer = self.temporalRelations_layer
    if layer is None:
        layer = CtemporalRelations()
        self.temporalRelations_layer = layer
        self.root.append(layer.get_node())
    layer.add_predicateAnchor(my_predAnch)
945 956
def add_factuality(self,my_fact):
    """
    Appends a factvalue to the factuality layer; when there is no factuality layer a
    L{Cfactualitylayer} is created and attached to the root first
    @type my_fact: L{Cfactvalue}
    @param my_fact: factvalue object
    """
    layer = self.factuality_layer
    if layer is None:
        layer = Cfactualitylayer()
        self.factuality_layer = layer
        self.root.append(layer.get_node())
    layer.add_factvalue(my_fact)
967
def add_entity(self,entity):
    """
    Appends an entity to the entity layer; the layer is created and attached to the root when missing
    @type entity: L{Centity}
    @param entity: the entity object
    """
    layer = self.entity_layer
    if layer is None:
        layer = Centities(type=self.type)
        self.entity_layer = layer
        self.root.append(layer.get_node())
    layer.add_entity(entity)
978 979
def add_coreference(self, coreference):
    """
    Appends a coreference to the coreference layer; the layer is created and attached to the root when missing
    @type coreference: L{Ccoreference}
    @param coreference: the coreference object
    """
    layer = self.coreference_layer
    if layer is None:
        layer = Ccoreferences(type=self.type)
        self.coreference_layer = layer
        self.root.append(layer.get_node())
    layer.add_coreference(coreference)
990 991
def add_constituency_tree(self,my_tree):
    """
    Appends a tree to the constituency layer; the layer is created and attached to the root when missing
    @type my_tree: L{Ctree}
    @param my_tree: the constituency tree object
    """
    layer = self.constituency_layer
    if layer is None:
        layer = Cconstituency()
        self.constituency_layer = layer
        self.root.append(layer.get_node())
    layer.add_tree(my_tree)
1002 1003 ## Adds a property to the feature layer
def add_property(self,label,term_span,pid=None):
    """
    Appends a property to the features layer; the layer is created and attached to the root when missing
    @type label: string
    @param label: the type of property
    @type term_span: list
    @param term_span: list of term ids
    @type pid: string
    @param pid: the identifier for the property (use None to automatically generate one)
    """
    layer = self.features_layer
    if layer is None:
        layer = Cfeatures(type=self.type)
        self.features_layer = layer
        self.root.append(layer.get_node())
    # The layer takes the identifier first
    layer.add_property(pid, label,term_span)
1018 1019 ## EXTRA FUNCTIONS 1020 ## Gets the token identifiers in the span of a term id
def get_dict_tokens_for_termid(self, term_id):
    """
    Gives the token ids that form the span of a term. The lookup table behind it
    is built from all the terms on the first call and reused afterwards
    @type term_id: string
    @param term_id: the term identifier
    @rtype: list
    @return: list of token ids that are the span of the term (empty list for an unknown term)
    """
    span_by_term = self.dict_tokens_for_tid
    if span_by_term is None:
        span_by_term = self.dict_tokens_for_tid = {}
        for one_term in self.get_terms():
            span_by_term[one_term.get_id()] = one_term.get_span().get_span_ids()

    return span_by_term.get(term_id,[])
1035 1036 ## Maps a list of token ids to term ids
def map_tokens_to_terms(self,list_tokens):
    """
    Translates token ids into the ids of the terms whose span contains them. The lookup
    table behind it is built from all the terms on the first call and reused afterwards
    @type list_tokens: list
    @param list_tokens: list of token identifiers
    @rtype: list
    @return: sorted list of term identifiers, without duplicates
    """
    terms_by_token = self.terms_for_token
    if terms_by_token is None:
        terms_by_token = self.terms_for_token = {}
        for one_term in self.get_terms():
            termid = one_term.get_id()
            for tokid in one_term.get_span().get_span_ids():
                terms_by_token.setdefault(tokid,[]).append(termid)

    found = set()
    for my_id in list_tokens:
        found.update(terms_by_token.get(my_id,[]))
    return sorted(found)
1061
def remove_tokens_of_sentence(self,sentence_id):
    """
    Removes the tokens belonging to the supplied sentence
    @type sentence_id: string
    @param sentence_id: a sentence identifier
    """
    # Nothing to remove without a text layer (same guard as the other layer helpers)
    if self.text_layer is not None:
        self.text_layer.remove_tokens_of_sentence(sentence_id)
1069
def remove_terms(self,list_term_ids):
    """
    Removes the list of terms specified
    @type list_term_ids: list
    @param list_term_ids: list of term identifiers
    """
    # Nothing to remove without a term layer (same guard as the other layer helpers)
    if self.term_layer is not None:
        self.term_layer.remove_terms(list_term_ids)
        # The lazily built lookup tables may still mention the removed terms; drop them
        # so that they are rebuilt on the next query
        self.dict_tokens_for_tid = None
        self.terms_for_token = None
1077
def add_external_reference(self,term_id, external_ref):
    """
    Adds an external reference to the given term identifier. It only forwards
    its arguments to add_external_reference_to_term
    @type term_id: string
    @param term_id: the term identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    forward = self.add_external_reference_to_term
    forward(term_id, external_ref)
1080 1081 1082
def add_external_reference_to_term(self,term_id, external_ref):
    """
    Attaches an external reference to a term through the term layer (nothing happens when there is no term layer)
    @type term_id: string
    @param term_id: the term identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    layer = self.term_layer
    if layer is None:
        return
    layer.add_external_reference(term_id, external_ref)
1093 1094
1096 """ 1097 Removes all external references present in the term layer 1098 """ 1099 if self.term_layer is not None: 1100 for term in self.term_layer: 1101 term.remove_external_references()
1102
def add_external_reference_to_role(self,role_id,external_ref):
    """
    Attaches an external reference to a role through the SRL layer (nothing happens when there is no SRL layer)
    @type role_id: string
    @param role_id: the role identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    layer = self.srl_layer
    if layer is None:
        return
    layer.add_external_reference_to_role(role_id,external_ref)
1113 1114 1115
1117 """ 1118 Removes all external references present in the term layer 1119 """ 1120 if self.srl_layer is not None: 1121 for pred in self.srl_layer.get_predicates(): 1122 pred.remove_external_references() 1123 pred.remove_external_references_from_roles()
1124
def add_external_reference_to_entity(self,entity_id, external_ref):
    """
    Attaches an external reference to an entity through the entity layer (nothing happens when there is no entity layer)
    @type entity_id: string
    @param entity_id: the entity identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    layer = self.entity_layer
    if layer is None:
        return
    layer.add_external_reference_to_entity(entity_id,external_ref)
1135