KafNafParserMod

def __init__(self,filename=None,type=None):
    """
    The constructor for the parser
    @type filename: string
    @param filename: KAF/NAF filename. Set it to None to create an empty file
    @type type: string
    @param type: to indicate if the file will be a NAF or a KAF file, in case of new files.
    """
    # Always define the attribute, so get_filename() also works for
    # objects that were created from scratch (filename=None).
    self.filename = filename

    self.tree = None
    if filename is not None:
        self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
    else:
        self.tree = etree.ElementTree(etree.Element(type))
    self.root = self.tree.getroot()
    self.type = self.root.tag   # KAF NAF

    self.header = None
    self.text_layer = None
    self.term_layer = None
    self.entity_layer = None
    self.features_layer = None
    self.opinion_layer = None
    self.constituency_layer = None
    self.dependency_layer = None
    self.coreference_layer = None
    self.srl_layer = None
    self.raw = None
    self.timex_layer = None
    self.causalRelations_layer = None
    self.temporalRelations_layer = None
    self.factuality_layer = None
    self.markable_layer = None

    ## Specific feature extractor for complicated layers (created lazily)
    self.my_dependency_extractor = None
    self.my_constituency_extractor = None

    ## Lazily built lookup tables between term ids and token ids
    self.dict_tokens_for_tid = None
    self.terms_for_token = None

    self.lang = self.root.get('{http://www.w3.org/XML/1998/namespace}lang')
    self.version = self.root.get('version')

    # Default to "no header" so an unexpected root tag does not leave
    # node_header unbound.
    node_header = None
    if self.type == 'NAF':
        node_header = self.root.find('nafHeader')
    elif self.type == 'KAF':
        node_header = self.root.find('kafHeader')

    if node_header is not None:
        self.header = CHeader(node_header,self.type)

    # Text layer adapted to naf/kaf
    node_text = self.root.find('text')
    if node_text is not None:
        self.text_layer = Ctext(node=node_text,type=self.type)

    node_term = self.root.find('terms')
    if node_term is not None:
        self.term_layer = Cterms(node=node_term,type=self.type)

    node_entity = self.root.find('entities')
    if node_entity is not None:
        self.entity_layer = Centities(node_entity,type=self.type)

    node_features = self.root.find('features')
    if node_features is not None:
        self.features_layer = Cfeatures(node_features,type=self.type)

    node_opinions = self.root.find('opinions')
    if node_opinions is not None:
        self.opinion_layer = Copinions(node_opinions,type=self.type)

    # Definition KAF/NAF is the same
    node_constituency = self.root.find('constituency')
    if node_constituency is not None:
        self.constituency_layer = Cconstituency(node_constituency)

    # Definition KAF/NAF is the same
    node_dependency = self.root.find('deps')
    if node_dependency is not None:
        self.dependency_layer = Cdependencies(node_dependency)

    node_coreferences = self.root.find('coreferences')
    if node_coreferences is not None:
        self.coreference_layer = Ccoreferences(node_coreferences,type=self.type)

    node_srl = self.root.find('srl')
    if node_srl is not None:
        self.srl_layer = Csrl(node_srl)

    node_timex = self.root.find('timeExpressions')
    if node_timex is not None:
        self.timex_layer = CtimeExpressions(node_timex)

    node_temporalRelations = self.root.find('temporalRelations')
    if node_temporalRelations is not None:
        self.temporalRelations_layer = CtemporalRelations(node_temporalRelations)

    node_causalRelations = self.root.find('causalRelations')
    if node_causalRelations is not None:
        self.causalRelations_layer = CcausalRelations(node_causalRelations)

    # Old factuality representation ...
    node_factualitylayer = self.root.find('factualitylayer')
    if node_factualitylayer is not None:
        self.factuality_layer = Cfactualitylayer(node_factualitylayer)

    # ... the new representation takes precedence when both are present
    node_factualities = self.root.find('factualities')
    if node_factualities is not None:
        self.factuality_layer = Cfactualities(node_factualities)

    node_raw = self.root.find('raw')
    if node_raw is not None:
        self.raw = node_raw.text

    node_markables = self.root.find('markables')
    if node_markables is not None:
        self.markable_layer = Cmarkables(node_markables)

175

def get_header(self):
    """
    Gives access to the header of the document
    @rtype: L{CHeader}
    @return: the header object stored in this parser (None when there is no header)
    """
    header = self.header
    return header

183

def set_language(self,l):
    """
    Sets the language to the KAF/NAF root element
    @param l: the language code
    @type l: string
    """
    self.root.set('{http://www.w3.org/XML/1998/namespace}lang',l)
    # Keep the cached value in sync so get_language() reflects the change
    self.lang = l

191

def set_version(self,v):
    """
    Sets the version to the KAF/NAF root element
    @param v: the version string
    @type v: string
    """
    self.root.set('version',v)
    # Keep the cached value in sync with the XML attribute
    self.version = v

199

def get_type(self):
    """
    Tells whether the object is a NAF or a KAF document
    @rtype: string
    @return: the type of the file (the tag of the root element)
    """
    doc_type = self.type
    return doc_type

208

def get_filename(self):
    """
    Returns the name of the file the object was loaded from
    @rtype: string
    @return: the filename of the KAF/NAF object, or None if the object
    was created from scratch and has no associated file
    """
    # The attribute may be missing on objects built without a filename
    return getattr(self, 'filename', None)

216

def to_kaf(self):
    """
    Converts a NAF object to KAF (in memory). You will have to use the method dump later to save it as a new KAF file
    """
    if self.type == 'NAF':
        self.root.tag = self.type = 'KAF'

    # Layers are converted in this fixed order; each one is looked up
    # right before its conversion and skipped when it is not present.
    # Constituency is identical in KAF/NAF; dependencies, temporal and
    # causal relations and factuality are not defined in KAF, so they
    # are assumed to follow the same rules.
    layer_names = (
        'header',
        'text_layer',
        'term_layer',
        'entity_layer',
        'features_layer',
        'opinion_layer',
        'constituency_layer',
        'dependency_layer',
        'coreference_layer',
        'temporalRelations_layer',
        'causalRelations_layer',
        'factuality_layer',
    )
    for layer_name in layer_names:
        layer = getattr(self, layer_name)
        if layer is not None:
            layer.to_kaf()

282

def to_naf(self):
    """
    Converts a KAF object to NAF (in memory). You will have to use the method dump later to save it as a new NAF file
    """
    if self.type == 'KAF':
        self.root.tag = 'NAF'
        self.type = 'NAF'

    # Layers are converted in this fixed order; each one is looked up
    # right before its conversion and skipped when it is not present.
    # Several of these layers are not defined in KAF, so both formats
    # are assumed to be similar for them.
    layer_names = (
        'header',
        'text_layer',
        'term_layer',
        'entity_layer',
        'features_layer',
        'opinion_layer',
        'constituency_layer',
        'dependency_layer',
        'coreference_layer',
        'temporalRelations_layer',
        'causalRelations_layer',
        'factuality_layer',
        'markable_layer',
    )
    for layer_name in layer_names:
        layer = getattr(self, layer_name)
        if layer is not None:
            layer.to_naf()

356 357

def print_constituency(self):
    """
    Prints the constituency layer to the standard output
    """
    # Call form with a single argument: prints the same on Python 2 and
    # is valid syntax on Python 3 (the print statement is not).
    print(self.constituency_layer)

363

def get_trees(self):
    """
    Generator over the trees of the constituency layer
    @rtype: L{Ctree}
    @return: iterator to all the constituency trees (empty if there is no constituency layer)
    """
    layer = self.constituency_layer
    if layer is None:
        return
    for one_tree in layer.get_trees():
        yield one_tree

374

def get_trees_as_list(self):
    """
    Collects the constituency trees in a list
    @rtype: list of L{Ctree}
    @return: list with all the constituency trees (empty if there is no constituency layer)
    """
    layer = self.constituency_layer
    if layer is None:
        return []
    return list(layer.get_trees())

386

def get_dependencies(self):
    """
    Generator over the dependencies of the dependency layer. Use it as:
    for my_dep in my_obj.get_dependencies():
    @rtype: L{Cdependency}
    @returns: iterator to get all the dependencies (empty if there is no dependency layer)
    """
    layer = self.dependency_layer
    if layer is None:
        return
    for one_dep in layer.get_dependencies():
        yield one_dep

397

def get_tlinks(self):
    """
    Generator over the tlinks of the temporalRelations layer. Use it as:
    for my_tlink in my_obj.get_tlinks():
    @rtype: L{Ctlink}
    @returns: iterator to get all the tlinks (empty if there is no temporalRelations layer)
    """
    layer = self.temporalRelations_layer
    if layer is None:
        return
    for one_tlink in layer.get_tlinks():
        yield one_tlink

408

def get_clinks(self):
    """
    Generator over the clinks of the causalRelations layer. Use it as:
    for my_clink in my_obj.get_clinks():
    @rtype: L{Cclink}
    @returns: iterator to get all the clinks (empty if there is no causalRelations layer)
    """
    layer = self.causalRelations_layer
    if layer is None:
        return
    for one_clink in layer.get_clinks():
        yield one_clink

419

def get_factvalues(self):
    """
    Generator over the factvalues of the factuality layer. Use it as:
    for my_fact in my_obj.get_factvalues():
    @rtype: L{Cfactvalue}
    @returns: iterator to get all the factvalues (empty if there is no factuality layer)
    """
    layer = self.factuality_layer
    if layer is None:
        return
    for one_fact in layer.get_factvalues():
        yield one_fact

430

def get_corefs(self):
    """
    Generator over the corefs of the coreferences layer.
    @rtype: L{Ccoreference}
    @returns: iterator to get all the coreferences (empty if there is no coreference layer)
    """
    layer = self.coreference_layer
    if layer is None:
        return
    for one_coref in layer.get_corefs():
        yield one_coref

440

def get_language(self):
    """
    Gives the language code stored for the file
    @rtype: string
    @returns: language code of the file
    """
    language = self.lang
    return language

448 449

def get_tokens(self):
    """Iterator that returns all the tokens from the text layer
    @rtype: L{Cwf}
    @return: iterator over the token objects (empty if there is no text layer)
    """
    # Guard against a missing text layer, like the other layer iterators do
    if self.text_layer is not None:
        for token in self.text_layer:
            yield token

457

def get_terms(self):
    """Generator over all the terms from the term layer
    @rtype: L{Cterm}
    @return: iterator over the term objects (empty if there is no term layer)
    """
    layer = self.term_layer
    if layer is None:
        return
    for one_term in layer:
        yield one_term

466

def get_markables(self):
    """Generator over all the markables from the markable layer
    @rtype: L{Cmarkable}
    @return: iterator over the markable objects (empty if there is no markable layer)
    """
    layer = self.markable_layer
    if layer is None:
        return
    for one_markable in layer:
        yield one_markable

475

def get_markable(self,markable_id):
    """
    Looks up a markable by its identifier
    @type markable_id:string
    @param markable_id: markable identifier
    @rtype: L{Cmarkable}
    @return: markable object (None if there is no markable layer)
    """
    layer = self.markable_layer
    if layer is None:
        return None
    return layer.get_markable(markable_id)

488

def get_token(self,token_id):
    """
    Looks up a token by its identifier
    @type token_id:string
    @param token_id: token identifier
    @rtype: L{Cwf}
    @return: token object (None if there is no text layer)
    """
    layer = self.text_layer
    if layer is None:
        return None
    return layer.get_wf(token_id)

501 502

def get_term(self,term_id):
    """
    Looks up a term by its identifier
    @type term_id:string
    @param term_id: term identifier
    @rtype: L{Cterm}
    @return: term object (None if there is no term layer)
    """
    layer = self.term_layer
    if layer is None:
        return None
    return layer.get_term(term_id)

515

def get_properties(self):
    """
    Generator over all the properties of the features layer
    @rtype: L{Cproperty}
    @return: iterator over the properties (empty if there is no features layer)
    """
    layer = self.features_layer
    if layer is None:
        return
    for one_property in layer.get_properties():
        yield one_property

525

def get_entities(self):
    """
    Generator over all the entities in the object
    @rtype: L{Centity}
    @return: iterator over the entities (empty if there is no entity layer)
    """
    layer = self.entity_layer
    if layer is None:
        return
    for one_entity in layer:
        yield one_entity

535 536

def get_entity(self,entity_id):
    """
    Looks up an entity by its identifier
    @type entity_id:string
    @param entity_id: entity identifier
    @rtype: L{Centity}
    @return: entity object (None if there is no entity layer)
    """
    layer = self.entity_layer
    if layer is None:
        return None
    return layer.get_entity(entity_id)

549 550

def get_opinions(self):
    """
    Generator over all the opinions in the object
    @rtype: L{Copinion}
    @return: iterator over the opinions (empty if there is no opinion layer)
    """
    layer = self.opinion_layer
    if layer is None:
        return
    for one_opinion in layer.get_opinions():
        yield one_opinion

560

def get_predicates(self):
    """
    Generator over all the predicates in the object
    @rtype: L{Cpredicate}
    @return: iterator over the predicates (empty if there is no SRL layer)
    """
    layer = self.srl_layer
    if layer is None:
        return
    for one_predicate in layer.get_predicates():
        yield one_predicate

570

def get_raw(self):
    """
    Gives the raw text stored in the object
    @rtype: string
    @return: the raw text (None when no raw text is stored)
    """
    # Returning the attribute directly also yields None when it is unset
    return self.raw

579

def set_raw(self,text):
    """
    Sets the text of the raw element (or creates the layer if does not exist)
    @param text: text of the raw layer
    @type text: string
    """
    node_raw = self.root.find('raw')
    if node_raw is None:
        node_raw = etree.Element('raw')
        self.root.insert(0,node_raw)
    node_raw.text = etree.CDATA(text)
    # Keep the cached copy in sync so get_raw() returns the new text
    self.raw = text

591

def get_timeExpressions(self):
    """
    Generator over all the time expressions in the text
    @rtype: L{Ctime}
    @return: iterator over the time expressions (empty if there is no time expression layer)
    """
    layer = self.timex_layer
    if layer is None:
        return
    for one_timex in layer.get_timeExpressions():
        yield one_timex

601

def dump(self,filename=None):
    """
    Dumps the object to an output filename (or open file descriptor). The filename
    parameter is optional, and if it is not provided, the standard output will be used
    @type filename: string or file descriptor
    @param filename: file where to dump the object (default standard output)
    """
    if filename is None:
        # Resolve the standard output at call time (so redirections are
        # honoured). The tree is written as UTF-8 encoded bytes, so on
        # Python 3 the underlying binary buffer is used; on Python 2
        # sys.stdout has no 'buffer' attribute and is used directly.
        filename = getattr(sys.stdout, 'buffer', sys.stdout)

    self.tree.write(filename,encoding='UTF-8',pretty_print=True,xml_declaration=True)

611 612

def remove_entity_layer(self):
    """
    Drops the entity layer (if it exists) from the in-memory object, and
    the linguistic processors registered for 'entities' from the header
    """
    layer = self.entity_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.entity_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('entities')

623

def remove_dependency_layer(self):
    """
    Drops the dependency layer (if it exists) from the in-memory object,
    together with the cached dependency extractor, and the linguistic
    processors registered for 'deps' from the header
    """
    layer = self.dependency_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.dependency_layer = None
        self.my_dependency_extractor = None

    header = self.header
    if header is not None:
        header.remove_lp('deps')

635 636

def remove_temporalRelations_layer(self):
    """
    Drops the temporalRelations layer (if it exists) from the in-memory
    object, and the linguistic processors registered for
    'temporalRelations' from the header
    """
    layer = self.temporalRelations_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.temporalRelations_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('temporalRelations')

648

def remove_causalRelations_layer(self):
    """
    Drops the causalRelations layer (if it exists) from the in-memory
    object, and the linguistic processors registered for
    'causalRelations' from the header
    """
    layer = self.causalRelations_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.causalRelations_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('causalRelations')

660

def remove_factualitylayer_layer(self):
    """
    Drops the factuality layer currently held by the object (if any) from
    the in-memory object, and the linguistic processors registered for
    'factualitylayer' (the old version of the layer) from the header
    """
    layer = self.factuality_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.factuality_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('factualitylayer')

672

def remove_constituency_layer(self):
    """
    Removes the constituency layer (if exists) of the object (in memory)
    """
    if self.constituency_layer is not None:
        this_node = self.constituency_layer.get_node()
        self.root.remove(this_node)
        # Forget the layer and its cached extractor, as the other
        # remove_*_layer methods do; otherwise a second call would try to
        # remove the detached node again and the getters would keep
        # serving the removed layer.
        self.constituency_layer = None
        self.my_constituency_extractor = None

    if self.header is not None:
        self.header.remove_lp('constituents')

682 683

def remove_this_opinion(self,opinion_id):
    """
    Drops from the opinion layer the opinion with the given identifier
    (nothing is done if there is no opinion layer)
    @type opinion_id: string
    @param opinion_id: the opinion identifier of the opinion to remove
    """
    layer = self.opinion_layer
    if layer is None:
        return
    layer.remove_this_opinion(opinion_id)

692

def remove_opinion_layer(self):
    """
    Drops the opinion layer (if it exists) from the in-memory object, and
    the linguistic processors registered for 'opinions' from the header
    """
    layer = self.opinion_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.opinion_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('opinions')

704

def remove_properties(self):
    """
    Asks the features layer (if it exists) to remove its properties, and
    drops the linguistic processors registered for 'features' from the header
    """
    layer = self.features_layer
    if layer is not None:
        layer.remove_properties()

    header = self.header
    if header is not None:
        header.remove_lp('features')

714 715

def remove_term_layer(self):
    """
    Removes the term layer (if exists) of the object (in memory)
    """
    if self.term_layer is not None:
        this_node = self.term_layer.get_node()
        self.root.remove(this_node)
        self.term_layer = None
        # The lookup tables built from the terms are no longer valid;
        # drop them so they are rebuilt on the next request.
        self.dict_tokens_for_tid = None
        self.terms_for_token = None

    if self.header is not None:
        self.header.remove_lp('terms')

727 728 729

def remove_text_layer(self):
    """
    Drops the text layer (if it exists) from the in-memory object, and
    the linguistic processors registered for 'text' from the header
    """
    layer = self.text_layer
    if layer is not None:
        self.root.remove(layer.get_node())
        self.text_layer = None

    header = self.header
    if header is not None:
        header.remove_lp('text')

741 742

def remove_coreference_layer(self):
    """
    Removes the coreference layer (if exists) of the object (in memory)
    """
    if self.coreference_layer is not None:
        this_node = self.coreference_layer.get_node()
        self.root.remove(this_node)
        # Forget the layer, as the other remove_*_layer methods do;
        # otherwise a second call would try to remove the detached node
        # again and get_corefs() would keep serving the removed layer.
        self.coreference_layer = None

    if self.header is not None:
        self.header.remove_lp('coreferences')

752 753

def convert_factualitylayer_to_factualities(self):
    """
    Rewrites a factuality layer in the old representation ('factualitylayer')
    into the new one, replacing the old node in the tree by the new one.
    Nothing is done when there is no factuality layer or when its node
    is not a 'factualitylayer' element.
    """
    old_layer = self.factuality_layer
    if old_layer is None:
        return

    old_node = old_layer.get_node()
    if old_node.tag != 'factualitylayer':
        return

    new_layer = Cfactualities()

    # Map every token id to the id of the term whose span contains it
    # (the last such term wins when a token is in several spans)
    term_for_token = {}
    for term in self.get_terms():
        for token_id in term.get_span().get_span_ids():
            term_for_token[token_id] = term.get_id()

    # One new factuality per old factvalue, numbered f1, f2, ...
    for counter, factvalue in enumerate(self.get_factvalues(), 1):
        confidence = factvalue.get_confidence()
        term_id = term_for_token.get(factvalue.get_id())

        factuality = Cfactuality()

        # The span holds the term found for the token of the old factvalue
        span = Cspan()
        span.add_target_id(term_id)
        factuality.set_span(span)

        # factVal element: value, resource = factbank, + confidence if present
        fact_val = Cfactval()
        fact_val.set_resource('factbank')
        fact_val.set_value(factvalue.get_prediction())
        if confidence:
            fact_val.set_confidence(confidence)

        factuality.set_id('f' + str(counter))
        factuality.add_factval(fact_val)
        new_layer.add_factuality(factuality)

    self.root.remove(old_node)
    self.root.append(new_layer.get_node())
    self.factuality_layer = new_layer

792 793

def get_constituency_extractor(self):
    """
    Gives the constituency extractor of the object, creating it the
    first time it is requested
    @rtype: L{Cconstituency_extractor}
    @return: a constituency extractor object (None if there is no constituency layer)
    """
    # Without a constituency layer there are no constituents to extract
    if self.constituency_layer is None:
        return None

    if self.my_constituency_extractor is None:
        self.my_constituency_extractor = Cconstituency_extractor(self)
    return self.my_constituency_extractor

807

def get_dependency_extractor(self):
    """
    Gives the dependency extractor of the object, creating it the first
    time it is requested
    @rtype: L{Cdependency_extractor}
    @return: a dependency extractor object (None if there is no dependency layer)
    """
    # Without a dependency layer there are no dependencies to extract
    if self.dependency_layer is None:
        return None

    if self.my_dependency_extractor is None:
        self.my_dependency_extractor = Cdependency_extractor(self)
    return self.my_dependency_extractor

820 821 ## ADDING METHODS

def add_wf(self,wf_obj):
    """
    Appends a token to the text layer, creating the layer (and attaching
    it to the root) if it does not exist yet
    @type wf_obj: L{Cwf}
    @param wf_obj: the token object
    """
    layer = self.text_layer
    if layer is None:
        layer = self.text_layer = Ctext(type=self.type)
        self.root.append(layer.get_node())
    layer.add_wf(wf_obj)

832

def add_term(self,term_obj):
    """
    Adds a term to the term layer (the layer is created and attached to
    the root if it does not exist yet)
    @type term_obj: L{Cterm}
    @param term_obj: the term object
    """
    if self.term_layer is None:
        self.term_layer = Cterms(type=self.type)
        self.root.append(self.term_layer.get_node())
    self.term_layer.add_term(term_obj)
    # The term/token lookup tables are built once from the existing terms;
    # drop them so the new term is taken into account on the next request.
    self.dict_tokens_for_tid = None
    self.terms_for_token = None

843

def add_markable(self,markable_obj):
    """
    Appends a markable to the markable layer, creating the layer (and
    attaching it to the root) if it does not exist yet
    @type markable_obj: L{Cmarkable}
    @param markable_obj: the markable object
    """
    layer = self.markable_layer
    if layer is None:
        layer = self.markable_layer = Cmarkables(type=self.type)
        self.root.append(layer.get_node())
    layer.add_markable(markable_obj)

854 855

def add_opinion(self,opinion_obj):
    """
    Adds an opinion to the opinion layer (the layer is created and
    attached to the root if it does not exist yet)
    @type opinion_obj: L{Copinion}
    @param opinion_obj: the opinion object
    """
    if self.opinion_layer is None:
        # Create the layer with the type of this document, in the same way
        # the constructor does when it loads an existing opinion layer
        self.opinion_layer = Copinions(type=self.type)
        self.root.append(self.opinion_layer.get_node())
    self.opinion_layer.add_opinion(opinion_obj)

866 867

def add_predicate(self, predicate_obj):
    """
    Appends a predicate to the semantic (SRL) layer, creating the layer
    (and attaching it to the root) if it does not exist yet
    @type predicate_obj: L{Cpredicate}
    @param predicate_obj: the predicate object
    """
    layer = self.srl_layer
    if layer is None:
        layer = self.srl_layer = Csrl()
        self.root.append(layer.get_node())
    layer.add_predicate(predicate_obj)

878

def add_timex(self, time_obj):
    """
    Appends a timex entry to the time expression layer, creating the
    layer (and attaching it to the root) if it does not exist yet
    @type time_obj: L{Ctime}
    @param time_obj: the time object
    """
    layer = self.timex_layer
    if layer is None:
        layer = self.timex_layer = CtimeExpressions()
        self.root.append(layer.get_node())
    layer.add_timex(time_obj)

889 890

def set_header(self,header):
    """
    Sets the header of the object
    @type header: L{CHeader}
    @param header: the header object
    """
    # Remember the header object too, so get_header() returns it and
    # add_linguistic_processor() reuses it instead of creating another one
    self.header = header
    self.root.insert(0,header.get_node())

898

def add_linguistic_processor(self, layer ,my_lp):
    """
    Registers a linguistic processor in the header, creating the header
    (and inserting it as first child of the root) if it does not exist yet
    @type my_lp: L{Clp}
    @param my_lp: linguistic processor object
    @type layer: string
    @param layer: the layer to which the processor is related to
    """
    header = self.header
    if header is None:
        header = self.header = CHeader(type=self.type)
        self.root.insert(0,header.get_node())
    header.add_linguistic_processor(layer,my_lp)

911 912

def add_dependency(self,my_dep):
    """
    Appends a dependency to the dependency layer, creating the layer
    (and attaching it to the root) if it does not exist yet
    @type my_dep: L{Cdependency}
    @param my_dep: dependency object
    """
    layer = self.dependency_layer
    if layer is None:
        layer = self.dependency_layer = Cdependencies()
        self.root.append(layer.get_node())
    layer.add_dependency(my_dep)

923

def add_tlink(self,my_tlink):
    """
    Appends a tlink to the temporalRelations layer, creating the layer
    (and attaching it to the root) if it does not exist yet
    @type my_tlink: L{Ctlink}
    @param my_tlink: tlink object
    """
    layer = self.temporalRelations_layer
    if layer is None:
        layer = self.temporalRelations_layer = CtemporalRelations()
        self.root.append(layer.get_node())
    layer.add_tlink(my_tlink)

934

def add_predicateAnchor(self,my_predAnch):
    """
    Appends a predicateAnchor to the temporalRelations layer, creating
    the layer (and attaching it to the root) if it does not exist yet
    @type my_predAnch: L{CpredicateAnchor}
    @param my_predAnch: predicateAnchor object
    """
    layer = self.temporalRelations_layer
    if layer is None:
        layer = self.temporalRelations_layer = CtemporalRelations()
        self.root.append(layer.get_node())
    layer.add_predicateAnchor(my_predAnch)

945

def add_clink(self,my_clink):
    """
    Appends a clink to the causalRelations layer, creating the layer
    (and attaching it to the root) if it does not exist yet
    @type my_clink: L{Cclink}
    @param my_clink: clink object
    """
    layer = self.causalRelations_layer
    if layer is None:
        layer = self.causalRelations_layer = CcausalRelations()
        self.root.append(layer.get_node())
    layer.add_clink(my_clink)

956

def add_factuality(self,my_fact):
    """
    Appends a factvalue to the factuality layer; if there is no
    factuality layer yet, a L{Cfactualitylayer} is created and attached
    to the root
    @type my_fact: L{Cfactvalue}
    @param my_fact: factvalue object
    """
    layer = self.factuality_layer
    if layer is None:
        layer = self.factuality_layer = Cfactualitylayer()
        self.root.append(layer.get_node())
    layer.add_factvalue(my_fact)

967

def add_entity(self,entity):
    """
    Appends an entity to the entity layer, creating the layer (and
    attaching it to the root) if it does not exist yet
    @type entity: L{Centity}
    @param entity: the entity object
    """
    layer = self.entity_layer
    if layer is None:
        layer = self.entity_layer = Centities(type=self.type)
        self.root.append(layer.get_node())
    layer.add_entity(entity)

978 979

def add_coreference(self, coreference):
    """
    Appends a coreference to the coreference layer, creating the layer
    (and attaching it to the root) if it does not exist yet
    @type coreference: L{Ccoreference}
    @param coreference: the coreference object
    """
    layer = self.coreference_layer
    if layer is None:
        layer = self.coreference_layer = Ccoreferences(type=self.type)
        self.root.append(layer.get_node())
    layer.add_coreference(coreference)

990 991

def add_constituency_tree(self,my_tree):
    """
    Appends a constituency tree to the constituency layer, creating the
    layer (and attaching it to the root) if it does not exist yet
    @type my_tree: L{Ctree}
    @param my_tree: the constituency tree object
    """
    layer = self.constituency_layer
    if layer is None:
        layer = self.constituency_layer = Cconstituency()
        self.root.append(layer.get_node())
    layer.add_tree(my_tree)

1002 1003 ## Adds a property to the feature layer

def add_property(self,label,term_span,pid=None):
    """
    Appends a property to the features layer, creating the layer (and
    attaching it to the root) if it does not exist yet
    @type label: string
    @param label: the type of property
    @type term_span: list
    @param term_span: list of term ids
    @type pid: string
    @param pid: the identifier for the property (use None to automatically generate one)
    """
    layer = self.features_layer
    if layer is None:
        layer = self.features_layer = Cfeatures(type=self.type)
        self.root.append(layer.get_node())
    # Note the argument order expected by the layer: identifier first
    layer.add_property(pid, label,term_span)

1018 1019 ## EXTRA FUNCTIONS 1020 ## Gets the token identifiers in the span of a term id

def get_dict_tokens_for_termid(self, term_id):
    """
    Gives the token ids in the span of the given term. The mapping from
    term ids to token ids is built on the first call and reused afterwards.
    @type term_id: string
    @param term_id: the term identifier
    @rtype: list
    @return: list of token ids that are the span of the term (empty list for unknown terms)
    """
    spans = self.dict_tokens_for_tid
    if spans is None:
        spans = self.dict_tokens_for_tid = {}
        for one_term in self.get_terms():
            spans[one_term.get_id()] = one_term.get_span().get_span_ids()

    return spans.get(term_id,[])

1035 1036 ## Maps a list of token ids to term ids

def map_tokens_to_terms(self,list_tokens):
    """
    Gives the terms whose span contains any of the given tokens. The
    mapping from token ids to term ids is built on the first call and
    reused afterwards.
    @type list_tokens: list
    @param list_tokens: list of token identifiers
    @rtype: list
    @return: sorted list of term identifiers, without duplicates
    """
    index = self.terms_for_token
    if index is None:
        index = self.terms_for_token = {}
        for one_term in self.get_terms():
            term_id = one_term.get_id()
            for token_id in one_term.get_span().get_span_ids():
                index.setdefault(token_id, []).append(term_id)

    matched = set()
    for token_id in list_tokens:
        matched.update(index.get(token_id,[]))
    return sorted(matched)

1061

def remove_tokens_of_sentence(self,sentence_id):
    """
    Removes the tokens belonging to the supplied sentence (nothing is
    done if there is no text layer)
    @type sentence_id: string
    @param sentence_id: a sentence identifier
    """
    # Without a text layer there is nothing to remove
    if self.text_layer is not None:
        self.text_layer.remove_tokens_of_sentence(sentence_id)

1069

def remove_terms(self,list_term_ids):
    """
    Removes the list of terms specified (nothing is done if there is no
    term layer)
    @type list_term_ids: list
    @param list_term_ids: list of term identifiers
    """
    # Without a term layer there is nothing to remove
    if self.term_layer is not None:
        self.term_layer.remove_terms(list_term_ids)
        # The term/token lookup tables may mention the removed terms;
        # drop them so they are rebuilt on the next request.
        self.dict_tokens_for_tid = None
        self.terms_for_token = None

1077

def add_external_reference(self,term_id, external_ref):
    """
    Shortcut for L{add_external_reference_to_term}: adds an external
    reference to the given term identifier
    @type term_id: string
    @param term_id: the term identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    self.add_external_reference_to_term(term_id=term_id, external_ref=external_ref)

1080 1081 1082

def add_external_reference_to_term(self,term_id, external_ref):
    """
    Attaches an external reference to the term with the given identifier
    (nothing is done if there is no term layer)
    @type term_id: string
    @param term_id: the term identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    layer = self.term_layer
    if layer is None:
        return
    layer.add_external_reference(term_id, external_ref)

1093 1094

def remove_external_references_from_terms(self):
    """
    Strips the external references from every term of the term layer
    (nothing is done if there is no term layer)
    """
    layer = self.term_layer
    if layer is None:
        return
    for one_term in layer:
        one_term.remove_external_references()

1102

def add_external_reference_to_role(self,role_id,external_ref):
    """
    Attaches an external reference to the role with the given identifier
    in the SRL layer (nothing is done if there is no SRL layer)
    @type role_id: string
    @param role_id: the role identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    layer = self.srl_layer
    if layer is None:
        return
    layer.add_external_reference_to_role(role_id,external_ref)

1113 1114 1115

def remove_external_references_from_srl_layer(self):
    """
    Strips the external references from every predicate of the SRL layer
    and from the roles of each predicate (nothing is done if there is no
    SRL layer)
    """
    layer = self.srl_layer
    if layer is None:
        return
    for predicate in layer.get_predicates():
        predicate.remove_external_references()
        predicate.remove_external_references_from_roles()

1124

def add_external_reference_to_entity(self,entity_id, external_ref):
    """
    Attaches an external reference to the entity with the given
    identifier in the entity layer (nothing is done if there is no
    entity layer)
    @type entity_id: string
    @param entity_id: the entity identifier
    @param external_ref: an external reference object
    @type external_ref: L{CexternalReference}
    """
    layer = self.entity_layer
    if layer is None:
        return
    layer.add_external_reference_to_entity(entity_id,external_ref)

Source Code for Module KafNafParserMod