Module term_data
[hide private]
[frames] | no frames]

Source Code for Module term_data

  1  """ 
  2  This module parses the term layer of a KAF/NAF object 
  3  """ 
  4   
  5  from span_data import * 
  6  from external_references_data import * 
  7  from term_sentiment_data import * 
  8  from lxml import etree 
  9   
 10   
class Cterm:
    """
    This class encapsulates a <term> NAF or KAF object

    It is a thin wrapper around an XML element: every getter reads from the
    wrapped node and every setter writes to it, so changes are visible to
    anybody else holding the same node.
    """

    def __init__(self,node=None,type='NAF'):
        """
        Constructor of the object
        @type node: xml Element or None (to create an empty one)
        @param node: this is the node of the element. If it is None it will create a new object
        @type type: string
        @param type: the type of the object (KAF or NAF)
        """
        self.type = type
        if node is None:
            self.node = etree.Element('term')
        else:
            self.node = node

    def _id_attribute(self):
        """
        Returns the name of the XML attribute that stores the identifier
        @rtype: string or None
        @return: 'id' for NAF, 'tid' for KAF, None for any other type
        """
        if self.type == 'NAF':
            return 'id'
        if self.type == 'KAF':
            return 'tid'
        return None

    def get_node(self):
        """
        Returns the node of the element
        @rtype: xml Element
        @return: the node of the element
        """
        return self.node

    def get_id(self):
        """
        Returns the term identifier
        @rtype: string
        @return: the term identifier (None if it is not set or the type is neither NAF nor KAF)
        """
        attribute = self._id_attribute()
        if attribute is None:
            return None
        return self.node.get(attribute)

    def set_id(self,i):
        """
        Sets the identifier for the term
        @type i: string
        @param i: term identifier
        """
        attribute = self._id_attribute()
        # Unknown types are ignored instead of guessing an attribute name
        if attribute is not None:
            self.node.set(attribute,i)

    def get_lemma(self):
        """
        Returns the lemma of the object
        @rtype: string
        @return: the term lemma
        """
        return self.node.get('lemma')

    def set_lemma(self,l):
        """
        Sets the lemma for the term
        @type l: string
        @param l: lemma
        """
        self.node.set('lemma',l)

    def get_pos(self):
        """
        Returns the part-of-speech of the object
        @rtype: string
        @return: the term pos-tag
        """
        return self.node.get('pos')

    def set_pos(self,p):
        """
        Sets the postag for the term
        @type p: string
        @param p: pos-tag
        """
        self.node.set('pos',p)

    def get_type(self):
        """
        Returns the 'type' attribute of the term node (the value stored by set_type,
        not the KAF/NAF flavour kept in self.type)
        @rtype: string
        @return: the type of the term
        """
        return self.node.get('type')

    def set_type(self,t):
        """
        Sets the type for the term
        @type t: string
        @param t: type for the term
        """
        self.node.set('type',t)

    def get_morphofeat(self):
        """
        Returns the morphofeat attribute of the term
        @rtype: string
        @return: the term morphofeat feature
        """
        return self.node.get('morphofeat')

    def set_morphofeat(self,m):
        """
        Sets the morphofeat attribute
        @type m: string
        @param m: the morphofeat value
        """
        self.node.set('morphofeat',m)

    def get_span(self):
        """
        Returns the span object of the term
        @rtype: L{Cspan}
        @return: the term span (None if the term has no span)
        """
        node_span = self.node.find('span')
        if node_span is None:
            return None
        return Cspan(node_span)

    def set_span(self,this_span):
        """
        Sets the span for the term, replacing any span it already has
        @type this_span: L{Cspan}
        @param this_span: the span object
        """
        new_span_node = this_span.get_node()
        # get_span only reads the first <span>, so a leftover one would hide the new span
        for old_span_node in self.node.findall('span'):
            self.node.remove(old_span_node)
        self.node.append(new_span_node)

    def get_sentiment(self):
        """
        Returns the sentiment object of the term
        @rtype: L{Cterm_sentiment}
        @return: the term sentiment (None if the term has no sentiment)
        """
        sent_node = self.node.find('sentiment')
        if sent_node is None:
            return None
        return Cterm_sentiment(sent_node)

    def add_external_reference(self,ext_ref):
        """
        Adds an external reference object to the term
        @type ext_ref: L{CexternalReference}
        @param ext_ref: an external reference object
        """
        ext_refs_node = self.node.find('externalReferences')
        if ext_refs_node is None:
            # First reference for this term: create the container element
            ext_refs_obj = CexternalReferences()
            self.node.append(ext_refs_obj.get_node())
        else:
            ext_refs_obj = CexternalReferences(ext_refs_node)

        ext_refs_obj.add_external_reference(ext_ref)

    def add_term_sentiment(self,term_sentiment):
        """
        Adds a sentiment object to the term
        @type term_sentiment: L{Cterm_sentiment}
        @param term_sentiment: the sentiment object
        """
        self.node.append(term_sentiment.get_node())

    def get_external_references(self):
        """
        Iterator that returns all the external references of the term
        @rtype: L{CexternalReference}
        @return: the external references
        """
        for ext_ref_node in self.node.findall('externalReferences'):
            ext_refs_obj = CexternalReferences(ext_ref_node)
            for ref in ext_refs_obj:
                yield ref

    def remove_external_references(self):
        """
        Removes any external reference from the term
        """
        for ex_ref_node in self.node.findall('externalReferences'):
            self.node.remove(ex_ref_node)
192 193
class Cterms:
    """
    This class encapsulates the term layer (collection of term objects)

    Besides the XML node of the layer, it keeps self.idx, a dictionary from
    term identifier to term node used for lookups by identifier.
    """

    def __init__(self,node=None,type='NAF'):
        """
        Constructor of the object
        @type node: xml Element or None (to create an empty one)
        @param node: this is the node of the element. If it is None it will create a new object
        @type type: string
        @param type: the type of the object (KAF or NAF)
        """
        self.idx = {}
        self.type = type
        if node is None:
            self.node = etree.Element('terms')
        else:
            self.node = node
            # Index the terms already present so they can be found by identifier
            for node_term in self.__get_node_terms():
                term_obj = Cterm(node_term,self.type)
                self.idx[term_obj.get_id()] = node_term

    def get_node(self):
        """
        Returns the node of the element
        @rtype: xml Element
        @return: the node of the element
        """
        return self.node

    def _move_attribute(self,node,old_name,new_name):
        """
        Renames an attribute of a node, doing nothing when the attribute is missing
        @type node: xml Element
        @param node: the node to modify
        @type old_name: string
        @param old_name: current name of the attribute
        @type new_name: string
        @param new_name: new name of the attribute
        """
        value = node.attrib.pop(old_name,None)
        if value is not None:
            node.set(new_name,value)

    def to_kaf(self):
        """
        Converts the object to KAF (if it is NAF)
        """
        if self.type == 'NAF':
            self.type = 'KAF'
            for node in self.__get_node_terms():
                self._move_attribute(node,'id','tid')

    def to_naf(self):
        """
        Converts the object to NAF (if it is KAF)
        """
        if self.type == 'KAF':
            self.type = 'NAF'
            for node in self.__get_node_terms():
                self._move_attribute(node,'tid','id')

    def __get_node_terms(self):
        """
        Iterator over the <term> nodes that are direct children of the layer
        """
        for node_term in self.node.findall('term'):
            yield node_term

    def __iter__(self):
        """
        Iterator that returns single term objects in the layer
        @rtype: L{Cterm}
        @return: term objects
        """
        for node_term in self.__get_node_terms():
            yield Cterm(node_term,self.type)

    def get_term(self,term_id):
        """
        Returns the term object for the supplied identifier
        @type term_id: string
        @param term_id: term identifier
        @rtype: L{Cterm}
        @return: the term object, or None if the identifier is unknown
        """
        if term_id in self.idx:
            return Cterm(self.idx[term_id],self.type)
        return None

    def add_term(self,term_obj):
        """
        Adds a term object to the layer and registers it for lookups by identifier
        @type term_obj: L{Cterm}
        @param term_obj: the term object
        """
        term_node = term_obj.get_node()
        self.node.append(term_node)
        # Without this entry get_term/add_external_reference cannot see the new term.
        # A term whose identifier is still unset cannot be indexed.
        term_id = term_obj.get_id()
        if term_id is not None:
            self.idx[term_id] = term_node

    def add_external_reference(self,term_id, external_ref):
        """
        Adds an external reference for the given term. If the identifier is
        unknown a message is printed and nothing is added.
        @type term_id: string
        @param term_id: the term identifier
        @type external_ref: L{CexternalReference}
        @param external_ref: the external reference object
        """
        if term_id in self.idx:
            term_obj = Cterm(self.idx[term_id],self.type)
            term_obj.add_external_reference(external_ref)
        else:
            # Single pre-formatted argument: same output on Python 2 and Python 3
            print('%s  not in self.idx' % (term_id,))

    def remove_terms(self,list_term_ids):
        """
        Removes a list of terms from the layer, together with the comment
        that immediately precedes each removed term (if there is one)
        @type list_term_ids: list
        @param list_term_ids: list of term identifiers to be removed
        """
        ids_to_remove = set(list_term_ids)
        nodes_to_remove = []
        previous = None
        # Walk over a snapshot of the children so the layer is not modified while iterating
        for child in list(self.node):
            if child.tag == 'term' and Cterm(child,self.type).get_id() in ids_to_remove:
                nodes_to_remove.append(child)
                # Only a comment is dropped; a preceding term or other element is kept
                if previous is not None and previous.tag is etree.Comment:
                    nodes_to_remove.append(previous)
            previous = child

        for node in nodes_to_remove:
            self.node.remove(node)

        # Keep the index in sync with the layer
        for term_id in ids_to_remove:
            self.idx.pop(term_id,None)
310