Package fcp :: Module xmlobject
[hide private]
[frames | no frames]

Source Code for Module fcp.xmlobject

  1  #@+leo-ver=4 
  2  #@+node:@file xmlobject.py 
  3  """ 
  4  Allows XML files to be operated on like Python objects. 
  5   
  6  Features: 
  7      - load XML source from file pathnames, readable file objects or raw strings 
  8      - add, get and set tag attributes like with python attributes 
  9      - iterate over nodes 
 10      - save the modified XMLFile or XMLObject to file 
 11   
 12  Example XML file:: 
 13   
 14      <?xml version="1.0" encoding="UTF-8"?> 
 15      <rapsheets> 
 16       <person name="John Smith" age="42"> 
 17          <!-- John Smith has an appeal in process against his last conviction --> 
 18          <crime name="Armed robbery" date="March 11, 1994"/> 
 19          <crime name="Aggravated burglary" date="June 9, 2001"/> 
 20       </person> 
 21       <person name="Mary Jones" age="33"> 
 22          <crime name="Prostitution" date="January 8, 1997"/> 
 23          <crime name="Selling heroin" date="September 4, 2002"/> 
 24          <crime name="Manslaughter" date="December 21, 2004"/> 
 25       </person> 
 26      </rapsheets> 
 27   
 28  Example usage:: 
 29   
 30      >>> from xmlobject import XMLFile 
 31       
 32      >>> x = XMLFile(path="sample.xml") 
 33   
 34      >>> print x 
 35      <xmlobj.XMLFile instance at 0xb7ccc52c> 
 36   
 37      >>> print x.root 
 38      <XMLNode: rapsheets> 
 39   
 40      >>> print x.root._children 
 41      [<XMLNode: text>, <XMLNode: person>, <XMLNode: text>, 
 42       <XMLNode: person>, <XMLNode: text>] 
 43   
 44      >>> print x.root.person 
 45      [<XMLNode: person>, <XMLNode: person>] 
 46   
 47      >>> print x.root.person[0].name 
 48      John Smith 
 49   
 50      >>> john = x.root.person[0] 
 51       
 52      >>> john.height = 184 
 53   
 54      >>> c = john._addNode("crime") 
 55   
 56      >>> c.name = "Grand Theft Auto" 
 57       
 58      >>> c.date = "4 May, 2005" 
 59   
 60      >>> print x.toxml() 
 61      <?xml version="1.0" ?> 
 62      <rapsheets> 
 63       <person age="42" height="184" name="John Smith"> 
 64          <!-- John Smith has an appeal in process against his last conviction --> 
 65          <crime date="March 11, 1994" name="Armed robbery"/> 
 66          <crime date="June 9, 2001" name="Aggravated burglary"/> 
 67       <crime date="4 May, 2005" name="Grand Theft Auto"/></person> 
 68       <person age="33" name="Mary Jones"> 
 69          <crime date="January 8, 1997" name="Prostitution"/> 
 70          <crime date="September 4, 2002" name="Selling heroin"/> 
 71          <crime date="December 21, 2004" name="Manslaughter"/> 
 72       </person> 
 73      </rapsheets> 
 74   
 75      >>> 
 76   
 77  """ 
 78   
 79  #@+others 
 80  #@+node:imports 
 81  import sys, os 
 82  import xml.dom 
 83  import xml.dom.minidom 
 84  from xml.dom.minidom import parse, parseString, getDOMImplementation 
 85   
 86  #@-node:imports 
 87  #@+node:globals 
 88  impl = getDOMImplementation() 
 89   
 90  #@-node:globals 
 91  #@+node:exceptions 
class MissingRootTag(Exception):
    """root tag name was not given"""


class IncorrectRootTag(Exception):
    """root tag of the input does not match the requested root tag name"""
    # Raised by XMLFile.__init__ when the 'root' keyword disagrees with the
    # document's toplevel tag; it was referenced there but never defined.


class InvalidXML(Exception):
    """failed to parse XML input"""


class CannotSave(Exception):
    """unable to save"""


class InvalidNode(Exception):
    """not a valid minidom node"""
103 104 #@-node:exceptions 105 #@+node:class XMLFile
class XMLFile:
    """
    Allows an xml file to be viewed and operated on
    as a python object.

    (If you're viewing the epydoc-generated HTML documentation, click the 'show private'
    link at the top right of this page to see all the methods)

    Holds the root node in the .root attribute, also in an attribute
    with the same name as this root node.

    Other attributes set by the constructor:
        - path - the pathname given at construction (or None); default
          destination for save()
        - dom - the underlying xml.dom.minidom Document
    """
    #@    @+others
    #@+node:__init__
    def __init__(self, **kw):
        """
        Create an XMLFile

        Keywords:
            - path - a pathname from which the file can be read
            - file - an open file object from which the raw xml
              can be read
            - raw - the raw xml itself
            - root - name of root tag, if not reading content

        Usage scenarios:
            1. Working with existing content - you must supply input in
               one of the following ways:
                   - 'path' must be an existing file, or
                   - 'file' must be a readable file object, or
                   - 'raw' must contain raw xml as a string
            2. Creating whole new content - you must give the name
               of the root tag in the 'root' keyword

        Notes:
            - Keyword precedence governing existing content is:
                1. path (if existing file)
                2. file
                3. raw
            - If working with existing content:
                - if the 'root' is given, then the content's toplevel tag
                  MUST match the value given for 'root'
                - save() with no destination will raise CannotSave unless
                  'path' has been given
            - if not working with existing content:
                - 'root' must be given
                - save() with no destination will raise CannotSave unless
                  'path' has been given

        Raises:
            - MissingRootTag - no content could be sourced and 'root' was
              not given
            - IncorrectRootTag - 'root' was given but differs from the
              toplevel tag of the content
            - whatever xml.dom.minidom.parseString raises on malformed input
        """
        path = kw.get("path", None)
        fobj = kw.get("file", None)
        raw = kw.get("raw", None)
        root = kw.get("root", None)

        # True only for a handle opened here; a caller-supplied 'file'
        # object is the caller's to close
        ownsFile = False

        if path:
            self.path = path
            try:
                fobj = open(path)
                ownsFile = True
            except IOError:
                # deliberate best effort: an unreadable path is not fatal,
                # we fall back to 'file' / 'raw' / 'root' and still remember
                # the path as the default save destination
                pass
        else:
            self.path = None

        if fobj:
            try:
                raw = fobj.read()
            finally:
                if ownsFile:
                    fobj.close()

        if raw:
            self.dom = xml.dom.minidom.parseString(raw)
        else:
            # could not source content, so create a blank slate
            if not root:
                # in which case, must give a root node name
                raise MissingRootTag(
                        "No existing content, so must specify root")

            # ok, create a blank dom
            self.dom = impl.createDocument(None, root, None)

        # get the root node, save it as attributes 'root' and name of node
        rootnode = self.dom.documentElement

        # now validate root tag
        if root:
            if rootnode.nodeName != root:
                raise IncorrectRootTag("Gave root='%s', input has root='%s'" % (
                    root, rootnode.nodeName))

        # need this for recursion in XMLNode
        self._childrenByName = {}
        self._children = []

        # add all the child nodes; each XMLNode registers itself in the two
        # containers above as a side effect of construction
        for child in self.dom.childNodes:
            childnode = XMLNode(self, child)
            if child == rootnode:
                self.root = childnode
        setattr(self, rootnode.nodeName, self.root)

    #@-node:__init__
    #@+node:save
    def save(self, where=None, obj=None):
        """
        Saves the document.

        If argument 'where' is given, saves to it, otherwise
        tries to save to the original given 'path' (or barfs)

        Arguments:
            - where - a string (taken to be a file path), or an open
              file object
            - obj - object whose toxml() output is written; defaults to
              the whole document

        A file opened here from a pathname is closed again before
        returning; a file object passed in by the caller is only flushed.

        Raises CannotSave if there is no destination at all.
        """
        obj = obj or self.dom

        if not where:
            if self.path:
                where = self.path

        ownsFile = False
        if isinstance(where, str):
            where = open(where, "w")
            ownsFile = True

        if not where:
            raise CannotSave("No save destination, and no original path")

        try:
            where.write(obj.toxml())
            where.flush()
        finally:
            if ownsFile:
                where.close()

    #@-node:save
    #@+node:saveAs
    def saveAs(self, path):
        """
        save this time, and all subsequent times, to filename 'path'
        """
        self.path = path
        self.save()

    #@-node:saveAs
    #@+node:toxml
    def toxml(self):
        """
        renders the whole document out to raw xml code
        """
        return self.dom.toxml()

    #@-node:toxml
    #@+node:__len__
    def __len__(self):
        """
        returns number of child nodes
        """
        return len(self._children)

    #@-node:__len__
    #@+node:__getitem__
    def __getitem__(self, idx):
        """
        if given key is an integer or a slice, return the nth toplevel
        child (or list of children), otherwise return the toplevel child
        (or list of children) registered under that node name

        Raises IndexError / KeyError for unknown positions / names.
        """
        if isinstance(idx, (int, slice)):
            return self._children[idx]
        return self._childrenByName[idx]
260 261 #@-node:__getitem__ 262 #@-others 263 264 #@-node:class XMLFile 265 #@+node:class XMLNode
class XMLNode:
    """
    This is the workhorse for the xml object interface

    Wraps one minidom node (element, text or comment) and, recursively,
    all of its children. XML attributes and child tags of an element are
    reachable as python attributes of the wrapper.

    (If you're viewing the epydoc-generated HTML documentation, click the 'show private'
    link at the top right of this page to see all the methods)

    """
    #@    @+others
    #@+node:__init__
    def __init__(self, parent, node):
        """
        You shouldn't need to instantiate this directly

        Arguments:
            - parent - the XMLNode that owns this node, or the document
              object (XMLFile) for toplevel nodes
            - node - the minidom node to wrap

        Raises InvalidNode if 'node' is not a text, element or comment node.
        """
        self._parent = parent
        # anything that is not an XMLNode is taken to be the document object
        if isinstance(parent, XMLNode):
            self._root = parent._root
        else:
            self._root = parent
        self._node = node
        self._childrenByName = {}
        self._children = []

        # figure out our type first, so that an unsupported DOM node is
        # rejected before it has been registered with the parent
        self._value = None
        if isinstance(node, xml.dom.minidom.Text):
            self._type = "text"
            self._value = node.nodeValue
        elif isinstance(node, xml.dom.minidom.Element):
            self._type = "node"
            self._name = node.nodeName
        elif isinstance(node, xml.dom.minidom.Comment):
            self._type = "comment"
            self._value = node.nodeValue
        else:
            raise InvalidNode("node class %s" % node.__class__)

        # add ourself to parent's children registry
        parent._children.append(self)
        XMLNode._registerChild(parent, self)

        # and wrap all the child nodes
        for child in node.childNodes:
            XMLNode(self, child)

    @staticmethod
    def _registerChild(parent, child):
        """
        Records 'child' in the by-name registry of 'parent'

        The deal with named subtags is that we store the first instance
        as itself, and with second and subsequent instances, we make a list.
        The same object is also stored in parent.__dict__ so that it is
        reachable as a plain attribute.
        """
        registry = parent._childrenByName
        tag = child._node.nodeName
        if tag not in registry:
            registry[tag] = parent.__dict__[tag] = child
        else:
            if isinstance(registry[tag], XMLNode):
                # this is the second child node of a given tag name, so
                # convert the instance to a list
                registry[tag] = parent.__dict__[tag] = [registry[tag]]
            registry[tag].append(child)

    #@-node:__init__
    #@+node:_render
    def _render(self):
        """
        Produces well-formed XML of this node and its contents
        (same output as _toxml)
        """
        return self._node.toxml()

    #@-node:_render
    #@+node:__repr__
    def __repr__(self):
        """
        Shows the tag name for element nodes, the node type otherwise
        """
        if self._type == "node":
            return "<XMLNode: %s>" % self._node.nodeName
        else:
            return "<XMLNode: %s>" % self._type

    #@-node:__repr__
    #@+node:__getattr__
    def __getattr__(self, attr):
        """
        Fetches an attribute or child node of this tag

        If it's an attribute, then returns the attribute value as a string.

        If a child node, then:
            - if there is only one child node of that name, return it
            - if there is more than one child node of that name, return a list
              of child nodes of that tag name

        Supports some magic attributes:
            - _text - the value of the first child node of type text
              (raises KeyError if there is no text child)

        Raises AttributeError for anything else.
        """
        # python only calls us when normal lookup failed; if our own state
        # is missing (instance not yet initialised, e.g. while being copied)
        # touching self._type below would re-enter here forever
        state = self.__dict__
        if ("_node" not in state or "_type" not in state
                or "_childrenByName" not in state):
            raise AttributeError(attr)

        # magic attribute to return text
        if attr == '_text':
            tnode = self['#text']
            if isinstance(tnode, list):
                tnode = tnode[0]
            return tnode._value

        if self._type in ['text', 'comment']:
            if attr == '_value':
                return self._node.nodeValue
            else:
                raise AttributeError(attr)

        if self._node.hasAttribute(attr):
            return self._node.getAttribute(attr)
        elif attr in self._childrenByName:
            return self._childrenByName[attr]
        else:
            raise AttributeError(attr)

    #@-node:__getattr__
    #@+node:__setattr__
    def __setattr__(self, attr, val):
        """
        Change the value of an attribute of this tag

        Names starting with an underscore are stored on the python object
        itself, everything else is written to the wrapped DOM node (as the
        node value for text/comment nodes, as an XML attribute - converted
        with str() - for element nodes).

        The magic attribute '_text' can be used to set the first child
        text node's value

        Raises Exception if a child tag of the given name already exists.

        For example::

            Consider:

              <somenode>
                <child>foo</child>
              </somenode>

            >>> somenode
            <XMLNode: somenode>
            >>> somenode.child
            <XMLNode: child>
            >>> somenode.child._text
            'foo'
            >>> somenode.child._toxml()
            u'<child>foo</child>'
            >>> somenode.child._text = 'bar'
            >>> somenode.child._text
            'bar'
            >>> somenode.child._toxml()
            u'<child>bar</child>'

        """
        if attr.startswith("_"):

            # magic attribute for setting _text
            if attr == '_text':
                tnode = self['#text']
                if isinstance(tnode, list):
                    tnode = tnode[0]
                tnode._node.nodeValue = val
                tnode._value = val
                return

            self.__dict__[attr] = val
        elif self._type in ['text', 'comment']:
            self._node.nodeValue = val
            # keep the cached copy in step with the DOM
            self.__dict__['_value'] = val
        else:
            # discern between attribute and child node
            if attr in self._childrenByName:
                raise Exception("Attribute Exists")
            self._node.setAttribute(attr, str(val))

    #@-node:__setattr__
    #@+node:_keys
    def _keys(self):
        """
        Return a list of attribute names
        """
        return list(self._node.attributes.keys())

    def _values(self):
        """
        returns a list of attribute values for this tag
        """
        return [self._node.getAttribute(k) for k in self._node.attributes.keys()]

    def _items(self):
        """
        Returns a list of (attrname, attrval) tuples for this tag
        """
        return [(k, self._node.getAttribute(k)) for k in self._node.attributes.keys()]

    def _has_key(self, k):
        """
        returns True if this tag has an attribute or a child tag
        of the given name
        """
        return self._node.hasAttribute(k) or k in self._childrenByName

    def _get(self, k, default=None):
        """
        returns the value of attribute (or the child tag/s) k, or default
        if there is nothing of that name
        """
        if self._has_key(k):
            return getattr(self, k)
        else:
            return default
    #@-node:_keys
    #@+node:__len__
    def __len__(self):
        """
        returns number of child nodes
        """
        return len(self._children)

    #@-node:__len__
    #@+node:__getitem__
    def __getitem__(self, idx):
        """
        if given key is numeric (or a slice), return the nth child,
        otherwise try to return the child tag (or list of child tags)
        having the key as the tag name

        Raises IndexError for any other kind of key.
        """
        if isinstance(idx, slice) or isinstance(idx, int):
            return self._children[idx]
        elif isinstance(idx, str):
            return self._childrenByName[idx]
        else:
            raise IndexError(idx)

    #@-node:__getitem__
    #@+node:_addNode
    def _addNode(self, child):
        """
        Tries to append a child node to the tree, and returns it

        Value of 'child' must be one of:
            - a string (in which case it is taken to be the name
              of the new node's tag)
            - a dom object, in which case it will be wrapped and added
            - an XMLNode object, in which case it will be added without
              wrapping

        Raises TypeError for any other kind of value.
        """

        if isinstance(child, XMLNode):
            # NOTE(review): child._parent / child._root are left as they
            # were - confirm whether re-parenting should update them

            # add it to our children registry
            self._children.append(child)
            XMLNode._registerChild(self, child)

            # and stick it in the dom
            self._node.appendChild(child._node)

            return child

        elif isinstance(child, str):
            childNode = self._root.dom.createElement(child)

        elif isinstance(child, xml.dom.minidom.Element):
            childNode = child

        else:
            raise TypeError(
                "child must be a tag name, a DOM element or an XMLNode, "
                "not %r" % (child,))

        self._node.appendChild(childNode)

        # constructing the wrapper registers it with us
        return XMLNode(self, childNode)

    #@-node:_addNode
    #@+node:_getChild
    def _getChild(self, name):
        """
        Returns a list of zero or more child nodes whose
        tag name is <name>
        """
        try:
            item = getattr(self, name)
        except AttributeError:
            return []

        if not isinstance(item, list):
            item = [item]

        return item

    #@-node:_getChild
    #@+node:_delChild
    def _delChild(self, child):
        """
        Removes given child node

        The child is looked for among the direct children of this node
        and of each of its ancestors, up to and including the document
        object; wherever found it is taken out of the children list, the
        by-name registry, the attribute shortcut and the DOM.
        """
        node = self
        while True:
            isDocument = not isinstance(node, XMLNode)

            if child in node._children:
                node._children.remove(child)
                # the document object keeps its DOM in .dom, not ._node
                if isDocument:
                    domParent = node.dom
                else:
                    domParent = node._node
                domParent.removeChild(child._node)

            # snapshot the items, since entries get deleted on the way
            for name, entry in list(node._childrenByName.items()):
                if isinstance(entry, list):
                    if child in entry:
                        entry.remove(child)
                    emptied = not entry
                else:
                    emptied = entry is child

                if emptied:
                    del node._childrenByName[name]
                    # drop the attribute shortcut too, else node.<name>
                    # would keep returning the deleted child
                    if node.__dict__.get(name) is entry:
                        del node.__dict__[name]

            if isDocument:
                break

            node = node._parent

    #@-node:_delChild
    #@+node:_addText
    def _addText(self, value):
        """
        Tries to append a child text node, with the given text, to the tree,
        and returns the created node object
        """
        childNode = self._root.dom.createTextNode(value)
        self._node.appendChild(childNode)
        return XMLNode(self, childNode)

    #@-node:_addText
    #@+node:_addComment
    def _addComment(self, comment):
        """
        Tries to append a child comment node (with the given text value)
        to the tree, and returns the created node object
        """
        # minidom's Document offers createComment (no createCommentNode)
        childNode = self._root.dom.createComment(comment)
        self._node.appendChild(childNode)
        return XMLNode(self, childNode)

    #@-node:_addComment
    #@+node:_save
    def _save(self, where=None):
        """
        Generates well-formed XML from just this node, and saves it
        to a file.

        Argument 'where' is either an open file object, or a pathname

        If 'where' is not given, then saves the entire document tree.
        """
        if not where:
            self._root.save()
        else:
            self._root.save(where, self._node)

    #@-node:_save
    #@+node:_toxml
    def _toxml(self):
        """
        renders just this node out to raw xml code
        """
        return self._node.toxml()
633 634 #@-node:_toxml 635 #@-others 636 #@-node:class XMLNode 637 #@-others 638 #@nonl 639 #@-node:@file xmlobject.py 640 #@-leo 641