1
2
3 """
4 Allows XML files to be operated on like Python objects.
5
6 Features:
7 - load XML source from file pathnames, readable file objects or raw strings
8 - add, get and set tag attributes like with python attributes
9 - iterate over nodes
10 - save the modified XMLFile or XMLObject to file
11
12 Example XML file::
13
14 <?xml version="1.0" encoding="UTF-8"?>
15 <rapsheets>
16 <person name="John Smith" age="42">
17 <!-- John Smith has an appeal in process against his last conviction -->
18 <crime name="Armed robbery" date="March 11, 1994"/>
19 <crime name="Aggravated burglary" date="June 9, 2001"/>
20 </person>
21 <person name="Mary Jones" age="33">
22 <crime name="Prostitution" date="January 8, 1997"/>
23 <crime name="Selling heroin" date="September 4, 2002"/>
24 <crime name="Manslaughter" date="December 21, 2004"/>
25 </person>
26 </rapsheets>
27
28 Example usage::
29
30 >>> from xmlobject import XMLFile
31
32 >>> x = XMLFile(path="sample.xml")
33
34 >>> print x
35 <xmlobj.XMLFile instance at 0xb7ccc52c>
36
37 >>> print x.root
38 <XMLNode: rapsheets>
39
40 >>> print x.root._children
41 [<XMLNode: text>, <XMLNode: person>, <XMLNode: text>,
42 <XMLNode: person>, <XMLNode: text>]
43
44 >>> print x.root.person
45 [<XMLNode: person>, <XMLNode: person>]
46
47 >>> print x.root.person[0].name
48 John Smith
49
50 >>> john = x.root.person[0]
51
52 >>> john.height = 184
53
54 >>> c = john._addNode("crime")
55
56 >>> c.name = "Grand Theft Auto"
57
58 >>> c.date = "4 May, 2005"
59
60 >>> print x.toxml()
61 <?xml version="1.0" ?>
62 <rapsheets>
63 <person age="42" height="184" name="John Smith">
64 <!-- John Smith has an appeal in process against his last conviction -->
65 <crime date="March 11, 1994" name="Armed robbery"/>
66 <crime date="June 9, 2001" name="Aggravated burglary"/>
67 <crime date="4 May, 2005" name="Grand Theft Auto"/></person>
68 <person age="33" name="Mary Jones">
69 <crime date="January 8, 1997" name="Prostitution"/>
70 <crime date="September 4, 2002" name="Selling heroin"/>
71 <crime date="December 21, 2004" name="Manslaughter"/>
72 </person>
73 </rapsheets>
74
75 >>>
76
77 """
78
79
80
81 import sys, os
82 import xml.dom
83 import xml.dom.minidom
84 from xml.dom.minidom import parse, parseString, getDOMImplementation
85
86
87
88 impl = getDOMImplementation()
89
90
91
93 """root tag name was not given"""
94
96 """failed to parse XML input"""
97
100
102 """not a valid minidom node"""
103
104
105
107 """
108 Allows an xml file to be viewed and operated on
109 as a python object.
110
111 (If you're viewing the epydoc-generated HTML documentation, click the 'show private'
112 link at the top right of this page to see all the methods)
113
114 Holds the root node in the .root attribute, also in an attribute
115 with the same name as this root node.
116 """
117
118
120 """
121 Create an XMLFile
122
123 Keywords:
124 - path - a pathname from which the file can be read
125 - file - an open file object from which the raw xml
126 can be read
127 - raw - the raw xml itself
128 - root - name of root tag, if not reading content
129
130 Usage scenarios:
131 1. Working with existing content - you must supply input in
132 one of the following ways:
133 - 'path' must be an existing file, or
134 - 'file' must be a readable file object, or
135 - 'raw' must contain raw xml as a string
136 2. Creating whole new content - you must give the name
137 of the root tag in the 'root' keyword
138
139 Notes:
140 - Keyword precedence governing existing content is:
141 1. path (if existing file)
142 2. file
143 3. raw
144 - If working with existing content:
145 - if the 'root' is given, then the content's toplevel tag
146 MUST match the value given for 'root'
147 - trying to _save will raise an exception unless 'path'
148 has been given
149 - if not working with existing content:
150 - 'root' must be given
151 - _save() will raise an exception unless 'path' has been given
152 """
153 path = kw.get("path", None)
154 fobj = kw.get("file", None)
155 raw = kw.get("raw", None)
156 root = kw.get("root", None)
157
158 if path:
159 self.path = path
160 try:
161 fobj = file(path)
162 except IOError:
163 pass
164 else:
165 self.path = None
166
167 if fobj:
168 raw = fobj.read()
169
170 if raw:
171 self.dom = xml.dom.minidom.parseString(raw)
172 else:
173
174 if not root:
175
176 raise MissingRootTag(
177 "No existing content, so must specify root")
178
179
180 self.dom = impl.createDocument(None, root, None)
181
182
183 rootnode = self.dom.documentElement
184
185
186 if root:
187 if rootnode.nodeName != root:
188 raise IncorrectRootTag("Gave root='%s', input has root='%s'" % (
189 root, rootnode.nodeName))
190
191
192 self._childrenByName = {}
193 self._children = []
194
195
196 for child in self.dom.childNodes:
197 childnode = XMLNode(self, child)
198
199 if child == rootnode:
200
201 self.root = childnode
202 setattr(self, rootnode.nodeName, self.root)
203
204
205
def save(self, where=None, obj=None):
    """
    Saves the document.

    Arguments:
        - where - destination to write to: a string (taken to be a
          file path) or an open, writable file object. If not given,
          falls back to the pathname held in self.path.
        - obj - object whose toxml() output is written; defaults to
          self.dom.

    Raises CannotSave if no destination was given and self.path is
    not set.

    A file opened here from a pathname is closed again once written;
    a file object supplied by the caller is flushed but left open.
    """
    obj = obj or self.dom

    if not where:
        if self.path:
            where = self.path

    # Remember whether the handle is ours, so that only files opened
    # by this method get closed afterwards.
    opened = isinstance(where, str)
    if opened:
        where = open(where, "w")

    if not where:
        raise CannotSave("No save destination, and no original path")

    try:
        where.write(obj.toxml())
        where.flush()
    finally:
        if opened:
            where.close()
230
231
232
234 """
235 save this time, and all subsequent times, to filename 'path'
236 """
237 self.path = path
238 self.save()
239
240
241
243 return self.dom.toxml()
244
245
246
248 """
249 returns number of child nodes
250 """
251 return len(self._children)
252
253
254
256 if isinstance(idx, int):
257 return self._children[idx]
258 else:
259 return self._childrenByName[idx]
260
261
262
263
264
265
267 """
268 This is the workhorse for the xml object interface
269
270 (If you're viewing the epydoc-generated HTML documentation, click the 'show private'
271 link at the top right of this page to see all the methods)
272
273 """
274
275
277 """
278 You shouldn't need to instantiate this directly
279 """
280 self._parent = parent
281 if isinstance(parent, XMLFile):
282 self._root = parent
283 else:
284 self._root = parent._root
285 self._node = node
286 self._childrenByName = {}
287 self._children = []
288
289
290 parent._children.append(self)
291
292
293
294 parentDict = self._parent._childrenByName
295 nodeName = node.nodeName
296 if not parentDict.has_key(nodeName):
297 parentDict[nodeName] = parent.__dict__[nodeName] = self
298 else:
299 if isinstance(parentDict[nodeName], XMLNode):
300
301
302 parentDict[nodeName] = parent.__dict__[nodeName] = [parentDict[nodeName]]
303 parentDict[nodeName].append(self)
304
305
306 self._value = None
307 if isinstance(node, xml.dom.minidom.Text):
308 self._type = "text"
309 self._value = node.nodeValue
310 elif isinstance(node, xml.dom.minidom.Element):
311 self._type = "node"
312 self._name = nodeName
313 elif isinstance(node, xml.dom.minidom.Comment):
314 self._type = "comment"
315 self._value = node.nodeValue
316 else:
317 raise InvalidNode("node class %s" % node.__class__)
318
319
320 for child in node.childNodes:
321 XMLNode(self, child)
322
323
324
326 """
327 Produces well-formed XML of this node's contents,
328 indented as required
329 """
330 return self._node.toxml()
331
332
333
335 if self._type == "node":
336 return "<XMLNode: %s>" % self._node.nodeName
337 else:
338 return "<XMLNode: %s>" % self._type
339
340
341
343 """
344 Fetches an attribute or child node of this tag
345
346 If it's an attribute, then returns the attribute value as a string.
347
348 If a child node, then:
349 - if there is only one child node of that name, return it
350 - if there is more than one child node of that name, return a list
351 of child nodes of that tag name
352
353 Supports some magic attributes:
354 - _text - the value of the first child node of type text
355 """
356
357
358
359 if attr == '_text':
360 tnode = self['#text']
361 if isinstance(tnode, list):
362 tnode = tnode[0]
363 return tnode._value
364
365 if self._type in ['text', 'comment']:
366 if attr == '_value':
367 return self._node.nodeValue
368 else:
369 raise AttributeError(attr)
370
371 if self._node.hasAttribute(attr):
372 return self._node.getAttribute(attr)
373 elif self._childrenByName.has_key(attr):
374 return self._childrenByName[attr]
375
376
377
378
379 else:
380 raise AttributeError(attr)
381
382
383
384
386 """
387 Change the value of an attribute of this tag
388
389 The magic attribute '_text' can be used to set the first child
390 text node's value
391
392 For example::
393
394 Consider:
395
396 <somenode>
397 <child>foo</child>
398 </somenode>
399
400 >>> somenode
401 <XMLNode: somenode>
402 >>> somenode.child
403 <XMLNode: child>
404 >>> somenode.child._text
405 'foo'
406 >>> somenode._toxml()
407 u'<somenode><child>foo</child></somenode>'
408 >>> somenode.child._text = 'bar'
409 >>> somenode.child._text
410 'bar'
411 >>> somenode._toxml()
412 u'<somenode><child>bar</child></somenode>'
413
414 """
415 if attr.startswith("_"):
416
417
418 if attr == '_text':
419 tnode = self['#text']
420 if isinstance(tnode, list):
421 tnode = tnode[0]
422 tnode._node.nodeValue = val
423 tnode._value = val
424 return
425
426 self.__dict__[attr] = val
427 elif self._type in ['text', 'comment']:
428 self._node.nodeValue = val
429 else:
430
431 if self._childrenByName.has_key(attr):
432 raise Exception("Attribute Exists")
433 self._node.setAttribute(attr, str(val))
434
435
436
438 """
439 Return a list of attribute names
440 """
441 return self._node.attributes.keys()
442
444 """
445 Returns a list of attribute values for this tag
446 """
447 return [self._node.getAttribute(k) for k in self._node.attributes.keys()]
448
450 """
451 returns a list of (attrname, attrval) tuples for this tag
452 """
453 return [(k, self._node.getAttribute(k)) for k in self._node.attributes.keys()]
454
456 """
457 returns True if this tag has an attribute of the given name
458 """
459 return self._node.hasAttribute(k) or self._childrenByName.has_key(k)
460
461 - def _get(self, k, default=None):
462 """
463 returns the value of attribute k, or default if no such attribute
464 """
465 if self._has_key(k):
466 return getattr(self, k)
467 else:
468 return default
469
470
472 """
473 returns number of child nodes
474 """
475 return len(self._children)
476
477
478
480 """
481 if given key is numeric, return the nth child, otherwise
482 try to return the child tag (or list of child tags) having
483 the key as the tag name
484 """
485
486
487 if isinstance(idx, slice) or isinstance(idx, int):
488 return self._children[idx]
489 elif isinstance(idx, str):
490 return self._childrenByName[idx]
491 else:
492 raise IndexError(idx)
493
494
495
497 """
498 Tries to append a child node to the tree, and returns it
499
500 Value of 'child' must be one of:
501 - a string (in which case it is taken to be the name
502 of the new node's tag)
503 - a dom object, in which case it will be wrapped and added
504 - an XMLNode object, in which case it will be added without
505 wrapping
506 """
507
508 if isinstance(child, XMLNode):
509
510
511 self._children.append(child)
512
513 parentDict = self._childrenByName
514 nodeName = child._node.nodeName
515
516 if not parentDict.has_key(nodeName):
517 parentDict[nodeName] = self.__dict__[nodeName] = child
518 else:
519 if isinstance(parentDict[nodeName], XMLNode):
520
521
522 parentDict[nodeName] \
523 = self.__dict__[nodeName] \
524 = [parentDict[nodeName]]
525
526 parentDict[nodeName].append(child)
527
528
529 self._node.appendChild(child._node)
530
531 return child
532
533 elif isinstance(child, str):
534 childNode = self._root.dom.createElement(child)
535 self._node.appendChild(childNode)
536
537 elif isinstance(child, xml.dom.minidom.Element):
538 childNode = child
539 child = childNode.nodeName
540 self._node.appendChild(childNode)
541
542
543 return XMLNode(self, childNode)
544
545
546
548 """
549 Returns a list of zero or more child nodes whose
550 tag name is <name>
551 """
552 try:
553 item = getattr(self, name)
554 except AttributeError:
555 return []
556
557 if not isinstance(item, list):
558 item = [item]
559
560 return item
561
562
563
565 """
566 Removes given child node
567 """
568 node = self
569 while True:
570 print "Trying to remove %s from %s" % (child, node)
571 if child in node._children:
572 print "removing"
573 node._children.remove(child)
574 node._node.removeChild(child._node)
575
576 for k,v in node._childrenByName.items():
577 if child == v:
578 del node._childrenByName[k]
579 elif isinstance(v, list):
580 if child in v:
581 v.remove(child)
582
583 if isinstance(node, XMLFile):
584 break
585
586 node = node._parent
587
588
589
def _addText(self, value):
    """
    Creates a text node holding 'value' through the root's dom, appends
    it to this tag's dom node, and returns the XMLNode wrapping it
    """
    textNode = self._root.dom.createTextNode(value)
    self._node.appendChild(textNode)
    wrapped = XMLNode(self, textNode)
    return wrapped
598
599
600
609
610
611
612 - def _save(self, where=None):
613 """
614 Generates well-formed XML from just this node, and saves it
615 to a file.
616
617 Argument 'where' is either an open file object, or a pathname
618
619 If 'where' is not given, then saves the entire document tree.
620 """
621 if not where:
622 self._root.save()
623 else:
624 self._root.save(where, self._node)
625
626
627
629 """
630 renders just this node out to raw xml code
631 """
632 return self._node.toxml()
633
634
635
636
637
638
639
640
641