Package pyxb :: Package utils :: Module saxdom
[hide private]
[frames] | [no frames]

Source Code for Module pyxb.utils.saxdom

  1  # -*- coding: utf-8 -*- 
  2  # Copyright 2009-2013, Peter A. Bigot 
  3  # 
  4  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
  5  # not use this file except in compliance with the License. You may obtain a 
  6  # copy of the License at: 
  7  # 
  8  #            http://www.apache.org/licenses/LICENSE-2.0 
  9  # 
 10  # Unless required by applicable law or agreed to in writing, software 
 11  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 12  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 13  # License for the specific language governing permissions and limitations 
 14  # under the License. 
 15   
 16  """This module contains support for a DOM tree representation from an XML 
 17  document using a SAX parser. 
 18   
 19  This functionality exists because we need a DOM interface to generate the 
 20  binding classes, but the Python C{xml.dom.minidom} package does not support 
 21  location information.  The SAX interface does, so we have a SAX content 
 22  handler which converts the SAX events into a DOM tree. 
 23   
 24  This is not a general-purpose DOM capability; only a small subset of the DOM 
 25  interface is supported, and only for storing the XML information, not for 
 26  converting it back into document format. 
 27  """ 
 28   
 29  import xml.dom 
 30  import pyxb.utils.saxutils 
 31  import StringIO 
 32  import pyxb.namespace 
 33  import logging 
 34   
 35  _log = logging.getLogger(__name__) 
def _DumpDOM (n, depth=0):
    """Utility function to print a DOM tree.

    Writes a description of C{n} to standard output and recurses into its
    children.  Element nodes report their index in the parent, expanded
    name, child count, in-scope namespaces and attributes; text nodes are
    skipped silently; a document node prints a marker line and descends
    into its first child; any other node type is reported as unrecognized.

    @param n: The node to dump.  C{None} is accepted and ignored, so a
    document that has no child can be dumped without raising.

    @param depth: Nesting level of C{n}; determines the length of the
    prefix placed in front of each line printed for an element.
    """

    if n is None:
        # firstChild of a childless document is None; nothing to print.
        return

    pfx = ' ' * depth
    node_type = n.nodeType
    if xml.dom.Node.ELEMENT_NODE == node_type:
        print('%sElement[%d] %s %s with %d children' % (pfx, n._indexInParent(), n, pyxb.namespace.ExpandedName(n.name), len(n.childNodes)))
        ins = pyxb.namespace.resolution.NamespaceContext.GetNodeContext(n).inScopeNamespaces()
        print('%s%s' % (pfx, ' ; '.join([ '%s=%s' % (_k, _v.uri()) for (_k, _v) in ins.items()])))
        for (k, v) in n.attributes.items():
            print('%s %s=%s' % (pfx, pyxb.namespace.ExpandedName(k), v))
        for cn in n.childNodes:
            _DumpDOM(cn, depth+1)
    elif xml.dom.Node.TEXT_NODE == node_type:
        # Text content is intentionally not printed.
        pass
    elif xml.dom.Node.DOCUMENT_NODE == node_type:
        print('Document node')
        # Same depth: the document itself contributes no indentation.
        _DumpDOM(n.firstChild, depth)
    else:
        print('UNRECOGNIZED %s' % (node_type,))
57
class _DOMSAXHandler (pyxb.utils.saxutils.BaseSAXHandler):
    """SAX content handler that assembles a DOM tree from parser events."""

    # Root of the tree being built; remains None until startDocument runs.
    __document = None

    def document (self):
        """Return the L{Document} at the root of the generated tree."""
        return self.__document

    def startDocument (self):
        """Run the base-class processing, then create an empty L{Document}
        bound to this handler's namespace context."""
        super(_DOMSAXHandler, self).startDocument()
        root_context = self.namespaceContext()
        self.__document = Document(namespace_context=root_context)

    def endDocument (self):
        """Attach the first item of the outermost element state's content,
        if there is any, to the document."""
        top_level = self.elementState().content()
        if len(top_level) > 0:
            root_info = top_level[0]
            assert root_info.maybe_element
            self.__document.appendChild(root_info.item)

    def startElementNS (self, name, qname, attrs):
        """Collect the attributes of the element being opened.

        The base class supplies the state for the new element and its
        namespace context.  Each attribute becomes an L{Attr} stored in a
        L{NamedNodeMap} that is parked on the element state until
        L{endElementNS} builds the element.
        """
        (this_state, _parent_state, ns_ctx, _name_en) = super(_DOMSAXHandler, self).startElementNS(name, qname, attrs)
        attr_map = NamedNodeMap()
        # The double-underscore name is mangled with this class's name, so
        # this is a handler-private slot on the state object; endElementNS
        # reads it back under the same mangled name.
        this_state.__attributes = attr_map
        for attr_name in attrs.getNames():
            attr_map._addItem(Attr(expanded_name=pyxb.namespace.ExpandedName(attr_name), namespace_context=ns_ctx, value=attrs.getValue(attr_name), location=this_state.location()))

    def endElementNS (self, name, qname):
        """Build the L{Element} for the element being closed and hand it to
        the parent element state as content.

        Content items that are already L{Node} instances are appended as
        they are; anything else is wrapped in a L{Text} node first.
        """
        this_state = super(_DOMSAXHandler, self).endElementNS(name, qname)
        ns_ctx = this_state.namespaceContext()
        element = Element(namespace_context=ns_ctx, expanded_name=this_state.expandedName(), attributes=this_state.__attributes, location=this_state.location())
        for entry in this_state.content():
            child = entry.item
            if not isinstance(child, Node):
                child = Text(child, namespace_context=ns_ctx)
            element.appendChild(child)
        this_state.parentState().addElementContent(this_state.location(), element, None)
96
def parse (stream, **kw):
    """Build a DOM tree from an XML document read from a stream.

    All keywords other than the one described below are forwarded to
    L{pyxb.utils.saxutils.make_parser}.

    @param stream: An object presenting the standard file C{read} interface
    from which the document can be read.

    @keyword content_handler_constructor: Any value supplied by the caller
    is replaced with L{_DOMSAXHandler}.

    @rtype: C{xml.dom.Document}
    """

    kw.update(content_handler_constructor=_DOMSAXHandler)
    sax_parser = pyxb.utils.saxutils.make_parser(**kw)
    # Grab the handler before parsing; it holds the tree once parsing ends.
    dom_builder = sax_parser.getContentHandler()
    sax_parser.parse(stream)
    return dom_builder.document()
117
def parseString (text, **kw):
    """Build a DOM tree from an XML document held in a string.

    The text is wrapped in a C{StringIO} and passed, together with any
    keywords, to L{parse}.
    """
    # The XML parser doesn't really like unicode strings, so those are
    # encoded with pyxb._InputEncoding before being wrapped.
    raw = text.encode(pyxb._InputEncoding) if isinstance(text, unicode) else text
    return parse(StringIO.StringIO(raw), **kw)
125
126 -class Node (xml.dom.Node, pyxb.utils.utility.Locatable_mixin):
127 """Base for the minimal DOM interface required by PyXB."""
128 - def __init__ (self, node_type, **kw):
129 location = kw.pop('location', None) 130 if location is not None: 131 pyxb.utils.utility.Locatable_mixin.__init__(self, location=location) 132 self.__nodeType = node_type 133 self.__parentNode = None 134 self.__indexInParent = None 135 self.__childNodes = [] 136 self.__namespaceContext = kw['namespace_context'] 137 self.__value = kw.get('value') 138 self.__attributes = kw.get('attributes') 139 expanded_name = kw.get('expanded_name') 140 if expanded_name is not None: 141 self.__name = expanded_name.uriTuple() 142 self.__namespaceURI = expanded_name.namespaceURI() 143 self.__localName = expanded_name.localName() 144 self.__namespaceContext.setNodeContext(self)
145 146 location = property(lambda _s: _s._location()) 147 148 __name = None 149 @property
150 - def name (self):
151 return self.__name
152 @property
153 - def expanded_name (self):
155 __namespaceURI = None 156 namespaceURI = property(lambda _s: _s.__namespaceURI) 157 __localName = None 158 localName = property(lambda _s: _s.__localName) 159 __value = None 160 value = property(lambda _s: _s.__value) 161
162 - def _indexInParent (self): return self.__indexInParent
163
164 - def __childIfPresent (self, index):
165 if index < len(self.__childNodes): 166 return self.__childNodes[index] 167 return None
168
169 - def appendChild (self, new_child):
170 new_child._setParentNode(self, len(self.__childNodes)) 171 self.__childNodes.append(new_child)
172
173 - def _setParentNode (self, parent_node, index_in_parent):
174 self.__parentNode = parent_node 175 self.__indexInParent = index_in_parent
176
177 - def _setAttributes (self, attributes):
178 assert self.__attributes is None 179 self.__attributes = attributes
180 __attributes = None 181 182 nodeType = property(lambda _s: _s.__nodeType) 183 parentNode = property(lambda _s: _s.__parentNode) 184 firstChild = property(lambda _s: _s.__childIfPresent(0)) 185 childNodes = property(lambda _s: _s.__childNodes) 186 attributes = property(lambda _s: _s.__attributes) 187 188 nextSibling = property(lambda _s: _s.parentNode.__childIfPresent(_s.__indexInParent+1)) 189
190 - def hasAttributeNS (self, ns_uri, local_name):
191 return self.getAttributeNodeNS(ns_uri, local_name) is not None
192
193 - def getAttributeNodeNS (self, ns_uri, local_name):
194 return self.__attributes._getAttr( (ns_uri, local_name) )
195
196 - def getAttributeNS (self, ns_uri, local_name):
197 rv = self.getAttributeNodeNS(ns_uri, local_name) 198 if rv is None: 199 return '' 200 return rv.value
201
class Document (Node):
    """Node of type C{DOCUMENT_NODE} that adds the documentElement
    interface."""

    def __init__ (self, **kw):
        """Create a document node; keywords are forwarded to L{Node}."""
        super(Document, self).__init__(xml.dom.Node.DOCUMENT_NODE, **kw)

    # The document element is the node's first child.
    documentElement = Node.firstChild

208
class Attr (Node):
    """Node of type C{ATTRIBUTE_NODE} that adds the nodeName and nodeValue
    interface."""

    def __init__ (self, **kw):
        """Create an attribute node; keywords are forwarded to L{Node}."""
        super(Attr, self).__init__(xml.dom.Node.ATTRIBUTE_NODE, **kw)

    # DOM spellings that reuse the Node properties.
    nodeName = Node.name
    nodeValue = Node.value

215
class NamedNodeMap (dict):
    """The portion of the DOM NamedNodeMap interface that PyXB needs.

    The dictionary part maps each attribute's name to its value; a separate
    list keeps the attribute nodes in the order they were added.
    """

    # Replaced by a per-instance list in the constructor.
    __members = None

    def __init__ (self):
        """Create an empty map."""
        super(NamedNodeMap, self).__init__()
        self.__members = []

    @property
    def length (self):
        """Number of attribute nodes stored in the map."""
        return len(self.__members)

    def item (self, index):
        """Return the attribute node stored at position C{index}."""
        return self.__members[index]

    def _addItem (self, attr):
        """Store C{attr}: its value goes into the dictionary under its name,
        and the node is appended to the ordered member list.

        An assertion requires the node to have a namespace context.
        """
        self[attr.name] = attr.value
        assert pyxb.namespace.resolution.NamespaceContext.GetNodeContext(attr) is not None
        self.__members.append(attr)

    def _getAttr (self, name):
        """Return the first stored attribute node whose name equals
        C{name}, or C{None} when there is none."""
        matches = [ _a for _a in self.__members if _a.name == name ]
        if matches:
            return matches[0]
        return None

239
class Element (Node):
    """Node of type C{ELEMENT_NODE}; it must be created with an attribute
    map."""

    def __init__ (self, **kw):
        """Create an element node; keywords are forwarded to L{Node}.

        An assertion requires that the node ends up with an attribute map.
        """
        super(Element, self).__init__(xml.dom.Node.ELEMENT_NODE, **kw)
        assert self.attributes is not None

    # Both DOM names report the node's local name.
    tagName = Node.localName
    nodeName = tagName

246
class _CharacterData (Node):
    """Abstract base for the node kinds that hold text data."""

    # DOM spelling for the text held by the node; same property as value.
    data = Node.value

250
class Text (_CharacterData):
    """Node of type C{TEXT_NODE}."""

    def __init__ (self, text, **kw):
        """Create a text node whose value is C{text}; remaining keywords
        are forwarded to L{Node}."""
        super(Text, self).__init__(node_type=xml.dom.Node.TEXT_NODE, value=text, **kw)

254
class Comment (_CharacterData):
    """Node of type C{COMMENT_NODE}."""

    def __init__ (self, text, **kw):
        """Create a comment node whose value is C{text}; remaining keywords
        are forwarded to L{Node}."""
        super(Comment, self).__init__(node_type=xml.dom.Node.COMMENT_NODE, value=text, **kw)

if '__main__' == __name__:
    # Ad-hoc driver: parse the file named by the first command-line
    # argument, or a sample document when no argument is given.
    import sys
    xml_file = 'examples/tmsxtvd/tmsdatadirect_sample.xml'
    if 1 < len(sys.argv):
        xml_file = sys.argv[1]

    # Open the input in a with block so it is closed once parsing finishes
    # or fails; it was previously left open.
    with open(xml_file) as xml_stream:
        doc = parse(xml_stream)

## Local Variables:
## fill-column:78
## End: