Package deefuzzer :: Package deefuzzer :: Package tools :: Module xmltodict2
[hide private]
[frames] | no frames]

Source Code for Module deefuzzer.deefuzzer.tools.xmltodict2

  1  # -*- coding: utf-8 -*- 
  2  """ xmltodict(): convert xml into tree of Python dicts. 
  3   
  4  This was copied and modified from John Bair's recipe at aspn.activestate.com: 
  5          http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/149368 
  6  """ 
  7  import os 
  8  import string 
  9  import locale 
 10  from xml.parsers import expat 
 11   
 12  # If we're in Dabo, get the default encoding. 
 13  #import dabo 
 14  #import dabo.lib.DesignerUtils as desUtil 
 15  #from dabo.dLocalize import _ 
 16  #from dabo.lib.utils import resolvePath 
 17  #app = dabo.dAppRef 
 18  #if app is not None: 
 19          #default_encoding = app.Encoding 
 20  #else: 
 21                  #enc = locale.getlocale()[1] 
 22                  #if enc is None: 
 23                          #enc = dabo.defaultEncoding 
 24                  #default_encoding = enc 
 25   
 26  # Python seems to need to compile code with \n linesep: 
 27  code_linesep = "\n" 
 28  eol = os.linesep 
 29   
 30   
class Xml2Obj:
    """Expat-driven parser that turns an XML document into nested dicts.

    Every ordinary element becomes a dict with a "name" key and, when
    present, "attributes" (dict), "children" (list of element dicts) and
    "cdata" (concatenated non-blank text).  Two element names are special:

    * ``<code>``: each child element is stored in the parent's "code" dict
      as ``{method_name: source_text}``.
    * ``<properties>``: each child element is stored in the parent's
      "properties" dict as ``{prop_name: {attribute_name: text}}``.

    Attribute names listed in ``attsToSkip`` are dropped from every element.
    """

    def __init__(self):
        self.root = None
        self.nodeStack = []
        self.attsToSkip = []
        # State while inside a <code> section.
        self._inCode = False
        self._mthdName = ""
        self._mthdCode = ""
        self._codeDict = None
        # State while inside a <properties> section.
        self._inProp = False
        self._propName = ""
        self._propData = ""
        self._propDict = None
        self._currPropAtt = ""
        self._currPropDict = None

    @staticmethod
    def _native(text):
        """Return *text* as the interpreter's native ``str`` when possible.

        On Python 2 expat hands over ``unicode``; pure-ASCII values are
        converted to ``str`` as before, while non-ASCII values are kept as
        ``unicode`` instead of raising UnicodeEncodeError.  On Python 3 the
        text is already ``str`` and is returned unchanged.
        """
        if isinstance(text, str):
            return text
        try:
            return text.encode()
        except UnicodeEncodeError:
            return text

    def StartElement(self, name, attributes):
        """SAX start element event handler"""
        if name == "code":
            # This is code for the parent element
            self._inCode = True
            parent = self.nodeStack[-1]
            self._codeDict = parent.setdefault("code", {})
        elif name == "properties":
            # These are the custom property definitions
            self._inProp = True
            self._propName = ""
            self._propData = ""
            parent = self.nodeStack[-1]
            self._propDict = parent.setdefault("properties", {})
        elif self._inCode:
            self._mthdName = self._native(name)
        elif self._inProp:
            if self._propName:
                # In the middle of a prop definition
                self._currPropAtt = self._native(name)
            else:
                self._propName = self._native(name)
                self._currPropDict = {}
                self._currPropAtt = ""
        else:
            element = {"name": self._native(name)}
            if attributes:
                for att in self.attsToSkip:
                    attributes.pop(att, None)
                element["attributes"] = attributes

            # Push element onto the stack and make it a child of parent
            if self.nodeStack:
                parent = self.nodeStack[-1]
                parent.setdefault("children", []).append(element)
            else:
                self.root = element
            self.nodeStack.append(element)

    def EndElement(self, name):
        """SAX end element event handler"""
        if self._inCode:
            if name == "code":
                self._inCode = False
                self._codeDict = None
            else:
                # End of an individual method.  strip() removes any trailing
                # newline, so exactly one is always appended.
                self._codeDict[self._mthdName] = self._mthdCode.strip() + "\n"
                self._mthdName = ""
                self._mthdCode = ""
        elif self._inProp:
            if name == "properties":
                self._inProp = False
                self._propDict = None
            elif name == self._propName:
                # End of an individual prop definition
                self._propDict[self._propName] = self._currPropDict
                self._propName = ""
            else:
                # End of a property attribute
                self._currPropDict[self._currPropAtt] = self._propData
                self._propData = self._currPropAtt = ""
        else:
            self.nodeStack = self.nodeStack[:-1]

    def CharacterData(self, data):
        """SAX character data event handler"""
        # Whitespace-only chunks are kept only inside <code>, where they are
        # part of the method source.
        if self._inCode or data.strip():
            data = self._native(data.replace("&lt;", "<"))
            if self._inCode:
                self._mthdCode += data
            elif self._inProp:
                self._propData += data
            else:
                element = self.nodeStack[-1]
                element["cdata"] = element.get("cdata", "") + data

    def Parse(self, xml):
        """Parse the XML string *xml* and return the root element dict.

        Raises expat.ExpatError when the document is not well-formed.
        """
        # Create a SAX parser
        parser = expat.ParserCreate()
        # SAX event handlers
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData
        # Parse the whole document in one call (second argument: is-final)
        parser.Parse(xml, 1)
        return self.root

    def ParseFromFile(self, filename):
        """Read *filename* and return the result of Parse() on its content."""
        # The context manager closes the file even when parsing fails.
        with open(filename, "r") as xml_file:
            return self.Parse(xml_file.read())
162 163
def xmltodict(xml, attsToSkip=None, addCodeFile=False):
    """Given an xml string or file, return a Python dictionary.

    Args:
        xml: either the path of an XML file or a raw XML string.  It is
            treated as a path when it exists on disk and contains no line
            separator; otherwise it must start with "<?xml ".
        attsToSkip: attribute names to drop from every element (default:
            none).
        addCodeFile: when true and *xml* is a path, also merge the code
            found in the sibling "<basename>-code.py" file, if it exists.

    Returns:
        The root element dict built by Xml2Obj.

    Raises:
        dabo.dException.XmlException: the file is missing, or the XML is
            not well-formed.

    NOTE(review): ``_``, ``dabo`` and ``desUtil`` are not imported in the
    visible part of this module (their imports are commented out), so the
    error and code-file paths need them to be provided elsewhere.
    """
    parser = Xml2Obj()
    # Use a fresh list per call rather than one shared default list object.
    parser.attsToSkip = [] if attsToSkip is None else attsToSkip
    isPath = os.path.exists(xml)
    errmsg = ""
    if eol not in xml and isPath:
        # argument was a file
        try:
            ret = parser.ParseFromFile(xml)
        except expat.ExpatError as e:
            errmsg = _("The XML in '%s' is not well-formed and cannot be parsed: %s") % (xml, e)
    elif not xml.strip().startswith("<?xml "):
        # argument is not raw xml either, so it's a bad file name
        errmsg = _("The file '%s' could not be found") % xml
    else:
        # argument must have been raw xml
        try:
            ret = parser.Parse(xml)
        except expat.ExpatError:
            errmsg = _("An invalid XML string was encountered")
    if errmsg:
        raise dabo.dException.XmlException(errmsg)
    if addCodeFile and isPath:
        # Get the associated code file, if any
        codePth = "%s-code.py" % os.path.splitext(xml)[0]
        if os.path.exists(codePth):
            try:
                with open(codePth) as codeFile:
                    codeDict = desUtil.parseCodeFile(codeFile.read())
                desUtil.addCodeToClassDict(ret, codeDict)
            except Exception as e:
                # Best effort: a broken code file must not prevent the XML
                # structure from being returned.
                print("Failed to parse code file: %s" % (e,))
    return ret
198 199
def escQuote(val, noEscape=False, noQuote=False):
    """Add surrounding quotes to the string, and escape
    any illegal XML characters.

    Args:
        val: the value to quote.  Values that are neither text nor bytes
            are converted with str(); byte strings are decoded with the
            module-level ``default_encoding``.
        noEscape: when true, skip the escaping of ampersands, quotes and
            characters above code point 127.
        noQuote: when true, do not wrap the result in double quotes.

    Returns:
        The escaped (and, unless *noQuote*, double-quoted) text.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    if not isinstance(val, (text_type, bytes)):
        val = str(val)
    if not isinstance(val, text_type):
        val = val.decode(default_encoding)
    qt = "" if noQuote else '"'
    if not noEscape:
        # First escape internal ampersands. We need to double them up due to a
        # quirk in wxPython and the way it displays this character.
        val = val.replace("&", "&amp;&amp;")
        # Escape any internal quotes
        val = val.replace('"', "&quot;").replace("'", "&apos;")
        # Escape any high-order characters as numeric character references
        val = "".join(
            char if ord(char) <= 127 else "&#%s;" % ord(char) for char in val
        )
    # NOTE(review): angle brackets are escaped even when noEscape is set;
    # the nesting was reconstructed from a flattened listing - confirm.
    val = val.replace("<", "&#060;").replace(">", "&#062;")
    return "%s%s%s" % (qt, val, qt)
230 231
def dicttoxml(dct, level=0, header=None, linesep=None):
    """Given a Python dictionary, return an xml string.

    The dictionary must be in the format returned by xmltodict(), with keys
    on "attributes", "code", "cdata", "name", "children" and "properties".

    Send your own XML header, otherwise a default one will be used.  The
    default header names the module-level ``default_encoding``.

    The linesep argument is a dictionary, with keys on levels, allowing the
    developer to add extra whitespace depending on the level.

    Args:
        dct: the element dict to serialise; "name" is required.
        level: nesting depth, used for tab indentation.  The header is only
            emitted at level 0.
        header: XML declaration to prepend at level 0, or None for the
            default one.
        linesep: optional ``{level: text}`` dict of extra text to append
            after an element's closing tag.

    Returns:
        The XML text for *dct* and all of its descendants.
    """
    indent = "\t" * level
    inner = "\t" * (level + 1)

    att = ""
    if "attributes" in dct:
        for key, val in dct["attributes"].items():
            # Some keys are already handled.
            noEscape = key in ("sizerInfo",)
            att += " %s=%s" % (key, escQuote(val, noEscape))
    ret = "%s<%s%s" % (indent, dct["name"], att)

    has_content = any(
        key in dct for key in ("cdata", "children", "code", "properties")
    )
    if not has_content:
        ret += " />%s" % eol
    else:
        ret += ">"
        if "cdata" in dct:
            ret += "%s" % dct["cdata"].replace("<", "&lt;")

        if dct.get("code"):
            ret += "%s%s<code>%s" % (eol, inner, eol)
            methodTab = "\t" * (level + 2)
            for mthd, cd in dct["code"].items():
                # Convert \n's in the code to eol:
                cd = eol.join(cd.splitlines())

                # Make sure that the code ends with a linefeed
                if not cd.endswith(eol):
                    cd += eol

                ret += "%s<%s><![CDATA[%s%s]]>%s%s</%s>%s" % (
                    methodTab, mthd, eol, cd, eol, methodTab, mthd, eol)
            ret += "%s</code>%s" % (inner, eol)

        if dct.get("properties"):
            ret += "%s%s<properties>%s" % (eol, inner, eol)
            currTab = "\t" * (level + 2)
            itmTab = "\t" * (level + 3)
            for prop, val in dct["properties"].items():
                ret += "%s<%s>%s" % (currTab, prop, eol)
                for propItm, itmVal in val.items():
                    ret += "%s<%s>%s</%s>%s" % (
                        itmTab, propItm, itmVal, propItm, eol)
                ret += "%s</%s>%s" % (currTab, prop, eol)
            ret += "%s</properties>%s" % (inner, eol)

        if dct.get("children"):
            ret += eol
            for child in dct["children"]:
                ret += dicttoxml(child, level + 1, linesep=linesep)

        # Indent the closing tag only when it starts on its own line
        closing_indent = indent if ret.endswith(eol) else ""
        ret += "%s</%s>%s" % (closing_indent, dct["name"], eol)

        # NOTE(review): applied only to elements that have a closing tag;
        # the nesting was reconstructed from a flattened listing - confirm.
        if linesep:
            ret += linesep.get(level, "")

    if level == 0:
        if header is None:
            header = '<?xml version="1.0" encoding="%s" standalone="no"?>%s' \
                % (default_encoding, eol)
        ret = header + ret

    return ret
312 313
def flattenClassDict(cd, retDict=None):
    """Flatten a nested class dict into ``{classID: info}``.

    *cd* is a nested element dict such as would be created by restoring
    from a cdxml file.  The result maps every classID found in the tree
    (including referenced superclass and designer-class files) to a dict
    with "attributes", "code" and "properties" keys, so that all objects
    end up at the top level.  Values already present in *retDict* for a
    classID take precedence over newly collected ones.

    NOTE(review): ``resolvePath`` is not imported in the visible part of
    this module (its import is commented out).
    """
    if retDict is None:
        retDict = {}
    atts = cd.get("attributes", {})
    props = cd.get("properties", {})
    kids = cd.get("children", [])
    code = cd.get("code", {})
    classID = atts.get("classID", "")
    classFile = resolvePath(atts.get("designerClass", ""))
    superclass = resolvePath(atts.get("superclass", ""))
    superclassID = atts.get("superclassID", "")

    if superclassID and os.path.exists(superclass):
        # Collect the superclass info first
        flattenClassDict(xmltodict(superclass, addCodeFile=True), retDict)

    if classID:
        fileKids = ()
        if os.path.exists(classFile):
            # The class lives in its own file: take its info from there
            loaded = xmltodict(classFile, addCodeFile=True)
            entry = {"attributes": loaded.get("attributes", {}),
                     "code": loaded.get("code", {}),
                     "properties": loaded.get("properties", {})}
            fileKids = loaded.get("children", [])
        else:
            # Not a file; most likely just a component in another class
            entry = {"attributes": atts, "code": code, "properties": props}
        # Whatever was registered earlier for this classID wins
        entry.update(retDict.get(classID, {}))
        retDict[classID] = entry
        # Now update the child objects found in the class file
        for kid in fileKids:
            flattenClassDict(kid, retDict)

    for kid in kids or ():
        flattenClassDict(kid, retDict)
    return retDict
361 362
def addInheritedInfo(src, super, updateCode=False):
    """Merge superclass information into *src* and all of its children.

    *super* maps classIDs to dicts with "attributes", "code" and
    "properties" keys.  For every object in the tree that has a classID,
    the matching superclass attributes and properties become the base, and
    the object's own values are laid on top of them.  When *updateCode* is
    true the same is done for the "code" dict.  *src* is modified in
    place; nothing is returned.
    """
    ownAtts = src.get("attributes", {})
    ownProps = src.get("properties", {})
    children = src.get("children", [])
    ownCode = src.get("code", {})
    classID = ownAtts.get("classID", "")
    if classID:
        inherited = super.get(
            classID, {"attributes": {}, "code": {}, "properties": {}})
        # Start from a copy of the inherited values, then let the object's
        # own values override them.
        merged = src["attributes"] = inherited["attributes"].copy()
        merged.update(ownAtts)
        merged = src["properties"] = inherited.get("properties", {}).copy()
        merged.update(ownProps)
        if updateCode:
            merged = src["code"] = inherited["code"].copy()
            merged.update(ownCode)
    for child in children or ():
        addInheritedInfo(child, super, updateCode)
386 387 388 389 #if __name__ == "__main__": 390 #test_dict = {"name": "test", "attributes":{"path": "c:\\temp\\name", 391 #"problemChars": "Welcome to <Jos\xc3\xa9's \ Stuff!>\xc2\xae".decode("latin-1")}} 392 #print "test_dict:", test_dict 393 #xml = dicttoxml(test_dict) 394 #print "xml:", xml 395 #test_dict2 = xmltodict(xml) 396 #print "test_dict2:", test_dict2 397 #print "same?:", test_dict == test_dict2 398