pyxb.utils.utility

1 # -*- coding: utf-8 -*- 2 # Copyright 2009-2013, Peter A. Bigot 3 # 4 # Licensed under the Apache License, Version 2.0 (the "License"); you may 5 # not use this file except in compliance with the License. You may obtain a 6 # copy of the License at: 7 # 8 # http://www.apache.org/licenses/LICENSE-2.0 9 # 10 # Unless required by applicable law or agreed to in writing, software 11 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 # License for the specific language governing permissions and limitations 14 # under the License. 15 16 """Utility functions and classes.""" 17 18 import re 19 import os 20 import errno 21 import pyxb 22 import urlparse 23 import time 24 import datetime 25 import logging 26 27 _log = logging.getLogger(__name__) 28

def QuotedEscaped (s):
    """Render a value as a literal usable in Python source text.

    The built-in C{repr} already produces a correctly quoted and
    escaped literal, so it is used directly rather than hand-rolling
    the escaping.

    @param s: the value (normally a string) to render
    @rtype: C{str}
    """
    literal = repr(s)
    return literal

38

def _DefaultXMLIdentifierToPython (identifier):
    """Stock pre-processor used as C{_XMLIdentifierToPython}.

    The identifier is simply passed through the C{unicode}
    constructor; no transliteration of any kind is attempted.

    @param identifier : some XML identifier

    @return: C{unicode(identifier)}
    """
    as_unicode = unicode(identifier)
    return as_unicode

52

def _SetXMLIdentifierToPython (xml_identifier_to_python):
    """Install the callable L{MakeIdentifier} uses to pre-process an
    XML identifier.

    L{MakeIdentifier} strips every character that is not legal in an
    ASCII Python identifier.  An application generating bindings may
    prefer to transliterate some of those characters instead; the
    callable installed here is given the identifier first and may
    replace whatever characters it wishes.  All transformations of
    L{MakeIdentifier} are still applied afterwards, so the callable
    is not responsible for producing a legal identifier.

    This function rebinds the module-level C{_XMLIdentifierToPython}
    and returns nothing.

    @param xml_identifier_to_python : A callable that takes a string
    and returns a Unicode string, possibly with non-identifier
    characters replaced by other characters.  Pass C{None} to restore
    the default, L{_DefaultXMLIdentifierToPython}.
    """
    global _XMLIdentifierToPython
    _XMLIdentifierToPython = (
        _DefaultXMLIdentifierToPython
        if xml_identifier_to_python is None
        else xml_identifier_to_python
    )

# The pre-processing callable applied by MakeIdentifier; rebound by
# _SetXMLIdentifierToPython.
_XMLIdentifierToPython = _DefaultXMLIdentifierToPython

# A dash, space, or period: each is replaced by an underscore.
_UnderscoreSubstitute_re = re.compile(r'[- .]')

# Any character outside the ASCII identifier alphabet: removed.
_NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')

# A run of underscores at the start of the string: removed.
_PrefixUnderscore_re = re.compile(r'^_+')

# A run of digits at the start of the string: triggers the 'n' prefix.
_PrefixDigit_re = re.compile(r'^\d+')

# An underscore followed by one word character (camel-case conversion).
_CamelCase_re = re.compile(r'_\w')

def MakeIdentifier (s, camel_case=False):
    """Turn an arbitrary string into something usable as a Python
    identifier.

    Processing steps, in order:

     1. the string is passed through L{_XMLIdentifierToPython};
     2. dashes, spaces, and periods become underscores;
     3. characters not permitted in Python identifiers are removed;
     4. leading underscores are removed;
     5. if C{camel_case} is set, each underscore followed by a word
        character is replaced by the upper-cased character;
     6. if the result begins with a digit, 'n' is prepended;
     7. if the result is empty, 'emptyString' is substituted.

    No check is made for L{conflicts with keywords <DeconflictKeyword>}.

    @keyword camel_case : If C{True}, 'one_or_two' becomes
    'oneOrTwo'.  If C{False} (default), has no effect.

    @rtype: C{str}
    """
    text = _XMLIdentifierToPython(s)
    text = _UnderscoreSubstitute_re.sub('_', text)
    text = _NonIdentifier_re.sub('', text)
    text = _PrefixUnderscore_re.sub('', text)
    if camel_case:
        def _capitalize (match):
            # The match is '_' plus one character; keep only the
            # character, upper-cased.
            return match.group(0)[1].upper()
        text = _CamelCase_re.sub(_capitalize, text)
    if _PrefixDigit_re.match(text):
        text = 'n' + text
    if not text:
        text = 'emptyString'
    return text

_PythonKeywords = frozenset([
    "and", "as", "assert", "break", "class", "continue", "def", "del",
    "elif", "else", "except", "exec", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
    "raise", "return", "try", "while", "with", "yield",
])
"""Python keywords.  Note that types like int and float are not
keywords.

@see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""

_PythonBuiltInConstants = frozenset([
    "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
    # "set" is neither a keyword nor a constant, but if a schema
    # defines something like {http://www.w3.org/2001/SMIL20/}set
    # there would be no way to access the builtin constructor.
    "set",
])
"""Other symbols that aren't keywords but that can't be used.

@see: U{http://docs.python.org/library/constants.html}."""

_Keywords = _PythonKeywords | _PythonBuiltInConstants
"""The keywords reserved for Python, derived from L{_PythonKeywords}
and L{_PythonBuiltInConstants}."""

def DeconflictKeyword (s, aux_keywords=frozenset()):
    """Append an underscore to C{s} if it collides with a reserved
    word.

    A collision is membership in L{_Keywords} or in C{aux_keywords};
    any other string is returned unchanged.

    See also L{MakeUnique}.

    @param s: string to be deconflicted

    @keyword aux_keywords: optional container of additional strings
    that should be treated as keywords.

    @rtype: C{str}
    """
    is_reserved = (s in _Keywords) or (s in aux_keywords)
    if not is_reserved:
        return s
    return '%s_' % (s,)

166

def MakeUnique (s, in_use):
    """Derive from C{s} an identifier that is not yet in C{in_use}.

    If C{s} itself is free it is used as is.  Otherwise trailing
    underscores are dropped and candidates are tried in the order
    C{x_}, C{x_2}, C{x_3}, ... until an unused one is found.

    @param in_use: The set of identifiers already in use in the
    relevant scope.  It is updated to contain the returned
    identifier.

    @rtype: C{str}
    """
    if s not in in_use:
        in_use.add(s)
        return s
    base = s.rstrip('_')
    unique = '%s_' % (base,)
    serial = 2
    while unique in in_use:
        unique = '%s_%d' % (base, serial)
        serial += 1
    in_use.add(unique)
    return unique

191

def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Run the full pipeline that yields a unique, legal identifier.

    The string is converted with L{MakeIdentifier}, stripped of
    leading and trailing underscores, deconflicted against keywords
    with L{DeconflictKeyword}, optionally given a private or
    protected prefix, and finally made unique with L{MakeUnique}.

    @param in_use: the set of already used identifiers.  Upon return
    from this function, it is updated to include the returned
    identifier.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{True}, the identifier is given two leading
    underscores, making it a private variable within a Python class.
    Takes precedence over C{protected}.

    @keyword protected: as for C{private}, but uses only one
    underscore.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    bare = MakeIdentifier(s).strip('_')
    name = DeconflictKeyword(bare, aux_keywords)
    if private:
        prefix = '__'
    elif protected:
        prefix = '_'
    else:
        prefix = ''
    return MakeUnique(prefix + name, in_use)

227 228 # @todo: descend from pyxb.cscRoot, if we import pyxb

class _DeconflictSymbols_mixin (object):
    """Mix-in recording the public symbols a class reserves, so that
    generated binding classes inheriting from it can avoid them.

    Some classes, like the L{pyxb.binding.basis.element} or
    L{pyxb.binding.basis.simpleTypeDefinition} classes in
    L{pyxb.binding.basis}, have public symbols associated with
    functions and variables.  An XML schema might include tags and
    attribute names that match these symbols.  To avoid conflict, the
    reserved symbols marked in this class are added to the
    pre-defined identifier set.

    Subclasses should create a class-level variable
    C{_ReservedSymbols} that contains a set of strings denoting the
    symbols reserved in this class, combined with those from any
    superclasses that also have reserved symbols.  Code like the
    following is suggested::

       # For base classes (direct mix-in):
       _ReservedSymbols = set([ 'one', 'two' ])
       # For subclasses:
       _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))

    Only public symbols (those with no underscores) are currently
    supported.  (Private symbols can't be deconflicted that easily,
    and no protected symbols that derive from the XML are created by
    the binding generator.)
    """

    #: There are no reserved symbols in the base class.
    _ReservedSymbols = set()

# Matches a single tab, line feed, or carriage return.
__TabCRLF_re = re.compile("[\t\n\r]")
# Matches a run of one or more spaces.
__MultiSpace_re = re.compile(" +")

def NormalizeWhitespace (text, preserve=False, replace=False, collapse=False):
    """Apply one of the XML Schema whiteSpace normalizations to a
    string.

    The caller selects the normalization by setting one of the
    keyword parameters to C{True}; they are examined in the order
    given here:

     - C{preserve}: the text is returned unchanged.

     - C{replace}: every tab, newline, and carriage return is
       replaced with an ASCII space.

     - C{collapse}: the C{replace} normalization is done, runs of
       spaces are reduced to a single space, and leading and trailing
       whitespace is stripped.

    An C{Exception} is raised if none of the three is selected.

    See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.

    @rtype: C{str}
    """
    if preserve:
        return text
    replaced = __TabCRLF_re.sub(' ', text)
    if replace:
        return replaced
    if not collapse:
        # pyxb not imported here; could be.
        raise Exception('NormalizeWhitespace: No normalization specified')
    return __MultiSpace_re.sub(' ', replaced).strip()

293

class Graph:
    """Represent a directed graph with arbitrary objects as nodes.

    This is used in the L{code
    generator<pyxb.binding.generate.Generator>} to determine order
    dependencies among components within a namespace, and schema that
    comprise various namespaces.  An edge from C{source} to C{target}
    indicates that some aspect of C{source} requires that some aspect
    of C{target} already be available.

    Nodes are stored in sets and used as dictionary keys, so they
    must be hashable.
    """

    # Class-level defaults for values that are computed lazily; each
    # is shadowed by an instance attribute once it has been calculated.
    __roots = None
    __edgeMap = None
    __scc = None
    __sccMap = None
    __sccOrder = None
    __dfsOrder = None

    def __init__ (self, root=None):
        """Create an empty graph.

        @keyword root: If not C{None}, a node that is recorded as a
        root and added to the graph, exactly as L{addRoot} would do.
        """
        self.__roots = None
        self.__edges = set()
        self.__edgeMap = { }
        self.__reverseMap = { }
        self.__nodes = set()
        if root is not None:
            self.__roots = set([root])
            # The root must also be a node; otherwise a walk started
            # from it visits something the node set does not contain
            # and L{dfsOrder} rejects the result.
            self.__nodes.add(root)

    def addEdge (self, source, target):
        """Add a directed edge from the C{source} to the C{target}.

        The nodes are added to the graph if necessary.
        """
        self.__edges.add( (source, target) )
        self.__edgeMap.setdefault(source, set()).add(target)
        # A self-loop is not recorded as an incoming edge, so it does
        # not prevent the node from being treated as a root.
        if source != target:
            self.__reverseMap.setdefault(target, set()).add(source)
        self.__nodes.add(source)
        self.__nodes.add(target)

    def addNode (self, node):
        """Add the given node to the graph."""
        self.__nodes.add(node)

    def roots (self, reset=False):
        """Return the set of nodes calculated to be roots (i.e., those
        that have no incoming edges).

        This caches the roots calculated in a previous invocation
        unless the C{reset} keyword is given the value C{True}.

        @note: Upon reset, any roots that had been manually added
        using L{addRoot} will no longer be in the set unless they
        qualify by having no incoming edges.

        @keyword reset: If C{True}, any cached value is discarded and
        recomputed.  No effect if C{False} (default).

        @rtype: C{set}
        """
        if reset or (self.__roots is None):
            self.__roots = set()
            for n in self.__nodes:
                if not (n in self.__reverseMap):
                    self.__roots.add(n)
        return self.__roots

    def addRoot (self, root):
        """Add the provided node as a root node, even if it has
        incoming edges.

        The node need not be present in the graph (if necessary, it
        is added).

        Note that roots added in this way do not survive a reset using
        L{roots}.

        @return: C{self}
        """
        if self.__roots is None:
            self.__roots = set()
        self.__nodes.add(root)
        self.__roots.add(root)
        return self

    def edgeMap (self):
        """Return the edges in the graph.

        The edge data structure is a map from the source node to the
        set of nodes that can be reached in a single step from the
        source.
        """
        return self.__edgeMap

    def edges (self):
        """Return the edges in the graph.

        The edge data structure is a set of node pairs represented as
        C{( source, target )}.
        """
        return self.__edges

    def nodes (self):
        """Return the set of nodes in the graph.

        The node collection data structure is a set containing node
        objects, whatever they may be."""
        return self.__nodes

    def tarjan (self, reset=False):
        """Execute Tarjan's algorithm on the graph.

        U{Tarjan's
        algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
        computes the U{strongly-connected
        components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
        of the graph: i.e., the sets of nodes that form a minimal
        closed set under edge transition.  In essence, the loops.  We
        use this to detect groups of components that have a dependency
        cycle.

        The search starts from each node returned by L{roots}; an
        C{Exception} is raised if the graph has nodes but no roots.

        @keyword reset: If C{True}, any cached component set is erased
        and recomputed.  If C{False} (default), an existing previous
        result is left unchanged."""

        if (self.__scc is not None) and (not reset):
            return
        self.__sccMap = { }
        self.__stack = []
        # Mirrors the content of __stack so membership tests do not
        # require a linear scan of the list.
        self.__onStack = set()
        self.__sccOrder = []
        self.__scc = []
        self.__index = 0
        self.__tarjanIndex = { }
        self.__tarjanLowLink = { }
        for v in self.__nodes:
            self.__tarjanIndex[v] = None
        roots = self.roots()
        if (0 == len(roots)) and (0 < len(self.__nodes)):
            raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
        for r in roots:
            self._tarjan(r)
        self.__didTarjan = True

    def _tarjan (self, v):
        """Do the work of Tarjan's algorithm for a given root node.

        This recurses once per node along each path, so very deep
        graphs are bounded by the interpreter recursion limit."""
        if self.__tarjanIndex.get(v) is not None:
            # "Root" was already reached.
            return
        self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
        self.__index += 1
        self.__stack.append(v)
        self.__onStack.add(v)
        for target in self.__edgeMap.get(v, []):
            if self.__tarjanIndex[target] is None:
                self._tarjan(target)
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            elif target in self.__onStack:
                self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
            # Otherwise the target belongs to a component that has
            # already been completed; nothing to do.

        if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
            # v is the first-visited node of its component: everything
            # above it on the stack belongs to the same component.
            scc = []
            while True:
                member = self.__stack.pop()
                self.__onStack.discard(member)
                scc.append(member)
                if v == member:
                    break
            self.__sccOrder.append(scc)
            if 1 < len(scc):
                self.__scc.append(scc)
            for member in scc:
                self.__sccMap.setdefault(member, scc)

    def scc (self, reset=False):
        """Return the non-trivial strongly-connected components of
        the graph.

        The data structure is a list, each element of which is itself
        a list of two or more nodes that form a cycle.  Components
        consisting of a single node are not included; see L{sccOrder}
        for the complete sequence.

        @see: L{tarjan}.
        """
        if reset or (self.__scc is None):
            self.tarjan(reset)
        return self.__scc

    def sccMap (self, reset=False):
        """Return a map from nodes to the strongly-connected component
        to which the node belongs.

        @keyword reset: If C{True}, the L{tarjan} method will be
        re-invoked, propagating the C{reset} value.  If C{False}
        (default), a cached value will be returned if available.

        @see: L{tarjan}.
        """
        if reset or (self.__sccMap is None):
            self.tarjan(reset)
        return self.__sccMap

    def sccOrder (self, reset=False):
        """Return the strongly-connected components in order.

        The data structure is a list, in dependency order, of strongly
        connected components (which can be single nodes).  Appearance
        of a node in a component earlier in the list indicates that it
        has no dependencies on any node that appears in a subsequent
        component.  This order is preferred over L{dfsOrder} for code
        generation, since it detects loops.

        @see: L{tarjan}.
        """
        if reset or (self.__sccOrder is None):
            self.tarjan(reset)
        return self.__sccOrder

    def sccForNode (self, node, **kw):
        """Return the strongly-connected component to which the given
        node belongs.

        Any keywords supplied when invoking this method are passed to
        the L{sccMap} method.

        @return: The SCC list, or C{None} if the node is not present in
        the results of Tarjan's algorithm."""

        return self.sccMap(**kw).get(node)

    def cyclomaticComplexity (self):
        """Return the cyclomatic complexity of the graph.

        The value is computed as the number of edges, minus the
        number of nodes, plus twice the number of components returned
        by L{scc}."""
        self.tarjan()
        return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)

    def __dfsWalk (self, source):
        # Post-order walk: a node is appended only after everything
        # reachable from it has been appended.
        assert not (source in self.__dfsWalked)
        self.__dfsWalked.add(source)
        for target in self.__edgeMap.get(source, []):
            if not (target in self.__dfsWalked):
                self.__dfsWalk(target)
        self.__dfsOrder.append(source)

    def _generateDOT (self, title='UNKNOWN', labeller=None):
        """Return the text of a Graphviz C{digraph} describing the
        graph.

        @keyword title: the name given to the digraph
        @keyword labeller: optional callable producing the label for a
        node; C{str} is used when it is C{None}.  Self-loops are not
        emitted.
        """
        node_map = { }
        idx = 1
        for n in self.__nodes:
            node_map[n] = idx
            idx += 1
        text = []
        text.append('digraph "%s" {' % (title,))
        for n in self.__nodes:
            if labeller is not None:
                nn = labeller(n)
            else:
                nn = str(n)
            text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
        for s in self.__nodes:
            for d in self.__edgeMap.get(s, []):
                if s != d:
                    text.append('%s -> %s;' % (node_map[s], node_map[d]))
        text.append("};")
        return "\n".join(text)

    def dfsOrder (self, reset=False):
        """Return the nodes of the graph in U{depth-first-search
        order<http://en.wikipedia.org/wiki/Depth-first_search>}.

        The data structure is a list in which each node appears after
        every node reachable from it.  Calculated lists are retained
        and returned on future invocations, subject to the C{reset}
        keyword.  An C{Exception} is raised if the walk from the roots
        does not reach every node.

        @keyword reset: If C{True}, discard cached results and
        recompute the order."""
        if reset or (self.__dfsOrder is None):
            self.__dfsWalked = set()
            self.__dfsOrder = []
            for root in self.roots(reset=reset):
                self.__dfsWalk(root)
            self.__dfsWalked = None
            if len(self.__dfsOrder) != len(self.__nodes):
                raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
        return self.__dfsOrder

    def rootSetOrder (self):
        """Return the nodes of the graph as a sequence of sets that
        respects the dependencies expressed by the edges.

        The first set holds the nodes that have no outgoing edges,
        i.e. that depend on nothing.  Each following set holds the
        nodes all of whose targets appear in earlier sets.  This
        continues until all nodes have been placed.  The sets impose
        a partial order on the nodes, without being as constraining as
        L{sccOrder}.

        @return: a list of the sets, or C{None} (after logging an
        error) if a dependency cycle prevents completion."""
        order = []
        nodes = set(self.__nodes)
        # Private copy of the source-to-targets map, so that pruning
        # it does not disturb the graph itself.
        edge_map = {}
        for (source, targets) in self.__edgeMap.items():
            edge_map[source] = targets.copy()
        while nodes:
            freeset = set()
            for n in nodes:
                if not (n in edge_map):
                    freeset.add(n)
            if 0 == len(freeset):
                _log.error('dependency cycle in named components')
                return None
            order.append(freeset)
            nodes.difference_update(freeset)
            new_edge_map = {}
            for (source, targets) in edge_map.items():
                targets.difference_update(freeset)
                if 0 != len(targets):
                    new_edge_map[source] = targets
            edge_map = new_edge_map
        return order

# Default prefix rewrite map consulted by NormalizeLocation.  It is
# always modified in place so existing references remain valid.
LocationPrefixRewriteMap_ = { }

def SetLocationPrefixRewriteMap (prefix_map):
    """Replace the content of the map that L{NormalizeLocation} uses
    to rewrite URI prefixes.

    The module-level dictionary is emptied and then filled from
    C{prefix_map}; the dictionary object itself is not replaced.

    @param prefix_map: a mapping from URI prefix to its substitute
    """
    rewrite_map = LocationPrefixRewriteMap_
    rewrite_map.clear()
    rewrite_map.update(prefix_map)

609

def NormalizeLocation (uri, parent_uri=None, prefix_map=None):
    """Normalize a URI against an optional parent_uri in the way that
    is done for C{schemaLocation} attribute values.

    The steps are:

     1. if C{parent_uri} is given, C{uri} is joined to it;
     2. every prefix in the rewrite map that the result starts with
        is replaced by its substitute, e.g. to convert a remote URI
        to a local repository;
     3. if the result contains no colon it is taken to be a file
        system path and passed through C{os.path.realpath}.

    @param uri : The URI to normalize.  If C{None}, function returns
    C{None}

    @param parent_uri : The base URI against which normalization is
    done, if C{uri} is a relative URI.

    @param prefix_map : A map used to rewrite URI prefixes.  If
    C{None}, the value defaults to that stored by
    L{SetLocationPrefixRewriteMap}.
    """
    if uri is None:
        return uri
    abs_uri = uri if parent_uri is None else urlparse.urljoin(parent_uri, uri)
    if prefix_map is None:
        prefix_map = LocationPrefixRewriteMap_
    for (prefix, replacement) in prefix_map.items():
        if abs_uri.startswith(prefix):
            abs_uri = replacement + abs_uri[len(prefix):]
    if ':' not in abs_uri:
        abs_uri = os.path.realpath(abs_uri)
    return abs_uri

650 651

def TextFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as a text string.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.

    @param uri: the location to read

    @keyword archive_directory: if set, and the content did not come
    from a local file, a copy of the content is saved in this
    directory under the base name of the URI path (with a numeric
    suffix if that name is already taken).  Failure to save the copy
    is logged as a warning and does not prevent the content from
    being returned.

    @return: the content read from the location

    @raise Exception: whatever error prevented the location from
    being opened; it is logged before being raised.
    """
    import urllib
    import urllib2
    stream = None
    exc = None
    # Only something that has a colon is a non-file URI.  Some things
    # that have a colon are a file URI (sans schema).  Prefer urllib2,
    # but allow urllib (which apparently works better on Windows).
    if 0 <= uri.find(':'):
        try:
            stream = urllib2.urlopen(uri)
        except Exception as e:
            exc = e
        if stream is None:
            try:
                stream = urllib.urlopen(uri)
                exc = None
            except Exception:
                # Keep the urllib2 exception: it is the one reported
                # if the file fallback also fails.
                pass
    if stream is None:
        # No go as URI; give file a chance
        try:
            stream = open(uri)
            exc = None
        except Exception as e:
            if exc is None:
                exc = e
    if exc is not None:
        _log.error('open %s', uri, exc_info=exc)
        raise exc
    # Content that came from the local file system is not archived.
    # Not every stream has an fp attribute, hence the getattr.
    if isinstance(stream, file) or isinstance(getattr(stream, 'fp', None), file):
        archive_directory = None
    try:
        xmls = stream.read()
    finally:
        stream.close()
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        try:
            archive_fp = OpenOrCreate(dest_file)
            try:
                archive_fp.write(xmls)
            finally:
                archive_fp.close()
        except (OSError, IOError) as e:
            # Archiving is best-effort: the retrieved text is still
            # returned to the caller.
            _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
    return xmls

707

def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write the given file.

    Any missing directories in the path are created.  The file is
    opened in binary append/update mode (C{'ab+'}).

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing
    file contents are erased.

    The returned file pointer is positioned at the end of the file.

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file causes an C{OSError} to be raised with C{errno} set to
    C{EEXIST}.  I.e., only files with this value in the first 4KB will
    be returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: if the C{tag} check fails, or a directory cannot
    be created for a reason other than already existing.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An existing directory is what we wanted anyway.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            # The initial read position of an append-mode file is
            # platform dependent; rewind so the head of the file is
            # what gets examined.
            fp.seek(0)
            text = fp.read(4096)
            if 0 > text.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0, os.SEEK_SET)
            fp.truncate()
        else:
            fp.seek(0, os.SEEK_END)
    except Exception:
        # Do not leak the handle when refusing to return it.
        fp.close()
        raise
    return fp

# hashlib didn't show up until 2.5, and sha is deprecated in 2.6.
# Use the former when it can be imported, the latter otherwise.
try:
    import hashlib
except ImportError:
    import sha
    __Hasher = sha.new
else:
    __Hasher = hashlib.sha1

def HashForText (text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.

    @param text: the material to hash.  Unicode text is hashed in
    its UTF-8 encoding; anything else is handed to the hash
    constructor unchanged.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    data = text.encode('utf-8') if isinstance(text, unicode) else text
    digest = __Hasher(data)
    return digest.hexdigest()

# uuid didn't show up until 2.5; random is needed only as a fallback.
__HaveUUID = False
try:
    import uuid
    __HaveUUID = True
except ImportError:
    import random

def _NewUUIDString ():
    """Obtain a UUID using the best available method.

    When the C{uuid} module is available the URN form of a
    C{uuid.uuid1} value is returned.  Otherwise the result is a
    string combining the current date and time (to the second) with
    a random 32-bit number in hexadecimal.

    @rtype: C{str}
    """
    if __HaveUUID:
        return uuid.uuid1().urn
    # No long-literal suffix on the bound: the integer is promoted
    # automatically where necessary, and the suffix is not accepted
    # by newer interpreters.
    return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFF))

790

class UniqueIdentifier (object):
    """Records a unique identifier, generally associated with a
    binding generation action.

    The identifier is a string, but gets wrapped in an instance of
    this class to optimize comparisons and reduce memory footprint.

    Invoking the constructor for this class on the same string
    multiple times will return the same Python object, including any
    objects previously associated with it.

    An instance of this class compares equal to, and hashes equivalent
    to, the uid string.  When C{str}'d, the result is the uid; when
    C{repr}'d, the result is a constructor call to
    C{pyxb.utils.utility.UniqueIdentifier}.
    """

    # A map from UID string to the instance that represents it
    __ExistingUIDs = {}

    # The UID string; assigned exactly once, in __new__.
    __uid = None

    # The set of associated objects; created in __new__.
    __associatedObjects = None

    def uid (self):
        """The string unique identifier"""
        return self.__uid

    # Support pickling, which is done using only the UID.
    def __getnewargs__ (self):
        return (self.__uid,)

    def __getstate__ (self):
        return self.__uid

    def __setstate__ (self, state):
        # __new__ has already located or created the instance from
        # the same UID, so there is nothing left to restore.
        assert self.__uid == state

    # Singleton-like
    def __new__ (cls, *args):
        """Return the instance for the given uid, creating it if
        necessary.

        With no argument a new uid is obtained from
        L{_NewUUIDString}.  The argument may be a string or an
        existing L{UniqueIdentifier}; anything else raises
        C{TypeError}.
        """
        if 0 == len(args):
            uid = _NewUUIDString()
        else:
            uid = args[0]
        if isinstance(uid, UniqueIdentifier):
            uid = uid.uid()
        if not isinstance(uid, basestring):
            raise TypeError('UniqueIdentifier uid must be a string')
        rv = cls.__ExistingUIDs.get(uid)
        if rv is None:
            rv = super(UniqueIdentifier, cls).__new__(cls)
            rv.__uid = uid
            # Created here rather than in __init__: __init__ runs on
            # every constructor call (even when an existing instance
            # is returned) and is skipped entirely when unpickling.
            rv.__associatedObjects = set()
            cls.__ExistingUIDs[uid] = rv
        return rv

    def associateObject (self, obj):
        """Associate the given object with this identifier.

        This is a one-way association: the object is not provided with
        a return path to this identifier instance."""
        self.__associatedObjects.add(obj)

    def associatedObjects (self):
        """The set of objects that have been associated with this
        identifier instance."""
        return self.__associatedObjects

    def __init__ (self, uid=None):
        """Create a new UniqueIdentifier instance.

        @param uid: The unique identifier string.  If present, it is
        the callers responsibility to ensure the value is universally
        unique.  If C{None}, one will be provided.
        @type uid: C{str} or C{unicode}
        """
        assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
        # Never discard associations already recorded on an instance
        # that __new__ found in the cache.
        if self.__associatedObjects is None:
            self.__associatedObjects = set()

    def __eq__ (self, other):
        """Compare by uid with another instance or with a string.

        C{None} never compares equal; any other type raises
        C{TypeError}."""
        if other is None:
            return False
        elif isinstance(other, UniqueIdentifier):
            other_uid = other.uid()
        elif isinstance(other, basestring):
            other_uid = other
        else:
            raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
        return self.uid() == other_uid

    def __ne__ (self, other):
        # Defined explicitly so that inequality is always the
        # negation of __eq__ rather than an identity comparison.
        return not self.__eq__(other)

    def __hash__ (self):
        return hash(self.uid())

    def __str__ (self):
        return self.uid()

    def __repr__ (self):
        return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)

884

885 -class UTCOffsetTimeZone (datetime.tzinfo):

886 """A C{datetime.tzinfo} subclass that helps deal with UTC 887 conversions in an ISO8601 world. 888 889 This class only supports fixed offsets from UTC. 890 """ 891 892 # Regular expression that matches valid ISO8601 time zone suffixes 893 __Lexical_re = re.compile('^([-+])(\d\d):(\d\d)$') 894 895 # The offset in minutes east of UTC. 896 __utcOffset_min = 0 897 898 # Same as __utcOffset_min, but as a datetime.timedelta 899 __utcOffset_td = None 900 901 # A zero-length duration 902 __ZeroDuration = datetime.timedelta(0) 903 904 # Range limits 905 __MaxOffset_td = datetime.timedelta(hours=14) 906

907 - def __init__ (self, spec=None):

908 """Create a time zone instance with a fixed offset from UTC. 909 910 @param spec: Specifies the offset. Can be an integer counting 911 minutes east of UTC, the value C{None} (equal to 0 minutes 912 east), or a string that conform to the ISO8601 time zone 913 sequence (B{Z}, or B{[+-]HH:MM}). 914 """ 915 916 if spec is not None: 917 if isinstance(spec, basestring): 918 if 'Z' == spec: 919 self.__utcOffset_min = 0 920 else: 921 match = self.__Lexical_re.match(spec) 922 if match is None: 923 raise ValueError('Bad time zone: %s' % (spec,)) 924 self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3)) 925 if '-' == match.group(1): 926 self.__utcOffset_min = - self.__utcOffset_min 927 elif isinstance(spec, int): 928 self.__utcOffset_min = spec 929 elif isinstance(spec, datetime.timedelta): 930 self.__utcOffset_min = spec.seconds / 60 931 else: 932 raise TypeError('%s: unexpected type %s' % (type(self), type(spec))) 933 self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min) 934 if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td: 935 raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td)) 936 if 0 == self.__utcOffset_min: 937 self.__tzName = 'Z' 938 elif 0 > self.__utcOffset_min: 939 self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60) 940 else: 941 self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)

942

def utcoffset (self, dt):
    """Report the offset of this zone from UTC.

    @param dt: Accepted for compatibility with the C{datetime.tzinfo}
    interface; the stored offset is returned whatever its value.

    @return: The offset stored on this instance.
    """
    fixed_offset = self.__utcOffset_td
    return fixed_offset

946

def tzname (self, dt):
    """Report the name of this zone in the format expected by XML Schema.

    @param dt: Accepted for compatibility with the C{datetime.tzinfo}
    interface; the stored name is returned whatever its value.

    @return: The zone name stored on this instance.
    """
    zone_name = self.__tzName
    return zone_name

950

def dst (self, dt):
    """Report the daylight saving adjustment, which is a constant zero duration.

    @param dt: Accepted for compatibility with the C{datetime.tzinfo}
    interface; the same duration is returned whatever its value.

    @return: The zero-length duration shared by the class.
    """
    no_adjustment = self.__ZeroDuration
    return no_adjustment

954

def __cmp__ (self, other):
    """Order time zones by their offset from UTC.

    @param other: Another L{UTCOffsetTimeZone}, or any other object
    providing a C{utcoffset(dt)} method that returns a
    C{datetime.timedelta}.

    @return: A negative, zero, or positive integer, as produced by
    C{cmp}.
    """
    if isinstance(other, UTCOffsetTimeZone):
        return cmp(self.__utcOffset_min, other.__utcOffset_min)
    # A foreign time zone reports its offset as a timedelta, so compare
    # it with this zone's timedelta rather than with the integer minute
    # count: mixing the two types is not a meaningful comparison.
    return cmp(self.__utcOffset_td, other.utcoffset(datetime.datetime.now()))

959 960

class LocalTimeZone (datetime.tzinfo):
    """A C{datetime.tzinfo} subclass describing the local time zone.

    Largely borrowed from the example in the C{datetime.tzinfo}
    documentation of Python 2.5.1.
    """

    # Offsets from UTC for standard time and for daylight saving time,
    # taken from the time module.  Both are the same when the time
    # module reports that no daylight saving zone is defined.
    __STDOffset = datetime.timedelta(seconds=-time.timezone)
    __DSTOffset = datetime.timedelta(seconds=-time.altzone) if time.daylight else __STDOffset

    # Adjustments reported by dst(): nothing in standard time, and the
    # difference between the two offsets otherwise.
    __ZeroDelta = datetime.timedelta(0)
    __DSTDelta = __DSTOffset - __STDOffset

    def utcoffset (self, dt):
        """Return the offset from UTC in effect at local time C{dt}."""
        return self.__DSTOffset if self.__isDST(dt) else self.__STDOffset

    def dst (self, dt):
        """Return the daylight saving adjustment in effect at local time C{dt}."""
        return self.__DSTDelta if self.__isDST(dt) else self.__ZeroDelta

    def tzname (self, dt):
        """Return the name the time module gives the zone in effect at C{dt}."""
        (standard_name, daylight_name) = time.tzname
        if self.__isDST(dt):
            return daylight_name
        return standard_name

    def __isDST (self, dt):
        """Return C{True} iff the platform considers C{dt} to be in daylight saving time."""
        # Only the calendar and clock fields of dt are used.  The
        # trailing -1 asks mktime to work out the DST flag itself.
        calendar_fields = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
        timestamp = time.mktime(calendar_fields + (0, 0, -1))
        return time.localtime(timestamp).tm_isdst > 0

993

class PrivateTransient_mixin (pyxb.cscRoot):
    """Emulate the B{transient} keyword from Java for private member
    variables.

    The C{__getstate__} method defined here returns a copy of
    C{self.__dict__} from which selected members have been dropped.
    A class "Class" selects members by defining a class member
    variable named C{__PrivateTransient}: for every string "s" that
    it contains, the private variable "_Class__s" is left out of the
    state dictionary.  This keeps unnecessary fields out of instances
    placed in L{namespace
    archives<pyxb.namespace.archive.NamespaceArchive>} without the
    need for a C{__getstate__} method in every class of the instance
    hierarchy.

    For an example, see
    L{pyxb.xmlschema.structures._SchemaComponent_mixin}

    Users of this mixin are responsible for defining the
    C{__PrivateTransient} class variable and for adding the required
    variable names to it.

    A class that inherits from this may define its own
    C{__getstate__} method, which may or may not invoke the superclass
    one.  In that case the class defining C{__getstate__} should list
    L{PrivateTransient_mixin} as one of its direct superclasses, lest
    the latter end up earlier in the mro and consequently bypass the
    local override.
    """

    # Suffix shared by the per-class variables that list transient
    # members and by the class variable in which the combined set is
    # cached.
    __Attribute = '__PrivateTransient'

    def __getstate__ (self):
        """Return a copy of the instance dictionary without its transient members."""
        state = self.__dict__.copy()
        instance_class = self.__class__
        suffix = self.__Attribute
        # The combined set is cached on the class under a name that
        # carries a trailing underscore, so the scan below never
        # mistakes it for one of the per-class sets.
        cache_name = '_%s%s_' % (instance_class.__name__, suffix)
        transient = getattr(instance_class, cache_name, None)
        if transient is None:
            transient = set()
            for klass in instance_class.mro():
                for (member_name, listed) in klass.__dict__.items():
                    if not member_name.endswith(suffix):
                        continue
                    # What precedes the suffix is the mangled class
                    # prefix ("_Class") of the listed private names.
                    owner_prefix = member_name[:-len(suffix)]
                    transient.update('%s__%s' % (owner_prefix, _n) for _n in listed)
            setattr(instance_class, cache_name, transient)
        for key in transient:
            # Entries that are absent or hold None are left as they are.
            if state.get(key) is not None:
                del state[key]
        # Uncomment the following to test whether undesirable types
        # are being pickled, generally by accidentally leaving a
        # reference to one in an instance private member.
        #for (k, v) in state.items():
        #    import pyxb.namespace
        #    import xml.dom
        #    import pyxb.xmlschema.structures
        #    if isinstance(v, (pyxb.namespace.resolution.NamespaceContext, xml.dom.Node, pyxb.xmlschema.structures.Schema)):
        #        raise pyxb.LogicError('Unexpected instance of %s key %s in %s' % (type(v), k, self))

        return state

1056

def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

      files = GetMatchingFiles('&bundles//:+',
                               pattern=re.compile('.*\.wxs$'),
                               default_path_wildcard='+',
                               default_path='/usr/local/pyxb/nsarchives',
                               prefix_pattern='&',
                               prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    @param path: The entries to search, separated by os.pathsep (a
    colon on POSIX platforms and a semi-colon on Windows).  An entry
    that names a file is itself a candidate; otherwise the entry is
    treated as a directory whose files are candidates.  If an entry
    ends with C{//}, regardless of platform, that suffix is stripped
    and every directory beneath the entry is scanned as well,
    recursively.

    @keyword pattern: Optional regular expression object used to
    decide whether a candidate is returned.  It is searched for in the
    entry as given when the entry names a file, and in the bare file
    name for files found in a directory.  If left as C{None}, all
    candidates are returned.

    @keyword default_path_wildcard: An optional string which, if
    present as a single entry in the path, is replaced by the value
    of C{default_path}.  Only the first occurrence is replaced; later
    occurrences are dropped.

    @keyword default_path: A system-defined list of entries, in the
    same format as C{path}, which can be restored to the path by
    placing the C{default_path_wildcard} in the C{path}.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path entry, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: A list of matching paths, in the order in which the
    entries were visited.
    """

    def accepted (candidate):
        return (pattern is None) or (pattern.search(candidate) is not None)

    found = []
    # The pending entries are kept in reverse order so that the next
    # one to process can be taken from the end of the list.
    pending = path.split(os.pathsep)
    pending.reverse()
    while pending:
        entry = pending.pop()
        if default_path_wildcard == entry:
            if default_path is not None:
                pending.extend(reversed(default_path.split(os.pathsep)))
                # Expand the wildcard once only.
                default_path = None
            continue
        if (prefix_pattern is not None) and entry.startswith(prefix_pattern):
            entry = os.path.join(prefix_substituend, entry[len(prefix_pattern):])
        recursive = entry.endswith('//')
        if recursive:
            entry = entry[:-2]
        if os.path.isfile(entry):
            if accepted(entry):
                found.append(entry)
            continue
        for (root, _subdirs, names) in os.walk(entry):
            found.extend(os.path.join(root, name) for name in names if accepted(name))
            if not recursive:
                # Only the top level was requested.
                break
    return found

1129

class Location (object):
    """A position within a document: a location base together with a
    line number and a column number.

    Any of the three components may be C{None}.
    """

    __locationBase = None
    __lineNumber = None
    __columnNumber = None

    def __init__ (self, location_base=None, line_number=None, column_number=None):
        """Create a location.

        @param location_base: Identifies the document.  L{__str__}
        expects it to be a URI.
        @param line_number: The line within the document.
        @param column_number: The column within the line.
        """
        if isinstance(location_base, str):
            # The same base is shared by every location in a document.
            location_base = intern(location_base)
        self.__locationBase = location_base
        self.__lineNumber = line_number
        self.__columnNumber = column_number

    def newLocation (self, locator=None, line_number=None, column_number=None):
        """Create a location that shares this instance's location base.

        @param locator: Optional object from which the position is
        obtained through its C{getLineNumber} and C{getColumnNumber}
        methods.  When provided and usable, it takes precedence over
        C{line_number} and C{column_number}.
        @param line_number: The line to use when no locator is given.
        @param column_number: The column to use when no locator is given.
        @rtype: L{Location}
        """
        if locator is not None:
            try:
                line_number = locator.getLineNumber()
                column_number = locator.getColumnNumber()
            except Exception:
                # Best effort: a locator that cannot provide a position
                # leaves whatever was obtained so far.  Only ordinary
                # errors are suppressed, so that KeyboardInterrupt and
                # SystemExit still propagate.
                pass
        return Location(self.__locationBase, line_number, column_number)

    locationBase = property(lambda _s: _s.__locationBase)
    lineNumber = property(lambda _s: _s.__lineNumber)
    columnNumber = property(lambda _s: _s.__columnNumber)

    def __cmp__ (self, other):
        """Comparison by locationBase, then lineNumber, then columnNumber.

        Any location compares greater than C{None}.
        """
        if other is None:
            return 1
        rv = cmp(self.__locationBase, other.__locationBase)
        if 0 == rv:
            rv = cmp(self.__lineNumber, other.__lineNumber)
        if 0 == rv:
            rv = cmp(self.__columnNumber, other.__columnNumber)
        return rv

    def __str__ (self):
        """Return the final path component of the base followed by C{[line:column]}."""
        if self.locationBase is None:
            lb = '<unknown>'
        else:
            # No, this should not be os.sep.  The location is
            # expected to be a URI.
            lb = self.locationBase.rsplit('/', 1)[-1]
        return '%s[%s:%s]' % (lb, self.lineNumber, self.columnNumber)

    def __repr__ (self):
        """Return an expression that names the class and its three components."""
        t = type(self)
        ctor = '%s.%s' % (t.__module__, t.__name__)
        return '%s(%r, %r, %r)' % (ctor, self.__locationBase, self.__lineNumber, self.__columnNumber)

1179

class Locatable_mixin (pyxb.cscRoot):
    """Mixin for objects that record the location they are associated with.

    The location is supplied through the C{location} keyword of the
    constructor, or afterwards through L{_setLocation}.
    """

    __location = None

    def __init__ (self, *args, **kw):
        """Record the C{location} keyword, if any, and pass the remaining
        arguments on to the next class in the mro.

        @keyword location: The location to associate with this
        instance.  Defaults to C{None}.
        """
        location = kw.pop('location', None)
        self.__location = location
        super(Locatable_mixin, self).__init__(*args, **kw)

    def _setLocation (self, location):
        """Replace the location associated with this instance."""
        self.__location = location

    def _location (self):
        """Return the location associated with this instance, or C{None}."""
        recorded = self.__location
        return recorded

1192

Source Code for Module pyxb.utils.utility