1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Utility functions and classes."""
17
18 import re
19 import os
20 import errno
21 import pyxb
22 import urlparse
23 import time
24 import datetime
25 import logging
26
27 _log = logging.getLogger(__name__)
28
30 """Convert a string into a literal value that can be used in Python source.
31
32 This just calls C{repr}. No point in getting all complex when the language
33 already gives us what we need.
34
35 @rtype: C{str}
36 """
37 return repr(s)
38
40 """Default implementation for _XMLIdentifierToPython
41
42 For historical reasons, this converts the identifier from a str to
43 unicode in the system default encoding. This should have no
44 practical effect.
45
46 @param identifier : some XML identifier
47
48 @return: C{unicode(identifier)}
49 """
50
51 return unicode(identifier)
52
54 """Configure a callable L{MakeIdentifier} uses to pre-process an XML identifier.
55
56 In Python3, identifiers can be full Unicode tokens, but in Python2,
57 all identifiers must be ASCII characters. L{MakeIdentifier} enforces
58 this by removing all characters that are not valid within an
59 identifier.
60
61 In some cases, an application generating bindings may be able to
62 transliterate Unicode code points that are not valid Python identifier
63 characters into something else. This callable can be assigned to
64 perform that translation before the invalid characters are
65 stripped.
66
67 It is not the responsibility of this callable to do anything other
68 than replace whatever characters it wishes to. All
69 transformations performed by L{MakeIdentifier} will still be
70 applied, to ensure the output is in fact a legal identifier.
71
72 @param xml_identifier_to_python : A callable that takes a string
73 and returns a Unicode, possibly with non-identifier characters
74 replaced by other characters. Pass C{None} to reset to the
75 default implementation, which is L{_DefaultXMLIdentifierToPython}.
76
77 @rtype: C{unicode}
78 """
79 global _XMLIdentifierToPython
80 if xml_identifier_to_python is None:
81 xml_identifier_to_python = _DefaultXMLIdentifierToPython
82 _XMLIdentifierToPython = xml_identifier_to_python
83
84 _XMLIdentifierToPython = _DefaultXMLIdentifierToPython
85
86 _UnderscoreSubstitute_re = re.compile(r'[- .]')
87 _NonIdentifier_re = re.compile(r'[^a-zA-Z0-9_]')
88 _PrefixUnderscore_re = re.compile(r'^_+')
89 _PrefixDigit_re = re.compile(r'^\d+')
90 _CamelCase_re = re.compile(r'_\w')
91
93 """Convert a string into something suitable to be a Python identifier.
94
95 The string is processed by L{_XMLIdentifierToPython}. Following
96 this, dashes, spaces, and periods are replaced by underscores, and
97 characters not permitted in Python identifiers are stripped.
98 Furthermore, any leading underscores are removed. If the result
99 begins with a digit, the character 'n' is prepended. If the
100 result is the empty string, the string 'emptyString' is
101 substituted.
102
103 No check is made for L{conflicts with keywords <DeconflictKeyword>}.
104
105 @keyword camel_case : If C{True}, any underscore in the result
106 string that is immediately followed by an alphanumeric is replaced
107 by the capitalized version of that alphanumeric. Thus,
108 'one_or_two' becomes 'oneOrTwo'. If C{False} (default), has no
109 effect.
110
111 @rtype: C{str}
112 """
113 s = _XMLIdentifierToPython(s)
114 s = _PrefixUnderscore_re.sub('', _NonIdentifier_re.sub('', _UnderscoreSubstitute_re.sub('_', s)))
115 if camel_case:
116 s = _CamelCase_re.sub(lambda _m: _m.group(0)[1].upper(), s)
117 if _PrefixDigit_re.match(s):
118 s = 'n' + s
119 if 0 == len(s):
120 s = 'emptyString'
121 return s
122
123 _PythonKeywords = frozenset( (
124 "and", "as", "assert", "break", "class", "continue", "def", "del",
125 "elif", "else", "except", "exec", "finally", "for", "from", "global",
126 "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
127 "raise", "return", "try", "while", "with", "yield"
128 ) )
129 """Python keywords. Note that types like int and float are not
130 keywords.
131
132 @see: U{http://docs.python.org/reference/lexical_analysis.html#keywords}."""
133
134 _PythonBuiltInConstants = frozenset( (
135 "False", "True", "None", "NotImplemented", "Ellipsis", "__debug__",
136
137
138
139 "set"
140 ) )
141 """Other symbols that aren't keywords but that can't be used.
142
143 @see: U{http://docs.python.org/library/constants.html}."""
144
145 _Keywords = frozenset(_PythonKeywords.union(_PythonBuiltInConstants))
146 """The keywords reserved for Python, derived from L{_PythonKeywords}
147 and L{_PythonBuiltInConstants}."""
148
150 """If the provided string C{s} matches a Python language keyword,
151 append an underscore to distinguish them.
152
153 See also L{MakeUnique}.
154
155 @param s: string to be deconflicted
156
157 @keyword aux_keywords: optional iterable of additional strings
158 that should be treated as keywords.
159
160 @rtype: C{str}
161
162 """
163 if (s in _Keywords) or (s in aux_keywords):
164 return '%s_' % (s,)
165 return s
166
168 """Return an identifier based on C{s} that is not in the given set.
169
170 The returned identifier is made unique by appending an underscore
171 and, if necessary, a serial number.
172
173 The order is : C{x}, C{x_}, C{x_2}, C{x_3}, ...
174
175 @param in_use: The set of identifiers already in use in the
176 relevant scope. C{in_use} is updated to contain the returned
177 identifier.
178
179 @rtype: C{str}
180 """
181 if s in in_use:
182 ctr = 2
183 s = s.rstrip('_')
184 candidate = '%s_' % (s,)
185 while candidate in in_use:
186 candidate = '%s_%d' % (s, ctr)
187 ctr += 1
188 s = candidate
189 in_use.add(s)
190 return s
191
def PrepareIdentifier (s, in_use, aux_keywords=frozenset(), private=False, protected=False):
    """Combine everything required to create a unique identifier.

    The input is converted with L{MakeIdentifier}, stripped of leading
    and trailing underscores, passed through L{DeconflictKeyword},
    optionally given a privacy prefix, and finally made unique with
    L{MakeUnique}.

    @param s: the string from which the identifier is derived.

    @param in_use: the set of already used identifiers.  It is handed
    to L{MakeUnique}, which adds the returned identifier to it.

    @keyword aux_keywords: an optional set of additional symbols that
    are illegal in the given context; use this to prevent conflicts
    with known method names.

    @keyword private: if C{False} (default), all leading underscores
    are stripped, guaranteeing the identifier will not be private.  If
    C{True}, the returned identifier has two leading underscores,
    making it a private variable within a Python class.

    @keyword protected: as for C{private}, but uses only one
    underscore.  Ignored when C{private} is true.

    @rtype: C{str}

    @note: Only module-level identifiers should be treated as
    protected.  The class-level L{_DeconflictSymbols_mixin}
    infrastructure does not include protected symbols.  All class and
    instance members beginning with a single underscore are reserved
    for the PyXB infrastructure."""
    # Underscores are stripped before the keyword check so that the
    # check sees the bare name; the privacy prefix is re-applied after.
    s = DeconflictKeyword(MakeIdentifier(s).strip('_'), aux_keywords)
    if private:
        s = '__' + s
    elif protected:
        s = '_' + s
    return MakeUnique(s, in_use)
227
228
230 """Mix-in used to deconflict public symbols in classes that may be
231 inherited by generated binding classes.
232
233 Some classes, like the L{pyxb.binding.basis.element} or
234 L{pyxb.binding.basis.simpleTypeDefinition} classes in
235 L{pyxb.binding.basis}, have public symbols associated with
236 functions and variables. It is possible that an XML schema might
237 include tags and attribute names that match these symbols. To
238 avoid conflict, the reserved symbols marked in this class are
239 added to the pre-defined identifier set.
240
241 Subclasses should create a class-level variable
242 C{_ReservedSymbols} that contains a set of strings denoting the
243 symbols reserved in this class, combined with those from any
244 superclasses that also have reserved symbols. Code like the
245 following is suggested::
246
247 # For base classes (direct mix-in):
248 _ReservedSymbols = set([ 'one', 'two' ])
249 # For subclasses:
250 _ReservedSymbols = SuperClass._ReservedSymbols.union(set([ 'three' ]))
251
252 Only public symbols (those with no underscores) are currently
253 supported. (Private symbols can't be deconflicted that easily,
254 and no protected symbols that derive from the XML are created by
255 the binding generator.)
256 """
257
258 _ReservedSymbols = set()
259 """There are no reserved symbols in the base class."""
260
261
262 __TabCRLF_re = re.compile("[\t\n\r]")
263
264 __MultiSpace_re = re.compile(" +")
265
267 """Normalize the given string.
268
269 Exactly one of the C{preserve}, C{replace}, and C{collapse} keyword
270 parameters must be assigned the value C{True} by the caller.
271
272 - C{preserve}: the text is returned unchanged.
273
274 - C{replace}: all tabs, newlines, and carriage returns are
275 replaced with ASCII spaces.
276
277 - C{collapse}: the C{replace} normalization is done, then
278 sequences of two or more spaces are replaced by a single space.
279
280 See the U{whiteSpace facet<http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace>}.
281
282 @rtype: C{str}
283 """
284 if preserve:
285 return text
286 text = __TabCRLF_re.sub(' ', text)
287 if replace:
288 return text
289 if collapse:
290 return __MultiSpace_re.sub(' ', text).strip()
291
292 raise Exception('NormalizeWhitespace: No normalization specified')
293
295 """Represent a directed graph with arbitrary objects as nodes.
296
297 This is used in the L{code
298 generator<pyxb.binding.generate.Generator>} to determine order
299 dependencies among components within a namespace, and schema that
300 comprise various namespaces. An edge from C{source} to C{target}
301 indicates that some aspect of C{source} requires that some aspect
302 of C{target} already be available.
303 """
304
306 self.__roots = None
307 if root is not None:
308 self.__roots = set([root])
309 self.__edges = set()
310 self.__edgeMap = { }
311 self.__reverseMap = { }
312 self.__nodes = set()
313
314 __scc = None
315 __sccMap = None
316 __dfsOrder = None
317
def addEdge (self, source, target):
    """Add a directed edge from the C{source} to the C{target}.

    The nodes are added to the graph if necessary.

    @param source: the node at which the edge starts.
    @param target: the node at which the edge ends.
    """
    self.__edges.add( (source, target) )
    self.__edgeMap.setdefault(source, set()).add(target)
    # A self-loop is deliberately not recorded in the reverse map, so
    # it does not count as an incoming edge when roots are calculated.
    if source != target:
        self.__reverseMap.setdefault(target, set()).add(source)
    self.__nodes.add(source)
    self.__nodes.add(target)
329
331 """Add the given node to the graph."""
332 self.__nodes.add(node)
333
334 __roots = None
def roots (self, reset=False):
    """Return the set of nodes calculated to be roots (i.e., those that have no incoming edges).

    This caches the roots calculated in a previous invocation
    unless the C{reset} keyword is given the value C{True}.

    @note: Upon reset, the set is rebuilt purely from the recorded
    edges, so any nodes that had been manually marked as roots will
    no longer be in the set unless they have no incoming edges.

    @keyword reset: If C{True}, any cached value is discarded and
    recomputed.  No effect if C{False} (default).

    @rtype: C{set}
    """
    if reset or (self.__roots is None):
        # A node is a root when nothing points at it, i.e. it never
        # appears as a key of the reverse (target -> sources) map.
        self.__roots = set(n for n in self.__nodes if n not in self.__reverseMap)
    return self.__roots
356 """Add the provided node as a root node, even if it has incoming edges.
357
358 The node need not be present in the graph (if necessary, it is added).
359
360 Note that roots added in this way do not survive a reset using
361 L{roots}.
362
363 @return: C{self}
364 """
365 if self.__roots is None:
366 self.__roots = set()
367 self.__nodes.add(root)
368 self.__roots.add(root)
369 return self
370
372 """Return the edges in the graph.
373
374 The edge data structure is a map from the source node to the
375 set of nodes that can be reached in a single step from the
376 source.
377 """
378 return self.__edgeMap
379 __edgeMap = None
380
382 """Return the edges in the graph.
383
384 The edge data structure is a set of node pairs represented as C{( source, target )}.
385 """
386 return self.__edges
387
389 """Return the set of nodes in the graph.
390
391 The node collection data structure is a set containing node
392 objects, whatever they may be."""
393 return self.__nodes
394
def tarjan (self, reset=False):
    """Execute Tarjan's algorithm on the graph.

    U{Tarjan's
    algorithm<http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>}
    computes the U{strongly-connected
    components<http://en.wikipedia.org/wiki/Strongly_connected_component>}
    of the graph: i.e., the sets of nodes that form a minimal
    closed set under edge transition.  In essence, the loops.  We
    use this to detect groups of components that have a dependency
    cycle.

    The traversal starts from each node returned by L{roots}; the
    per-node work is delegated to C{_tarjan}.

    @keyword reset: If C{True}, any cached component set is erased
    and recomputed.  If C{False} (default), an existing previous
    result is left unchanged.

    @raise Exception: the graph has nodes but none of them is a
    root.  Cached results are left untouched in that case, so the
    failure is reported again on the next call instead of being
    masked by an empty result.
    """

    if (self.__scc is not None) and (not reset):
        return
    # Validate before discarding any cached state.
    roots = self.roots()
    if self.__nodes and not roots:
        raise Exception('TARJAN: No roots found in graph with %d nodes' % (len(self.__nodes),))
    self.__sccMap = { }
    self.__stack = []
    self.__sccOrder = []
    self.__scc = []
    self.__index = 0
    # An index of None marks a node as not yet visited.
    self.__tarjanIndex = dict.fromkeys(self.__nodes)
    self.__tarjanLowLink = { }
    for r in roots:
        self._tarjan(r)
    self.__didTarjan = True
428
430 """Do the work of Tarjan's algorithm for a given root node."""
431 if self.__tarjanIndex.get(v) is not None:
432
433 return
434 self.__tarjanIndex[v] = self.__tarjanLowLink[v] = self.__index
435 self.__index += 1
436 self.__stack.append(v)
437 source = v
438 for target in self.__edgeMap.get(source, []):
439 if self.__tarjanIndex[target] is None:
440 self._tarjan(target)
441 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
442 elif target in self.__stack:
443 self.__tarjanLowLink[v] = min(self.__tarjanLowLink[v], self.__tarjanLowLink[target])
444 else:
445 pass
446
447 if self.__tarjanLowLink[v] == self.__tarjanIndex[v]:
448 scc = []
449 while True:
450 scc.append(self.__stack.pop())
451 if v == scc[-1]:
452 break
453 self.__sccOrder.append(scc)
454 if 1 < len(scc):
455 self.__scc.append(scc)
456 [ self.__sccMap.setdefault(_v, scc) for _v in scc ]
457
def scc (self, reset=False):
    """Return the strongly-connected components of the graph.

    The data structure is a list, each element of which is itself a
    list of the nodes in one component.  Only components containing
    more than one node are included.

    @keyword reset: If C{True}, the L{tarjan} method will be
    re-invoked, propagating the C{reset} value.  If C{False}
    (default), a cached value will be returned if available.

    @see: L{tarjan}.
    """
    if reset or (self.__scc is None):
        self.tarjan(reset)
    return self.__scc
469 __scc = None
470
def sccMap (self, reset=False):
    """Return a map from nodes to the strongly-connected component
    to which the node belongs.

    Every node visited by L{tarjan} has an entry, including nodes
    that form a component on their own.

    @keyword reset: If C{True}, the L{tarjan} method will be
    re-invoked, propagating the C{reset} value.  If C{False}
    (default), a cached value will be returned if available.

    @rtype: C{dict}

    @see: L{tarjan}.
    """
    if reset or (self.__sccMap is None):
        self.tarjan(reset)
    return self.__sccMap
484 __sccMap = None
485
487 """Return the strongly-connected components in order.
488
489 The data structure is a list, in dependency order, of strongly
490 connected components (which can be single nodes). Appearance
491 of a node in a set earlier in the list indicates that it has
492 no dependencies on any node that appears in a subsequent set.
493 This order is preferred over L{dfsOrder} for code generation,
494 since it detects loops.
495
496 @see: L{tarjan}.
497 """
498 if reset or (self.__sccOrder is None):
499 self.tarjan(reset)
500 return self.__sccOrder
501 __sccOrder = None
502
504 """Return the strongly-connected component to which the given
505 node belongs.
506
507 Any keywords supplied when invoking this method are passed to
508 the L{sccMap} method.
509
510 @return: The SCC set, or C{None} if the node is not present in
511 the results of Tarjan's algorithm."""
512
513 return self.sccMap(**kw).get(node)
514
516 """Return the cyclomatic complexity of the graph."""
517 self.tarjan()
518 return len(self.__edges) - len(self.__nodes) + 2 * len(self.__scc)
519
521 assert not (source in self.__dfsWalked)
522 self.__dfsWalked.add(source)
523 for target in self.__edgeMap.get(source, []):
524 if not (target in self.__dfsWalked):
525 self.__dfsWalk(target)
526 self.__dfsOrder.append(source)
527
529 node_map = { }
530 idx = 1
531 for n in self.__nodes:
532 node_map[n] = idx
533 idx += 1
534 text = []
535 text.append('digraph "%s" {' % (title,))
536 for n in self.__nodes:
537 if labeller is not None:
538 nn = labeller(n)
539 else:
540 nn = str(n)
541 text.append('%s [shape=box,label="%s"];' % (node_map[n], nn))
542 for s in self.__nodes:
543 for d in self.__edgeMap.get(s, []):
544 if s != d:
545 text.append('%s -> %s;' % (node_map[s], node_map[d]))
546 text.append("};")
547 return "\n".join(text)
548
550 """Return the nodes of the graph in U{depth-first-search
551 order<http://en.wikipedia.org/wiki/Depth-first_search>}.
552
553 The data structure is a list. Calculated lists are retained
554 and returned on future invocations, subject to the C{reset}
555 keyword.
556
557 @keyword reset: If C{True}, discard cached results and recompute the order."""
558 if reset or (self.__dfsOrder is None):
559 self.__dfsWalked = set()
560 self.__dfsOrder = []
561 for root in self.roots(reset=reset):
562 self.__dfsWalk(root)
563 self.__dfsWalked = None
564 if len(self.__dfsOrder) != len(self.__nodes):
565 raise Exception('DFS walk did not cover all nodes (walk %d versus nodes %d)' % (len(self.__dfsOrder), len(self.__nodes)))
566 return self.__dfsOrder
567
569 """Return the nodes of the graph as a sequence of root sets.
570
571 The first root set is the set of nodes that are roots: i.e.,
572 have no incoming edges. The second root set is the set of
573 nodes that have incoming nodes in the first root set. This
574 continues until all nodes have been reached. The sets impose
575 a partial order on the nodes, without being as constraining as
576 L{sccOrder}.
577
578 @return: a list of the root sets."""
579 order = []
580 nodes = set(self.__nodes)
581 edge_map = {}
582 for (d, srcs) in self.__edgeMap.iteritems():
583 edge_map[d] = srcs.copy()
584 while nodes:
585 freeset = set()
586 for n in nodes:
587 if not (n in edge_map):
588 freeset.add(n)
589 if 0 == len(freeset):
590 _log.error('dependency cycle in named components')
591 return None
592 order.append(freeset)
593 nodes.difference_update(freeset)
594 new_edge_map = {}
595 for (d, srcs) in edge_map.iteritems():
596 srcs.difference_update(freeset)
597 if 0 != len(srcs):
598 new_edge_map[d] = srcs
599 edge_map = new_edge_map
600 return order
601
602 LocationPrefixRewriteMap_ = { }
603
609
611 """Normalize a URI against an optional parent_uri in the way that is
612 done for C{schemaLocation} attribute values.
613
614 If no URI schema is present, this will normalize a file system
615 path.
616
617 Optionally, the resulting absolute URI can subsequently be
618 rewritten to replace specified prefix strings with alternative
619 strings, e.g. to convert a remote URI to a local repository. This
620 rewriting is done after the conversion to an absolute URI, but
621 before normalizing file system URIs.
622
623 @param uri : The URI to normalize. If C{None}, function returns
624 C{None}
625
626 @param parent_uri : The base URI against which normalization is
627 done, if C{uri} is a relative URI.
628
629 @param prefix_map : A map used to rewrite URI prefixes. If
630 C{None}, the value defaults to that stored by
631 L{SetLocationPrefixRewriteMap}.
632
633 """
634 if uri is None:
635 return uri
636 if parent_uri is None:
637 abs_uri = uri
638 else:
639
640
641 abs_uri = urlparse.urljoin(parent_uri, uri)
642 if prefix_map is None:
643 prefix_map = LocationPrefixRewriteMap_
644 for (pfx, sub) in prefix_map.items():
645 if abs_uri.startswith(pfx):
646 abs_uri = sub + abs_uri[len(pfx):]
647 if 0 > abs_uri.find(':'):
648 abs_uri = os.path.realpath(abs_uri)
649 return abs_uri
650
651
def TextFromURI (uri, archive_directory=None):
    """Retrieve the contents of the uri as a text string.

    If the uri does not include a scheme (e.g., C{http:}), it is
    assumed to be a file path on the local system.

    Three ways of opening the resource are tried in order:
    C{urllib2.urlopen} (only when the uri contains a colon),
    C{urllib.urlopen}, and finally the built-in C{file}.

    @param uri: the location of the document to be read.

    @keyword archive_directory: if set, and the content did not come
    from a local file, a copy of the retrieved text is saved in this
    directory.  The file is named after the last component of the
    uri path; a numeric suffix is appended if necessary to avoid
    replacing an existing file.  Failure to save the copy is logged
    as a warning and otherwise ignored.

    @return: the content read from the uri.

    @raise Exception: none of the attempts to open the uri
    succeeded.  The error from C{urllib2} is preferred; otherwise
    the error from opening the uri as a local file is raised.
    """
    import urllib
    import urllib2
    stream = None
    exc = None
    # Only something that looks like it has a scheme is worth handing
    # to urllib2.
    if 0 <= uri.find(':'):
        try:
            stream = urllib2.urlopen(uri)
        except Exception as e:
            exc = e
    if stream is None:
        try:
            stream = urllib.urlopen(uri)
            exc = None
        except Exception:
            # Deliberately ignored: any urllib2 error recorded above
            # is the more informative one to report.
            pass
    if stream is None:
        # Last resort: treat the uri as a local path.
        try:
            stream = file(uri)
            exc = None
        except Exception as e:
            if exc is None:
                exc = e
    if exc is not None:
        _log.error('open %s', uri, exc_info=exc)
        raise exc
    # Content that came from a local file is never archived.  Not
    # every stream has an C{fp} attribute, hence the getattr default.
    if isinstance(stream, file) or isinstance(getattr(stream, 'fp', None), file):
        archive_directory = None
    try:
        xmls = stream.read()
    finally:
        stream.close()
    if archive_directory:
        base_name = os.path.basename(os.path.normpath(urlparse.urlparse(uri)[2]))
        counter = 1
        dest_file = os.path.join(archive_directory, base_name)
        while os.path.isfile(dest_file):
            dest_file = os.path.join(archive_directory, '%s.%d' % (base_name, counter))
            counter += 1
        try:
            archive_fp = OpenOrCreate(dest_file)
            try:
                archive_fp.write(xmls)
            finally:
                archive_fp.close()
        except OSError as e:
            _log.warning('Unable to save %s in %s: %s', uri, dest_file, e)
    return xmls
707
def OpenOrCreate (file_name, tag=None, preserve_contents=False):
    """Return a file object used to write the given file.

    Any missing directories leading to the file are created.  The
    file is opened in binary append/update mode (C{'ab+'}).

    Use the C{tag} keyword to preserve the contents of existing files
    that are not supposed to be overwritten.

    To get a writable file but leaving any existing contents in place,
    set the C{preserve_contents} keyword to C{True}.  Normally, existing file
    contents are erased.

    The returned file pointer is positioned at the end of the file.

    @param file_name: path to the file to be opened.

    @keyword tag: If not C{None} and the file already exists and is
    not empty, absence of the given value in the first 4096 bytes of
    the file causes an C{OSError} to be raised with C{errno} set to
    C{EEXIST}.  I.e., only files with this value in the first 4KB
    will be returned for writing.

    @keyword preserve_contents: This value controls whether existing
    contents of the file will be erased (C{False}, default) or left in
    place (C{True}).

    @raise OSError: the directory could not be created, or the
    C{tag} check failed.  The file is closed, and its contents left
    untouched, before a tag failure is reported.
    """
    path = os.path.dirname(file_name)
    if path:
        try:
            os.makedirs(path)
        except (OSError, IOError) as e:
            # An already-existing directory is the normal case.
            if errno.EEXIST != e.errno:
                raise
    fp = open(file_name, 'ab+')
    try:
        if (tag is not None) and (0 < os.fstat(fp.fileno()).st_size):
            # The initial read position of a file opened for append is
            # platform-dependent; rewind so the head is what is checked.
            fp.seek(0)
            text = fp.read(4096)
            if 0 > text.find(tag):
                raise OSError(errno.EEXIST, os.strerror(errno.EEXIST))
        if not preserve_contents:
            fp.seek(0)
            fp.truncate()
        else:
            # Seek relative to the end of file, not to byte offset 2.
            fp.seek(0, os.SEEK_END)
    except Exception:
        # Do not leak the descriptor when the file is rejected.
        fp.close()
        raise
    return fp
748
749
750 __Hasher = None
751 try:
752 import hashlib
753 __Hasher = hashlib.sha1
754 except ImportError:
755 import sha
756 __Hasher = sha.new
757
def HashForText (text):
    """Calculate a cryptographic hash of the given string.

    For example, this is used to verify that a given module file
    contains bindings from a previous generation run for the same
    namespace.  See L{OpenOrCreate}.  If the text is in Unicode, the
    hash is calculated on the UTF-8 encoding of the text.

    @param text: the C{str} or C{unicode} value to be hashed.

    @return: A C{str}, generally a sequence of hexadecimal "digit"s.
    """
    # The hash function works on bytes, so Unicode is encoded first.
    if isinstance(text, unicode):
        text = text.encode('utf-8')
    # __Hasher is the hash constructor selected at module load time.
    return __Hasher(text).hexdigest()
771
772
773 __HaveUUID = False
774 try:
775 import uuid
776 __HaveUUID = True
777 except ImportError:
778 import random
780 """Obtain a UUID using the best available method. On a version of
781 python that does not incorporate the C{uuid} class, this creates a
782 string combining the current date and time (to the second) with a
783 random number.
784
785 @rtype: C{str}
786 """
787 if __HaveUUID:
788 return uuid.uuid1().urn
789 return '%s:%08.8x' % (time.strftime('%Y%m%d%H%M%S'), random.randint(0, 0xFFFFFFFFL))
790
792 """Records a unique identifier, generally associated with a
793 binding generation action.
794
795 The identifier is a string, but gets wrapped in an instance of
796 this class to optimize comparisons and reduce memory footprint.
797
798 Invoking the constructor for this class on the same string
799 multiple times will return the same Python object.
800
801 An instance of this class compares equal to, and hashes equivalent
802 to, the uid string. When C{str}'d, the result is the uid; when
803 C{repr}'d, the result is a constructor call to
804 C{pyxb.utils.utility.UniqueIdentifier}.
805 """
806
807
808 __ExistingUIDs = {}
809
811 """The string unique identifier"""
812 return self.__uid
813 __uid = None
814
815
818
821
824
825
841
843 """Associate the given object with this identifier.
844
845 This is a one-way association: the object is not provided with
846 a return path to this identifier instance."""
847 self.__associatedObjects.add(obj)
849 """The set of objects that have been associated with this
850 identifier instance."""
851 return self.__associatedObjects
852 __associatedObjects = None
853
855 """Create a new UniqueIdentifier instance.
856
857 @param uid: The unique identifier string. If present, it is
858 the caller's responsibility to ensure the value is universally
859 unique. If C{None}, one will be provided.
860 @type uid: C{str} or C{unicode}
861 """
862 assert (uid is None) or (self.uid() == uid), 'UniqueIdentifier: ctor %s, actual %s' % (uid, self.uid())
863 self.__associatedObjects = set()
864
866 if other is None:
867 return False
868 elif isinstance(other, UniqueIdentifier):
869 other_uid = other.uid()
870 elif isinstance(other, basestring):
871 other_uid = other
872 else:
873 raise TypeError('UniqueIdentifier: Cannot compare with type %s' % (type(other),))
874 return self.uid() == other_uid
875
877 return hash(self.uid())
878
881
883 return 'pyxb.utils.utility.UniqueIdentifier(%s)' % (repr(self.uid()),)
884
886 """A C{datetime.tzinfo} subclass that helps deal with UTC
887 conversions in an ISO8601 world.
888
889 This class only supports fixed offsets from UTC.
890 """
891
892
893 __Lexical_re = re.compile('^([-+])(\d\d):(\d\d)$')
894
895
896 __utcOffset_min = 0
897
898
899 __utcOffset_td = None
900
901
902 __ZeroDuration = datetime.timedelta(0)
903
904
905 __MaxOffset_td = datetime.timedelta(hours=14)
906
908 """Create a time zone instance with a fixed offset from UTC.
909
910 @param spec: Specifies the offset. Can be an integer counting
911 minutes east of UTC, the value C{None} (equal to 0 minutes
912 east), or a string that conform to the ISO8601 time zone
913 sequence (B{Z}, or B{[+-]HH:MM}).
914 """
915
916 if spec is not None:
917 if isinstance(spec, basestring):
918 if 'Z' == spec:
919 self.__utcOffset_min = 0
920 else:
921 match = self.__Lexical_re.match(spec)
922 if match is None:
923 raise ValueError('Bad time zone: %s' % (spec,))
924 self.__utcOffset_min = int(match.group(2)) * 60 + int(match.group(3))
925 if '-' == match.group(1):
926 self.__utcOffset_min = - self.__utcOffset_min
927 elif isinstance(spec, int):
928 self.__utcOffset_min = spec
929 elif isinstance(spec, datetime.timedelta):
930 self.__utcOffset_min = spec.seconds / 60
931 else:
932 raise TypeError('%s: unexpected type %s' % (type(self), type(spec)))
933 self.__utcOffset_td = datetime.timedelta(minutes=self.__utcOffset_min)
934 if self.__utcOffset_td < -self.__MaxOffset_td or self.__utcOffset_td > self.__MaxOffset_td:
935 raise ValueError('XSD timezone offset %s larger than %s' % (self.__utcOffset_td, self.__MaxOffset_td))
936 if 0 == self.__utcOffset_min:
937 self.__tzName = 'Z'
938 elif 0 > self.__utcOffset_min:
939 self.__tzName = '-%02d:%02d' % divmod(-self.__utcOffset_min, 60)
940 else:
941 self.__tzName = '+%02d:%02d' % divmod(self.__utcOffset_min, 60)
942
944 """Returns the constant offset for this zone."""
945 return self.__utcOffset_td
946
948 """Return the name of the timezone in the format expected by XML Schema."""
949 return self.__tzName
950
def dst (self, dt):
    """Returns a constant zero duration.

    @param dt: the date and time being queried; it is not consulted.

    @rtype: C{datetime.timedelta}
    """
    return self.__ZeroDuration
954
959
960
962 """A C{datetime.tzinfo} subclass for the local time zone.
963
964 Mostly pinched from the C{datetime.tzinfo} documentation in Python 2.5.1.
965 """
966
967 __STDOffset = datetime.timedelta(seconds=-time.timezone)
968 __DSTOffset = __STDOffset
969 if time.daylight:
970 __DSTOffset = datetime.timedelta(seconds=-time.altzone)
971 __ZeroDelta = datetime.timedelta(0)
972 __DSTDelta = __DSTOffset - __STDOffset
973
978
979 - def dst (self, dt):
983
986
988 tt = (dt.year, dt.month, dt.day,
989 dt.hour, dt.minute, dt.second,
990 0, 0, -1)
991 tt = time.localtime(time.mktime(tt))
992 return tt.tm_isdst > 0
993
995 """Emulate the B{transient} keyword from Java for private member
996 variables.
997
998 This class defines a C{__getstate__} method which returns a copy
999 of C{self.__dict__} with certain members removed. Specifically,
1000 if a string "s" appears in a class member variable named
1001 C{__PrivateTransient} defined in the "Class" class, then the
1002 corresponding private variable "_Class__s" will be removed from
1003 the state dictionary. This is used to eliminate unnecessary
1004 fields from instances placed in L{namespace
1005 archives<pyxb.namespace.archive.NamespaceArchive>} without having
1006 to implement a C{__getstate__} method in every class in the
1007 instance hierarchy.
1008
1009 For an example, see
1010 L{pyxb.xmlschema.structures._SchemaComponent_mixin}
1011
1012 If you use this, it is your responsibility to define the
1013 C{__PrivateTransient} class variable and add to it the required
1014 variable names.
1015
1016 Classes that inherit from this are free to define their own
1017 C{__getstate__} method, which may or may not invoke the superclass
1018 one. If you do this, be sure that the class defining
1019 C{__getstate__} lists L{PrivateTransient_mixin} as one of its
1020 direct superclasses, lest the latter end up earlier in the mro and
1021 consequently bypass the local override.
1022 """
1023
1024
1025
1026 __Attribute = '__PrivateTransient'
1027
1029 state = self.__dict__.copy()
1030
1031
1032 attr = '_%s%s_' % (self.__class__.__name__, self.__Attribute)
1033 skipped = getattr(self.__class__, attr, None)
1034 if skipped is None:
1035 skipped = set()
1036 for cl in self.__class__.mro():
1037 for (k, v) in cl.__dict__.items():
1038 if k.endswith(self.__Attribute):
1039 cl2 = k[:-len(self.__Attribute)]
1040 skipped.update([ '%s__%s' % (cl2, _n) for _n in v ])
1041 setattr(self.__class__, attr, skipped)
1042 for k in skipped:
1043 if state.get(k) is not None:
1044 del state[k]
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055 return state
1056
def GetMatchingFiles (path, pattern=None, default_path_wildcard=None, default_path=None, prefix_pattern=None, prefix_substituend=None):
    r"""Provide a list of paths to files present in any of a set of
    directories and meeting certain criteria.

    This is used, for example, to locate namespace archive files
    within the archive path specified by the user.  One could use::

     files = GetMatchingFiles('&bundles//:+',
                              pattern=re.compile('.*\.wxs$'),
                              default_path_wildcard='+',
                              default_path='/usr/local/pyxb/nsarchives',
                              prefix_pattern='&',
                              prefix_substituend='/opt/pyxb')

    to obtain all files that can be recursively found within
    C{/opt/pyxb/bundles}, or non-recursively within
    C{/usr/local/pyxb/nsarchives}.

    @param path: A list of directories in which the search should be
    performed.  The entries are separated by os.pathsep, which is a
    colon on POSIX platforms and a semi-colon on Windows.  If a path
    entry ends with C{//} regardless of platform, the suffix C{//} is
    stripped and any directory beneath the path is scanned as well,
    recursively.  An entry that names a file rather than a directory
    is itself a candidate; the pattern is then applied to the whole
    entry rather than to the file name alone.

    @keyword pattern: Optional regular expression object used to
    determine whether a given directory entry should be returned.  If
    left as C{None}, all directory entries will be returned.

    @keyword default_path_wildcard: An optional string which, if
    present as a single directory in the path, is replaced by the
    value of C{default_path}.  Only the first occurrence is expanded;
    later occurrences are dropped.

    @keyword default_path: A system-defined directory which can be
    restored to the path by placing the C{default_path_wildcard} in
    the C{path}.

    @keyword prefix_pattern: An optional string which, if present at
    the start of a path element, is replaced by the value of
    C{prefix_substituend}.

    @keyword prefix_substituend: A system-defined string (path prefix)
    which can be combined with the user-provided path information to
    identify a file or subdirectory within an installation-specific
    area.

    @return: a list of the matching file paths, in the order the path
    entries were processed.
    @rtype: C{list}
    """
    matching_files = []
    path_set = path.split(os.pathsep)
    while path_set:
        path = path_set.pop(0)
        if default_path_wildcard == path:
            if default_path is not None:
                path_set[0:0] = default_path.split(os.pathsep)
                # Expand at most once, so a default path that itself
                # contains the wildcard cannot recurse forever.
                default_path = None
            continue
        if (prefix_pattern is not None) and path.startswith(prefix_pattern):
            path = os.path.join(prefix_substituend, path[len(prefix_pattern):])
        recursive = path.endswith('//')
        if recursive:
            path = path[:-2]
        if os.path.isfile(path):
            if (pattern is None) or (pattern.search(path) is not None):
                matching_files.append(path)
            continue
        for (root, dirs, files) in os.walk(path):
            matching_files.extend(os.path.join(root, f)
                                  for f in files
                                  if (pattern is None) or (pattern.search(f) is not None))
            # The first tuple from os.walk is the directory itself;
            # stop there unless descent was requested.
            if not recursive:
                break
    return matching_files
1129
1179
1192