1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """This module contains support for Unicode characters as required to
17 support the regular expression syntax defined in U{annex F
18 <http://www/Documentation/W3C/www.w3.org/TR/xmlschema-2/index.html#regexs>}
19 of the XML Schema definition.
20
21 In particular, we need to be able to identify character properties and
22 block escapes, as defined in F.1.1, by name.
23
24 - Block data: U{http://www.unicode.org/Public/3.1-Update/Blocks-4.txt}
25 - Property list data: U{http://www.unicode.org/Public/3.1-Update/PropList-3.1.0.txt}
26 - Full dataset: U{http://www.unicode.org/Public/3.1-Update/UnicodeData-3.1.0.txt}
27
28 The Unicode database active at the time XML Schema 1.0 was defined is
29 archived at
30 U{http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html},
31 and refers to U{Unicode Standard Annex #27: Unicode 3.1
32 <http://www.unicode.org/unicode/reports/tr27/>}.
33 """
34
35 import re
36 import logging
37
38 _log = logging.getLogger(__name__)
39
# Feature probe: can the regular expression engine compile a character
# range whose end points lie above U+FFFF?  The result gates the use of
# code points beyond 0xFFFF elsewhere in this module.
SupportsWideUnicode = False
try:
    re.compile(u'[\U0001d7ce-\U0001d7ff]')
except Exception:
    # Best-effort probe: any failure to compile means "not supported".
    # Catching Exception rather than using a bare except lets
    # KeyboardInterrupt and SystemExit propagate normally.
    pass
else:
    SupportsWideUnicode = True
46
47 import bisect
48
50 """Raised when some abuse of a L{CodePointSet} is detected."""
51 pass
52
54 """Represent a set of Unicode code points.
55
56 Each code point is an integral value between 0 and 0x10FFFF. This
57 class is used to represent a set of code points in a manner
58 suitable for use as regular expression character sets."""
59
# Largest code point representable without wide Unicode support.
MaxShortCodePoint = 0xFFFF

MaxCodePoint = MaxShortCodePoint
"""The largest code point value that a set will hold.

This is C{0x10FFFF} when the interpreter supports wide Unicode
characters (see C{SupportsWideUnicode}); otherwise it is limited to
L{MaxShortCodePoint}."""
if SupportsWideUnicode:
    MaxCodePoint = 0x10FFFF

# Class-level default for the private boundary sequence; the methods of
# this class search it with bisect and read it as alternating
# (start, end+1) range boundaries.
__codepoints = None
78
80 """For testing purposes only, access to the codepoints
81 internal representation."""
82 return self.__codepoints
83
87
98
100
101
102 if isinstance(value, tuple):
103 (s, e) = value
104 e += 1
105 elif isinstance(value, basestring):
106 if 1 < len(value):
107 raise TypeError()
108 s = ord(value)
109 e = s+1
110 else:
111 s = int(value)
112 e = s+1
113 if s >= e:
114 raise ValueError('codepoint range value order')
115
116
117
118 if s > self.MaxCodePoint:
119 return self
120 if e > self.MaxCodePoint:
121 e = self.MaxCodePoint+1
122
123
124 li = bisect.bisect_left(self.__codepoints, s)
125
126 ri = bisect.bisect_right(self.__codepoints, e)
127
128 case = ((li & 1) << 1) | (ri & 1)
129 if not do_add:
130 case = 3 - case
131 if 0x03 == case:
132
133 del self.__codepoints[li:ri]
134 elif 0x02 == case:
135
136 del self.__codepoints[li+1:ri]
137 self.__codepoints[li] = e
138 elif 0x01 == case:
139
140 del self.__codepoints[li+1:ri]
141 self.__codepoints[li] = s
142 else:
143
144 self.__codepoints[li:ri] = [s, e]
145 return self
146
def add(self, value):
    """Insert a code point, or an inclusive range of them, into this set.

    @param value: Either a single code point given as an integral
    value, or a tuple C{(s,e)} giving the first and last (inclusive)
    code points of a range.
    @return: C{self}, so that calls can be chained."""
    # The second argument selects insertion (as opposed to removal).
    return self.__mutate(value, True)
155
157 """Add multiple values to a code point set.
158
159 @param values: Either a L{CodePointSet} instance, or an iterable
160 whose members are valid parameters to L{add}.
161
162 @return: C{self}"""
163 if isinstance(values, CodePointSet):
164 self.extend(values.asTuples())
165 else:
166 for v in values:
167 self.__mutate(v, True)
168 return self
169
171 """Remove the given value from the code point set.
172
173 @param value: An integral value denoting a code point, or a tuple
174 C{(s,e)} denoting the start and end (inclusive) code points in a
175 range, or a L{CodePointSet}.
176
177 @return: C{self}"""
178 if isinstance(value, CodePointSet):
179 for v in value.asTuples():
180 self.subtract(v)
181 return self
182 return self.__mutate(value, False)
183
184
185
186
187 __XMLtoPythonREMap = {
188 u'\x00': u'\\x00',
189
190
191 u'^': u'\\^',
192
193 u'\\': u'\\\\',
194 u'[': u'\\[',
195
196 u']': u'\\]',
197 u'-': u'\\-',
198 }
199
200
201
206
208 """Return the code point set as Unicode regular expression
209 character group consisting of a sequence of characters or
210 character ranges.
211
212 This returns a regular expression fragment using Python's
213 regular expression syntax. Note that different regular expression
214 syntaxes are not compatible, often in subtle ways.
215
216 @param with_brackets: If C{True} (default), square brackets
217 are added to enclose the returned character group."""
218 rva = []
219 if with_brackets:
220 rva.append(u'[')
221 for (s, e) in self.asTuples():
222 if s == e:
223 rva.append(self.__unichr(s))
224 else:
225 rva.extend([self.__unichr(s), '-', self.__unichr(e)])
226 if with_brackets:
227 rva.append(u']')
228 return u''.join(rva)
229
231 """Return the codepoints as tuples denoting the ranges that are in
232 the set.
233
234 Each tuple C{(s, e)} indicates that the code points from C{s}
235 (inclusive) to C{e} (inclusive) are in the set."""
236
237 rv = []
238 start = None
239 for ri in xrange(len(self.__codepoints)):
240 if start is not None:
241 rv.append( (start, self.__codepoints[ri]-1) )
242 start = None
243 else:
244 start = self.__codepoints[ri]
245 if (start is not None) and (start <= self.MaxCodePoint):
246 rv.append( (start, self.MaxCodePoint) )
247 return rv
248
258
260 """If this set represents a single character, return it as its
261 unicode string value. Otherwise return C{None}."""
262 if (2 != len(self.__codepoints)) or (1 < (self.__codepoints[1] - self.__codepoints[0])):
263 return None
264 return unichr(self.__codepoints[0])
265
266 from pyxb.utils.unicode_data import PropertyMap
267 from pyxb.utils.unicode_data import BlockMap
268
270 """Regular expression support for XML Schema Data Types.
271
272 This class holds character classes and regular expressions used to
273 constrain the lexical space of XML Schema datatypes derived from
274 U{string<http://www.w3.org/TR/xmlschema-2/#string>}. They are
275 from U{XML 1.0 (Second
276 Edition)<http://www.w3.org/TR/2000/WD-xml-2e-20000814>} and
277 U{Namespaces in XML
278 <http://www.w3.org/TR/1999/REC-xml-names-19990114/>}.
279
280 Unlike the regular expressions used for pattern constraints in XML
281 Schema, which are derived from the Unicode 3.1 specification,
282 these are derived from the Unicode 2.0 specification.
283
284 The XML Schema definition refers explicitly to the second edition
285 of XML, so we have to use these code point sets and patterns. Be
286 aware that U{subsequent updates to the XML specification
287 <http://www.w3.org/XML/xml-V10-4e-errata#E09>} have changed the
288 corresponding patterns for other uses of XML. One significant
289 change is that the original specification, used here, does not
290 allow wide unicode characters."""
291
# Code points accepted as XML characters: tab, line feed, carriage
# return, and two ranges of the 16-bit space.
Char = CodePointSet(
    0x0009, 0x000A, 0x000D,
    (0x0020, 0xD7FF),
    (0xE000, 0xFFFD),
)
# When wide Unicode is available, everything above the 16-bit range up
# to the maximum code point is accepted as well.
if SupportsWideUnicode:
    Char.add((CodePointSet.MaxShortCodePoint + 1, CodePointSet.MaxCodePoint))
301
# Code points of the BaseChar character class.  Entries are listed in
# ascending order, several per row; a tuple is an inclusive range and a
# bare integer is a single code point.
BaseChar = CodePointSet(
    # 0x0041 - 0x02C1
    (0x0041, 0x005A), (0x0061, 0x007A), (0x00C0, 0x00D6), (0x00D8, 0x00F6),
    (0x00F8, 0x00FF), (0x0100, 0x0131), (0x0134, 0x013E), (0x0141, 0x0148),
    (0x014A, 0x017E), (0x0180, 0x01C3), (0x01CD, 0x01F0), (0x01F4, 0x01F5),
    (0x01FA, 0x0217), (0x0250, 0x02A8), (0x02BB, 0x02C1),
    # 0x0386 - 0x03F3
    0x0386, (0x0388, 0x038A), 0x038C, (0x038E, 0x03A1),
    (0x03A3, 0x03CE), (0x03D0, 0x03D6), 0x03DA, 0x03DC,
    0x03DE, 0x03E0, (0x03E2, 0x03F3),
    # 0x0401 - 0x04F9
    (0x0401, 0x040C), (0x040E, 0x044F), (0x0451, 0x045C), (0x045E, 0x0481),
    (0x0490, 0x04C4), (0x04C7, 0x04C8), (0x04CB, 0x04CC), (0x04D0, 0x04EB),
    (0x04EE, 0x04F5), (0x04F8, 0x04F9),
    # 0x0531 - 0x05F2
    (0x0531, 0x0556), 0x0559, (0x0561, 0x0586), (0x05D0, 0x05EA),
    (0x05F0, 0x05F2),
    # 0x0621 - 0x06E6
    (0x0621, 0x063A), (0x0641, 0x064A), (0x0671, 0x06B7), (0x06BA, 0x06BE),
    (0x06C0, 0x06CE), (0x06D0, 0x06D3), 0x06D5, (0x06E5, 0x06E6),
    # 0x0905 - 0x09F1
    (0x0905, 0x0939), 0x093D, (0x0958, 0x0961), (0x0985, 0x098C),
    (0x098F, 0x0990), (0x0993, 0x09A8), (0x09AA, 0x09B0), 0x09B2,
    (0x09B6, 0x09B9), (0x09DC, 0x09DD), (0x09DF, 0x09E1), (0x09F0, 0x09F1),
    # 0x0A05 - 0x0AE0
    (0x0A05, 0x0A0A), (0x0A0F, 0x0A10), (0x0A13, 0x0A28), (0x0A2A, 0x0A30),
    (0x0A32, 0x0A33), (0x0A35, 0x0A36), (0x0A38, 0x0A39), (0x0A59, 0x0A5C),
    0x0A5E, (0x0A72, 0x0A74), (0x0A85, 0x0A8B), 0x0A8D,
    (0x0A8F, 0x0A91), (0x0A93, 0x0AA8), (0x0AAA, 0x0AB0), (0x0AB2, 0x0AB3),
    (0x0AB5, 0x0AB9), 0x0ABD, 0x0AE0,
    # 0x0B05 - 0x0BB9
    (0x0B05, 0x0B0C), (0x0B0F, 0x0B10), (0x0B13, 0x0B28), (0x0B2A, 0x0B30),
    (0x0B32, 0x0B33), (0x0B36, 0x0B39), 0x0B3D, (0x0B5C, 0x0B5D),
    (0x0B5F, 0x0B61), (0x0B85, 0x0B8A), (0x0B8E, 0x0B90), (0x0B92, 0x0B95),
    (0x0B99, 0x0B9A), 0x0B9C, (0x0B9E, 0x0B9F), (0x0BA3, 0x0BA4),
    (0x0BA8, 0x0BAA), (0x0BAE, 0x0BB5), (0x0BB7, 0x0BB9),
    # 0x0C05 - 0x0CE1
    (0x0C05, 0x0C0C), (0x0C0E, 0x0C10), (0x0C12, 0x0C28), (0x0C2A, 0x0C33),
    (0x0C35, 0x0C39), (0x0C60, 0x0C61), (0x0C85, 0x0C8C), (0x0C8E, 0x0C90),
    (0x0C92, 0x0CA8), (0x0CAA, 0x0CB3), (0x0CB5, 0x0CB9), 0x0CDE,
    (0x0CE0, 0x0CE1),
    # 0x0D05 - 0x0D61
    (0x0D05, 0x0D0C), (0x0D0E, 0x0D10), (0x0D12, 0x0D28), (0x0D2A, 0x0D39),
    (0x0D60, 0x0D61),
    # 0x0E01 - 0x0EC4
    (0x0E01, 0x0E2E), 0x0E30, (0x0E32, 0x0E33), (0x0E40, 0x0E45),
    (0x0E81, 0x0E82), 0x0E84, (0x0E87, 0x0E88), 0x0E8A,
    0x0E8D, (0x0E94, 0x0E97), (0x0E99, 0x0E9F), (0x0EA1, 0x0EA3),
    0x0EA5, 0x0EA7, (0x0EAA, 0x0EAB), (0x0EAD, 0x0EAE),
    0x0EB0, (0x0EB2, 0x0EB3), 0x0EBD, (0x0EC0, 0x0EC4),
    # 0x0F40 - 0x10F6
    (0x0F40, 0x0F47), (0x0F49, 0x0F69), (0x10A0, 0x10C5), (0x10D0, 0x10F6),
    # 0x1100 - 0x11F9
    0x1100, (0x1102, 0x1103), (0x1105, 0x1107), 0x1109,
    (0x110B, 0x110C), (0x110E, 0x1112), 0x113C, 0x113E,
    0x1140, 0x114C, 0x114E, 0x1150,
    (0x1154, 0x1155), 0x1159, (0x115F, 0x1161), 0x1163,
    0x1165, 0x1167, 0x1169, (0x116D, 0x116E),
    (0x1172, 0x1173), 0x1175, 0x119E, 0x11A8,
    0x11AB, (0x11AE, 0x11AF), (0x11B7, 0x11B8), 0x11BA,
    (0x11BC, 0x11C2), 0x11EB, 0x11F0, 0x11F9,
    # 0x1E00 - 0x1FFC
    (0x1E00, 0x1E9B), (0x1EA0, 0x1EF9), (0x1F00, 0x1F15), (0x1F18, 0x1F1D),
    (0x1F20, 0x1F45), (0x1F48, 0x1F4D), (0x1F50, 0x1F57), 0x1F59,
    0x1F5B, 0x1F5D, (0x1F5F, 0x1F7D), (0x1F80, 0x1FB4),
    (0x1FB6, 0x1FBC), 0x1FBE, (0x1FC2, 0x1FC4), (0x1FC6, 0x1FCC),
    (0x1FD0, 0x1FD3), (0x1FD6, 0x1FDB), (0x1FE0, 0x1FEC), (0x1FF2, 0x1FF4),
    (0x1FF6, 0x1FFC),
    # 0x2126 - 0xD7A3
    0x2126, (0x212A, 0x212B), 0x212E, (0x2180, 0x2182),
    (0x3041, 0x3094), (0x30A1, 0x30FA), (0x3105, 0x312C), (0xAC00, 0xD7A3)
)
506
# Code points of the Ideographic character class (order as in the
# original listing: the large range first).
Ideographic = CodePointSet((0x4E00, 0x9FA5), 0x3007, (0x3021, 0x3029))

# A Letter is any BaseChar or Ideographic code point.  A copy of
# BaseChar is extended so that BaseChar itself is left unmodified.
Letter = CodePointSet(BaseChar)
Letter.extend(Ideographic)
514
# Code points of the CombiningChar character class.  Entries keep their
# original order, several per row; a tuple is an inclusive range and a
# bare integer is a single code point.
CombiningChar = CodePointSet(
    # 0x0300 - 0x05C4
    (0x0300, 0x0345), (0x0360, 0x0361), (0x0483, 0x0486), (0x0591, 0x05A1),
    (0x05A3, 0x05B9), (0x05BB, 0x05BD), 0x05BF, (0x05C1, 0x05C2),
    0x05C4,
    # 0x064B - 0x06ED
    (0x064B, 0x0652), 0x0670, (0x06D6, 0x06DC), (0x06DD, 0x06DF),
    (0x06E0, 0x06E4), (0x06E7, 0x06E8), (0x06EA, 0x06ED),
    # 0x0901 - 0x09E3
    (0x0901, 0x0903), 0x093C, (0x093E, 0x094C), 0x094D,
    (0x0951, 0x0954), (0x0962, 0x0963), (0x0981, 0x0983), 0x09BC,
    0x09BE, 0x09BF, (0x09C0, 0x09C4), (0x09C7, 0x09C8),
    (0x09CB, 0x09CD), 0x09D7, (0x09E2, 0x09E3),
    # 0x0A02 - 0x0ACD
    0x0A02, 0x0A3C, 0x0A3E, 0x0A3F,
    (0x0A40, 0x0A42), (0x0A47, 0x0A48), (0x0A4B, 0x0A4D), (0x0A70, 0x0A71),
    (0x0A81, 0x0A83), 0x0ABC, (0x0ABE, 0x0AC5), (0x0AC7, 0x0AC9),
    (0x0ACB, 0x0ACD),
    # 0x0B01 - 0x0BD7
    (0x0B01, 0x0B03), 0x0B3C, (0x0B3E, 0x0B43), (0x0B47, 0x0B48),
    (0x0B4B, 0x0B4D), (0x0B56, 0x0B57), (0x0B82, 0x0B83), (0x0BBE, 0x0BC2),
    (0x0BC6, 0x0BC8), (0x0BCA, 0x0BCD), 0x0BD7,
    # 0x0C01 - 0x0CD6
    (0x0C01, 0x0C03), (0x0C3E, 0x0C44), (0x0C46, 0x0C48), (0x0C4A, 0x0C4D),
    (0x0C55, 0x0C56), (0x0C82, 0x0C83), (0x0CBE, 0x0CC4), (0x0CC6, 0x0CC8),
    (0x0CCA, 0x0CCD), (0x0CD5, 0x0CD6),
    # 0x0D02 - 0x0D57
    (0x0D02, 0x0D03), (0x0D3E, 0x0D43), (0x0D46, 0x0D48), (0x0D4A, 0x0D4D),
    0x0D57,
    # 0x0E31 - 0x0ECD
    0x0E31, (0x0E34, 0x0E3A), (0x0E47, 0x0E4E), 0x0EB1,
    (0x0EB4, 0x0EB9), (0x0EBB, 0x0EBC), (0x0EC8, 0x0ECD),
    # 0x0F18 - 0x0FB9
    (0x0F18, 0x0F19), 0x0F35, 0x0F37, 0x0F39,
    0x0F3E, 0x0F3F, (0x0F71, 0x0F84), (0x0F86, 0x0F8B),
    (0x0F90, 0x0F95), 0x0F97, (0x0F99, 0x0FAD), (0x0FB1, 0x0FB7),
    0x0FB9,
    # 0x20D0 - 0x309A
    (0x20D0, 0x20DC), 0x20E1, (0x302A, 0x302F), 0x3099,
    0x309A
)
612
# Code points of the Digit character class: fifteen inclusive ranges,
# listed in ascending order.
Digit = CodePointSet(
    (0x0030, 0x0039), (0x0660, 0x0669), (0x06F0, 0x06F9),
    (0x0966, 0x096F), (0x09E6, 0x09EF), (0x0A66, 0x0A6F),
    (0x0AE6, 0x0AEF), (0x0B66, 0x0B6F), (0x0BE7, 0x0BEF),
    (0x0C66, 0x0C6F), (0x0CE6, 0x0CEF), (0x0D66, 0x0D6F),
    (0x0E50, 0x0E59), (0x0ED0, 0x0ED9), (0x0F20, 0x0F29)
)
630
# Code points of the Extender character class: eight single code points
# followed by three inclusive ranges.
Extender = CodePointSet(
    0x00B7, 0x02D0, 0x02D1, 0x0387,
    0x0640, 0x0E46, 0x0EC6, 0x3005,
    (0x3031, 0x3035), (0x309D, 0x309E), (0x30FC, 0x30FE)
)
644
645
# Each set below starts from a fresh copy of Letter; add() and extend()
# return the set they modify, which allows the calls to be chained.

# First character of a Name: a letter, underscore or colon.
NameStartChar = CodePointSet(Letter).add(ord('_')).add(ord(':'))

# First character of an NCName: as above, but without the colon.
NCNameStartChar = CodePointSet(Letter).add(ord('_'))

# Subsequent characters of a Name.
NameChar = (CodePointSet(Letter)
            .extend(Digit)
            .add(ord('.'))
            .add(ord('-'))
            .add(ord('_'))
            .add(ord(':'))
            .extend(CombiningChar)
            .extend(Extender))

# Subsequent characters of an NCName: as NameChar, but without the colon.
NCNameChar = (CodePointSet(Letter)
              .extend(Digit)
              .add(ord('.'))
              .add(ord('-'))
              .add(ord('_'))
              .extend(CombiningChar)
              .extend(Extender))
669
# Pattern fragments (C{*_pat}) and whole-string matchers (C{*_re}) for
# the Name, NmToken, NCName and QName lexical forms.
#
# The compiled expressions are anchored with \Z rather than $.  In
# Python, $ also matches just before a newline that ends the string, so
# a value such as u"abc\n" would otherwise be accepted even though
# newline is not in any of the character sets used here.
Name_pat = '%s%s*' % (NameStartChar.asPattern(), NameChar.asPattern())
Name_re = re.compile(r'^%s\Z' % (Name_pat,))
NmToken_pat = '%s+' % (NameChar.asPattern(),)
NmToken_re = re.compile(r'^%s\Z' % (NmToken_pat,))
NCName_pat = '%s%s*' % (NCNameStartChar.asPattern(), NCNameChar.asPattern())
NCName_re = re.compile(r'^%s\Z' % (NCName_pat,))
# A QName is an NCName optionally preceded by an NCName prefix and a colon.
QName_pat = '(%s:)?%s' % (NCName_pat, NCName_pat)
QName_re = re.compile(r'^%s\Z' % (QName_pat,))
678
679
# Map from the character following a backslash to the set it denotes.
# Three escapes name control characters: line feed, carriage return, tab.
SingleCharEsc = {
    'n': CodePointSet(0x0A),
    'r': CodePointSet(0x0D),
    't': CodePointSet(0x09),
}
# Every other single-character escape denotes the escaped character itself.
for c in r'\|.-^?*+{}()[]':
    SingleCharEsc[c] = CodePointSet(ord(c))
685
686
687
# Map from category escape text to code point set.  Both the positive
# form p{Name} and its complement P{Name} are stored in catEsc.
# NOTE(review): complEsc is created here but nothing in this block
# populates it.
catEsc = {}
complEsc = {}
for k, v in PropertyMap.iteritems():
    catEsc.update({
        u'p{%s}' % (k,): v,
        u'P{%s}' % (k,): v.negate(),
    })
693
694
# Map from block escape text to code point set: p{IsName} selects the
# block and P{IsName} selects its complement.
IsBlockEsc = {}
for k, v in BlockMap.iteritems():
    IsBlockEsc.update({
        u'p{Is%s}' % (k,): v,
        u'P{Is%s}' % (k,): v.negate(),
    })
699
700
# The wildcard matches everything except line feed and carriage return.
WildcardEsc = CodePointSet(0x0A, 0x0D).negate()

# Multi-character escapes.  The directly defined sets come first.  Note
# that for the word escape it is the upper-case form, W, that is built
# directly.
MultiCharEsc = {
    's': CodePointSet(0x20, ord('\t'), ord('\n'), ord('\r')),
    'i': CodePointSet(XML1p0e2.Letter).add(ord('_')).add(ord(':')),
    'c': CodePointSet(XML1p0e2.NameChar),
    'd': PropertyMap['Nd'],
    'W': CodePointSet(PropertyMap['P']).extend(PropertyMap['Z']).extend(PropertyMap['C']),
}
# Each escape's opposite-case partner is the complement of its set.  The
# complements are collected into a separate dict first so that
# MultiCharEsc is not modified while it is being read.
MultiCharEsc.update(dict((esc.swapcase(), cps.negate())
                         for (esc, cps) in MultiCharEsc.items()))
713