1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
"""
Provides the library's unit tests.

@todo Impl: Many more test cases are needed.
"""
23 import unittest
24 import types
25 import re
26
27 from cjklib.reading import *
28 from cjklib import characterlookup
29
30 from cjklib import exception
33 """A hashable dict."""
37 raise NotImplementedError, "dict is immutable"
39 raise NotImplementedError, "dict is immutable"
41 raise NotImplementedError, "dict is immutable"
43 raise NotImplementedError, "dict is immutable"
45 raise NotImplementedError, "dict is immutable"
47 raise NotImplementedError, "dict is immutable"
49 return hash(tuple(self.iteritems()))
50
53 """Base class for testing of L{ReadingOperator}s."""
54 READING_DIALECTS = [('Pinyin', {'toneMarkType': 'Numbers'}),
55 ('Pinyin', {'Erhua': 'oneSyllable'}),
56 ('CantoneseYale', {'toneMarkType': 'Numbers'}),
57 ('Jyutping', {'missingToneMark': 'ignore'}),
58 ]
59 """Further reading dialect forms included in testing."""
60
73
74 @staticmethod
76 """
77 Gets all classes from the reading module that implement
78 L{ReadingOperator}.
79
80 @rtype: dictionary of string class pairs
81 @return: dictionary of all classes inheriting form
82 L{ReadingOperator}
83 """
84 readingOperatorClasses = {}
85
86
87 readingOperatorClasses = dict([(clss.__name__, clss) \
88 for clss in operator.__dict__.values() \
89 if type(clss) == types.TypeType \
90 and issubclass(clss, operator.ReadingOperator) \
91 and clss.READING_NAME])
92
93 return readingOperatorClasses
94
97 """
98 Runs consistency checks on ReadingOperators. These tests assure that
99 different methods handle the same values in a consistent way. It does not
100 assure though that these values are correct.
101 """
110
"""
Checks that every entity listed by C{getReadingEntities()} passes
C{isReadingEntity()} of the same operator.
"""
for key in self.readingOperator:
    reading, _options = key
    readingOp = self.readingOperator[key]
    # Operators without an entity list cannot be checked here.
    if not hasattr(readingOp, "getReadingEntities"):
        continue
    for entity in readingOp.getReadingEntities():
        accepted = readingOp.isReadingEntity(entity)
        self.assert_(accepted,
            "Entity " + repr(entity) + " not accepted for reading '"
                + reading + "'")
123
"""
Test if all plain reading entities returned by
C{getPlainReadingEntities()} are accepted by C{isPlainReadingEntity()}.
"""
for key in self.readingOperator:
    reading, _ = key
    readingOp = self.readingOperator[key]
    # Guard on the method that is actually called below, so operators
    # without plain entities are skipped instead of failing.
    if not hasattr(readingOp, "getPlainReadingEntities"):
        continue
    for plainEntity in readingOp.getPlainReadingEntities():
        self.assert_(readingOp.isPlainReadingEntity(plainEntity),
            "Plain entity " + repr(plainEntity)
                + " not accepted for reading '" + reading + "'")
136
"""
Test if all reading entities returned by C{getReadingEntities()} are
decomposed into the single entity again.

Entities whose decomposition is reported as ambiguous are skipped.
"""
for key in self.readingOperator:
    reading, _ = key
    readingOp = self.readingOperator[key]
    if not hasattr(readingOp, "getReadingEntities"):
        continue
    for entity in readingOp.getReadingEntities():
        try:
            # Kept under its own name so the list being iterated is not
            # shadowed by the result.
            decomposition = readingOp.decompose(entity)
        except exception.AmbiguousDecompositonError:
            # An ambiguous decomposition is tolerated, there is nothing
            # to compare against.
            continue

        self.assertEquals(decomposition, [entity],
            "decompose on single entity " + repr(entity)
                + " doesn't return the entity for reading '"
                + reading + "': " + repr(decomposition))
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
"""
Checks for all entities returned by C{getReadingEntities()} that
splitting with C{splitEntityTone()} and joining again with
C{getTonalEntity()} yields the original entity.
"""
for key in self.readingOperator:
    reading, _options = key
    readingOp = self.readingOperator[key]
    supported = hasattr(readingOp, "getReadingEntities") \
        and hasattr(readingOp, "getPlainReadingEntities")
    if not supported:
        continue
    for entity in readingOp.getReadingEntities():
        try:
            plainEntity, tone = readingOp.splitEntityTone(entity)
            recomposed = readingOp.getTonalEntity(plainEntity, tone)
            self.assertEquals(recomposed, entity,
                "Entity " + repr(entity) + " not preserved in "
                    + "composition of getTonalEntity() and "
                    + "splitEntityTone of reading '" + reading + "'")
        except exception.UnsupportedError:
            # Unsupported operations are skipped.
            pass
236
261
264 """
265 Runs reference checks on ReadingOperators. These tests assure that the given
266 values are returned correctly.
267 """
268 DECOMPOSITION_VALUES = {
269 ('Pinyin', ImmutableDict({})): [
270 (u"tiān'ānmén", [u"tiān", "'", u"ān", u"mén"]),
271 ("xian", ["xian"]),
272 (u"xīān", [u"xī", u"ān"]),
273 (u"lao3tou2r5", [u"lao3", u"tou2", u"r5"]),
274 ],
275 ('Pinyin', ImmutableDict({'toneMarkType': 'Numbers'})): [
276 (u"tian1'an1men2", [u"tian1", "'", u"an1", u"men2"]),
277 ("xian", ["xian"]),
278 (u"xi1an1", [u"xi1", u"an1"]),
279 ],
280 ('Pinyin', ImmutableDict({'Erhua': 'oneSyllable'})): [
281 (u"lao3tour2", [u"lao3", u"tour2"]),
282 (u"er2hua4yin1", [u"er2", u"hua4", u"yin1"]),
283 ],
284 ('Hangul', ImmutableDict({})): [
285 (u"한글은 한국어의 고유", [u"한", u"글", u"은", u" ",
286 u"한", u"국", u"어", u"의", u" ", u"고", u"유"]),
287 ],
288 ('CantoneseYale', ImmutableDict({})): [
289 (u'gwóngjàuwá', [u'gwóng', u'jàu', u'wá']),
290 (u'yuhtyúh', [u'yuht', u'yúh']),
291 (u'néihhóu', [u'néih', u'hóu']),
292 ],
293 ('CantoneseYale', ImmutableDict({'toneMarkType': 'Numbers'})): [
294 (u'gwong2jau1wa2', [u'gwong2', u'jau1', u'wa2']),
295 (u'yut6yu5', [u'yut6', u'yu5']),
296 (u'nei5hou2', [u'nei5', u'hou2']),
297 ],
298 }
299
300 COMPOSITION_VALUES = {
301 ('Pinyin', ImmutableDict({})): [
302 (u"tiān'ānmén", [u"tiān", u"ān", u"mén"]),
303 ("xian", ["xian"]),
304 (u"xī'ān", [u"xī", u"ān"]),
305 ],
306 }
307
"""Test if the given decomposition references are reached."""
# Walk the decomposition table itself, so every reading listed there is
# checked and not only the keys it happens to share with
# COMPOSITION_VALUES.
for key in self.DECOMPOSITION_VALUES:
    reading, _ = key
    for string, targetDecomp in self.DECOMPOSITION_VALUES[key]:
        decomposition = self.readingOperator[key].decompose(string)
        self.assertEquals(decomposition, targetDecomp,
            "Decomposition " + repr(targetDecomp) + " of "
                + repr(string) + " not reached for reading '"
                + reading + "': " + repr(decomposition))
318
"""
Checks that composing each reference entity list yields its reference
string.
"""
for key, references in self.COMPOSITION_VALUES.items():
    reading, _options = key
    readingOp = self.readingOperator[key]
    for targetStr, composition in references:
        string = readingOp.compose(composition)
        self.assertEquals(string, targetStr,
            "String " + repr(targetStr) + " of Composition "
                + repr(composition) + " not reached for reading '"
                + reading + "': " + repr(string))
329
332 """Tests GR conversion methods."""
333
334
335
336
337
338 SPECIAL_MAPPING = """
339 zhi jy jyr jyy jyh
340 chi chy chyr chyy chyh
341 shi shy shyr shyy shyh
342 ri ry ryr ryy ryh
343 zi tzy tzyr tzyy tzyh
344 ci tsy tsyr tsyy tsyh
345 si sy syr syy syh
346
347 ju jiu jyu jeu jiuh
348 qu chiu chyu cheu chiuh
349 xu shiu shyu sheu shiuh
350
351 yi i yi yii yih
352 ya ia ya yea yah
353 yo io - - -
354 ye ie ye yee yeh
355 yai iai yai - -
356 yao iau yau yeau yaw
357 you iou you yeou yow
358 yan ian yan yean yann
359 yin in yn yiin yinn
360 yang iang yang yeang yanq
361 ying ing yng yiing yinq
362 yong iong yong yeong yonq
363
364 wu u wu wuu wuh
365 wa ua wa woa wah
366 wo uo wo woo woh
367 wai uai wai woai way
368 wei uei wei woei wey
369 wan uan wan woan wann
370 wen uen wen woen wenn
371 wang uang wang woang wanq
372 weng ueng - woeng wenq
373
374 yu iu yu yeu yuh
375 yue iue yue yeue yueh
376 yuan iuan yuan yeuan yuann
377 yun iun yun yeun yunn
378
379 er el erl eel ell
380
381 yir iel yel yeel yell
382 yar ial yal yeal yall
383 yer ie'l ye'l yeel yell
384 yair - yal - -
385 yaor iaul yaul yeaul yawl
386 your ioul youl yeoul yowl
387 yanr ial yal yeal yall
388 yinr iel yel yeel yell
389 yangr iangl yangl yeangl yanql
390 yingr iengl yengl yeengl yenql
391 yongr iongl yongl yeongl yonql
392
393 wur ul wul wuul wull
394 war ual wal woal wall
395 wor uol wol wool woll
396 wair ual wal woal wall
397 weir uel wel woel well
398 wanr ual wal woal wall
399 wenr uel wel woel well
400 wangr uangl wangl woangl wanql
401 wengr uengl - woengl wenql
402
403 yur iuel yuel yeuel yuell
404 yuer iue'l yue'l - yuell
405 yuanr iual yual yeual yuall
406 yunr iuel yuel yeuel yuell
407 """
408
409
410 FINAL_MAPPING = """
411 a a ar aa ah ha a
412 o o or oo oh ho o
413 e e er ee eh he e
414 ai ai air ae ay hai ai
415 ei ei eir eei ey hei ei
416 ao au aur ao aw hau au
417 ou ou our oou ow hou ou
418 an an arn aan ann han an
419 en en ern een enn hen en
420 ang ang arng aang anq hang ang
421 eng eng erng eeng enq heng eng
422 ong ong orng oong onq hong ong
423
424 i i yi ii ih hi i
425 ia ia ya ea iah hia ia
426 io io - - - hio -
427 ie ie ye iee ieh hie ie
428 iai iai yai - - hiai iai
429 iao iau yau eau iaw hiau iau
430 iu iou you eou iow hiou iou
431 ian ian yan ean iann hian ian
432 in in yn iin inn hin in
433 iang iang yang eang ianq hiang iang
434 ing ing yng iing inq hing ing
435 iong iong yong eong ionq hiong iong
436
437 u u wu uu uh hu u
438 ua ua wa oa uah hua ua
439 uo uo wo uoo uoh huo uo
440 uai uai wai oai uay huai uai
441 ui uei wei oei uey huei uei
442 uan uan wan oan uann huan uan
443 un uen wen oen uenn huen uen
444 uang uang wang oang uanq huang uang
445
446 u: iu yu eu iuh hiu iu
447 u:e iue yue eue iueh hiue iue
448 u:an iuan yuan euan iuann hiuan iuan
449 u:n iun yun eun iunn hiun iun
450
451 ar al arl aal all hal al
452 or ol orl ool oll hol ol
453 er e'l er'l ee'l ehl he'l e'l
454 air al arl aal all hal al
455 eir el erl eel ell hel el
456 aor aul aurl aol awl haul aul
457 our oul ourl ooul owl houl oul
458 anr al arl aal all hal al
459 enr el erl eel ell hel el
460 angr angl arngl aangl anql hangl angl
461 engr engl erngl eengl enql hengl engl
462 ongr ongl orngl oongl onql hongl ongl
463
464 ir iel yel ieel iell hiel iel
465 iar ial yal eal iall hial ial
466 ier ie'l ye'l ieel iell hie'l ie'l
467 iair - yal - - - -
468 iaor iaul yaul eaul iawl hiaul iaul
469 iur ioul youl eoul iowl hioul ioul
470 ianr ial yal eal iall hial ial
471 inr iel yel ieel iell hiel iel
472 iangr iangl yangl eangl ianql hiangl iangl
473 ingr iengl yengl ieengl ienql hiengl iengl
474 iongr iongl yongl eongl ionql hiongl iongl
475
476 ur ul wul uul ull hul ul
477 uar ual wal oal uall hual ual
478 uor uol wol uool uoll huol uol
479 uair ual wal oal uall hual ual
480 uir uel wel oel uell huel uel
481 uanr ual wal oal uall hual ual
482 unr uel wel oel uell huel uel
483 uangr uangl wangl oangl uanql huangl uangl
484 uengr uengl - - - huengl uengl
485
486 u:r iuel yuel euel iuell hiuel iuel
487 u:er iue'l yue'l euel iuell hiue'l iue'l
488 u:anr iual yual eual iuall hiual iual
489 u:nr iuel yuel euel iuell hiuel iuel
490 """
491
492 PINYIN_FINAL_MAPPING = {'iu': 'iou', 'ui': 'uei', 'un': 'uen', 'u:': u'ü',
493 'u:e': u'üe', 'u:an': u'üan', 'u:n': u'ün', 'iur': 'iour',
494 'uir': 'ueir', 'unr': 'uenr', 'u:r': u'ür', 'u:er': u'üer',
495 'u:anr': u'üanr', 'u:nr': u'ünr'}
496
497 INITIAL_REGEX = re.compile('^(tz|ts|ch|sh|[bpmfdtnlsjrgkh])?')
498
"""
Creates the Pinyin to GR converter and parses the reference tables.

C{SPECIAL_MAPPING} is parsed into C{grJunctionSpecialMapping} and
C{FINAL_MAPPING} into C{grJunctionFinalMapping} (tones 1 to 4) and
C{grJunctionFinalMNLRMapping} (tones 1 and 2). A table cell of C{'-'}
is stored as is.

@raise ValueError: if a non-empty line of C{SPECIAL_MAPPING} does not
    have the expected five columns
"""
self.readingFactory = ReadingFactory()
self.conv = self.readingFactory.createReadingConverter('Pinyin', 'GR',
    sourceOptions={'Erhua': "oneSyllable"},
    targetOptions={'GRRhotacisedFinalApostrophe': "'"})
self.py = operator.PinyinOperator(Erhua="oneSyllable")

# A Pinyin syllable followed by four GR columns, compiled once instead
# of once per line.
specialLine = re.compile(r"((?:\w|:)+)\s+((?:\w|')+|-)\s+"
    r"((?:\w|')+|-)\s+((?:\w|')+|-)\s+((?:\w|')+|-)")

self.grJunctionSpecialMapping = {}
for line in self.SPECIAL_MAPPING.split("\n"):
    if line.strip() == "":
        continue
    matchObj = specialLine.match(line)
    if not matchObj:
        # Fail with the offending line rather than with an
        # AttributeError on None.
        raise ValueError("Malformed line in SPECIAL_MAPPING: "
            + repr(line))
    pinyinSyllable, gr1, gr2, gr3, gr4 = matchObj.groups()

    self.grJunctionSpecialMapping[pinyinSyllable] = {1: gr1, 2: gr2,
        3: gr3, 4: gr4}

# A Pinyin final followed by six GR columns: tones 1 to 4 and the two
# columns stored in the m,n,l,r mapping.
finalLine = re.compile(r"((?:\w|\:)+)\s+((?:\w|')+|-)\s+"
    r"((?:\w|')+|-)\s+((?:\w|')+|-)\s+((?:\w|')+|-)"
    r"\s+((?:\w|')+|-)\s+((?:\w|')+|-)")

self.grJunctionFinalMapping = {}
self.grJunctionFinalMNLRMapping = {}
for line in self.FINAL_MAPPING.split("\n"):
    matchObj = finalLine.match(line)
    if not matchObj:
        # Blank and non-table lines are skipped here.
        continue

    pinyinFinal, gr1, gr2, gr3, gr4, gr1_m, gr2_m = matchObj.groups()

    # Replace the table's spelling of a final by the one given in
    # PINYIN_FINAL_MAPPING, where there is one.
    if pinyinFinal in self.PINYIN_FINAL_MAPPING:
        pinyinFinal = self.PINYIN_FINAL_MAPPING[pinyinFinal]

    self.grJunctionFinalMapping[pinyinFinal] = {1: gr1, 2: gr2, 3: gr3,
        4: gr4}
    self.grJunctionFinalMNLRMapping[pinyinFinal] = {1: gr1_m, 2: gr2_m}
538
"""
Checks the converted syllables against the general final table given by
GR Junction.
"""
excludedInitials = ['m', 'n', 'l', 'r', 'z', 'c', 's', 'zh', 'ch', 'sh',
    '']
excludedFinals = ['m', 'ng', 'mr', 'ngr']

for plainSyllable in self.py.getPlainReadingEntities():
    initial, final = self.py.getOnsetRhyme(plainSyllable)
    if initial in excludedInitials or final in excludedFinals:
        continue

    for tone in [1, 2, 3, 4]:
        targetFinal = self.grJunctionFinalMapping[final][tone]
        # A dash marks a table cell without a value.
        if targetFinal == '-':
            continue

        pinyinSyllable = self.py.getTonalEntity(plainSyllable, tone)
        syllable = self.conv.convertEntities([pinyinSyllable])[0]

        # Strip the initial so that only the tonal final is compared.
        tonalFinal = self.INITIAL_REGEX.sub('', syllable)

        self.assertEquals(tonalFinal, targetFinal,
            "Wrong conversion " + repr(syllable) + " to GR"
                + " for Pinyin syllable " + repr(pinyinSyllable)
                + " with the target final being " + repr(targetFinal))
564
"""
Checks the converted syllables against the m,n,l,r final table given by
GR Junction, for tones 1 and 2.
"""
for plainSyllable in self.py.getPlainReadingEntities():
    initial, final = self.py.getOnsetRhyme(plainSyllable)
    if initial not in ['m', 'n', 'l', 'r'] or final[0] == u'ʅ':
        continue

    for tone in [1, 2]:
        targetFinal = self.grJunctionFinalMNLRMapping[final][tone]
        # A dash marks a table cell without a value.
        if targetFinal == '-':
            continue

        pinyinSyllable = self.py.getTonalEntity(plainSyllable, tone)
        syllable = self.conv.convertEntities([pinyinSyllable])[0]

        # Strip the initial so that only the tonal final is compared.
        tonalFinal = self.INITIAL_REGEX.sub('', syllable)

        self.assertEquals(tonalFinal, targetFinal,
            "Wrong conversion " + repr(syllable) + " to GR"
                + " for Pinyin syllable " + repr(pinyinSyllable)
                + " with the target final being " + repr(targetFinal))
591
"""
Checks the converted syllables against the special syllable table given
by GR Junction.
"""
listedSyllables = ['zhi', 'chi', 'shi', 'zi', 'ci', 'si', 'ju', 'qu',
    'xu', 'er']

for plainSyllable in self.py.getPlainReadingEntities():
    isSpecial = plainSyllable in listedSyllables \
        or (plainSyllable[0] in ['y', 'w']
            and plainSyllable not in ['yor'])
    if not isSpecial:
        continue

    for tone in [1, 2, 3, 4]:
        target = self.grJunctionSpecialMapping[plainSyllable][tone]
        # A dash marks a table cell without a value.
        if target == '-':
            continue

        pinyinSyllable = self.py.getTonalEntity(plainSyllable, tone)
        syllable = self.conv.convertEntities([pinyinSyllable])[0]

        self.assertEquals(syllable, target,
            "Wrong conversion " + repr(syllable) + " to GR"
                + " for Pinyin syllable " + repr(pinyinSyllable)
                + " with the target being " + repr(target))
616
619 """Base class for testing of L{ReadingConverter}s."""
620
623
626 """
627 Runs reference checks on ReadingConverters. These tests assure that the
628 given values are returned correctly.
629 """
630 CONVERSION_VALUES = {
631
632
633
634
635
636 ('GR', ImmutableDict({}), 'Pinyin', ImmutableDict({})): [
637 (u'"Hannshyue" .de mingcheng duey Jonggwo yeou idean buhtzuenjinq .de yihwey. Woo.men tingshuo yeou "Yinnduhshyue", "Aijyishyue", "Hannshyue", erl meiyeou tingshuo yeou "Shilahshyue", "Luomaashyue", genq meiyeou tingshuo yeou "Inggwoshyue", "Meeigwoshyue". "Hannshyue" jey.geh mingcheng wanchyuan beaushyh Ou-Meei shyuejee duey nahshie yii.jing chernluen .de guulao-gwojia .de wenhuah .de ijoong chingkann .de tayduh.', u'"Hànxué" de míngchēng duì Zhōngguó yǒu yīdiǎn bùzūnjìng de yìwèi. Wǒmen tīngshuō yǒu "Yìndùxué", "Āijíxué", "Hànxué", ér méiyǒu tīngshuō yǒu "Xīlàxué", "Luómǎxué", gèng méiyǒu tīngshuō yǒu "Yīngguóxué", "Měiguóxué". "Hànxué" zhèige míngchēng wánquán biǎoshì Ōu-Měi xuézhě duì nàxiē yǐjing chénlún de gǔlǎo-guójiā de wénhuà de yīzhǒng qīngkàn de tàidù.'),
638 ],
639 }
640
"""
Checks that converting each reference source string yields its
reference target string.
"""
for key, references in self.CONVERSION_VALUES.items():
    readingA, optionsA, readingB, optionsB = key
    for referenceSource, referenceTarget in references:
        string = self.readingFactory.convert(referenceSource, readingA,
            readingB, sourceOptions=optionsA, targetOptions=optionsB)
        failure = "Conversion for reading '" + readingA \
            + "' to reading '" + readingB + "' failed: \n" \
            + repr(string) + "\n" + repr(referenceTarget)
        self.assertEquals(string, referenceTarget, failure)
652
655 """Base class for testing the L{characterlookup.CharacterLookup} class."""
658
662 """
663 Runs consistency checks on the reading methods of the
664 L{characterlookup.CharacterLookup} class.
665 """
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
691