1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 r"""
iconv_codecs: module to register Python codecs that encode/decode any
character set supported by the system's iconv command.
21
22 Usage:
23 iconv supports codecs unsupported by python:
24
25 >>> u'testing'.encode('ansi_x3.110-1983')
26 Traceback (most recent call last):
27 ...
28 LookupError: unknown encoding: ansi_x3.110-1983
29 >>> import iconv_codecs
30 >>> 'ansi_x3.110-1983' in iconv_codecs.get_supported_codecs()
31 True
32
33 Just register the codec you want:
34
35 >>> iconv_codecs.register('ansi_x3.110-1983')
36
37 Then you can use it:
38
39 >>> u'testing'.encode('ansi_x3.110-1983')
40 'testing'
41
42 If you want to force iconv usage for an encoding already supported by python,
43 just use the encoding name with an 'iconv:' prefix (no need to register):
44
45 >>> '\x87'.decode('iconv:CP860')
46 u'\xe7'
47
48 To register all python unsupported codecs, just call register() without
49 parameters:
50
51 >>> iconv_codecs.register()
52 >>> u'\xe7'.encode('utf32')
53 '\xff\xfe\x00\x00\xe7\x00\x00\x00'
54
55 That will poll iconv for a list of codecs it supports and register the ones
56 python doesn't support already.
57
58
59 The module will look for iconv in the path. If you need a different iconv
60 location just set it:
61
62 >>> iconv_codecs.ICONV_EXECUTABLE = '/usr/bin/iconv'
63 """
64
65 import codecs
66 import subprocess
67 import os
68
69
# Name or path of the iconv program to run; reassign it to use a specific
# binary instead of the one found by the normal executable lookup.
ICONV_EXECUTABLE = 'iconv'

# Lower-cased names of the codecs that register() has enabled for iconv.
_codecs = set()
74
def _get_unregistered_codecs():
    """Yield the iconv codecs that aren't supported by python directly.

    This is a generator: every name reported by get_supported_codecs() is
    probed by encoding a one-character string with it, and the names python
    rejects with LookupError are yielded.
    """
    for codec in get_supported_codecs():
        try:
            u'a'.encode(codec)
        except UnicodeEncodeError:
            # python knows this codec; it merely cannot represent u'a' in it.
            pass
        except LookupError:
            yield codec
84
def register(*codecs):
    """
    Register the codecs passed for iconv usage. Codecs registered by earlier
    calls stay registered; names are stored lower-cased.

        >>> import iconv_codecs
        >>> iconv_codecs.register('ansi_x3.110-1983')

    Then you can use it:

        >>> u'testing'.encode('ansi_x3.110-1983')
        'testing'

    If you want to register all codecs not already supported by python, just
    suppress all arguments:

        >>> iconv_codecs.register()
    """
    # NOTE: inside this function ``codecs`` is the tuple of requested names,
    # not the standard-library module of the same name.
    if not codecs:
        codecs = _get_unregistered_codecs()
    _codecs.update(codec.lower() for codec in codecs)
106
107
def get_supported_codecs():
    """
    Returns the set of codec names reported by ``iconv --list``.

    Every output line is stripped of leading/trailing '/' characters and
    lower-cased before it is added to the set.
    """
    cmd = [ICONV_EXECUTABLE, '--list']
    # Inherit the caller's environment so ICONV_EXECUTABLE is still resolved
    # through PATH, and force the C locale on top of it.  Passing a dict that
    # holds only LANG would replace the whole environment of the child.
    env = dict(os.environ, LANG='C', LC_ALL='C')
    # A single handle serves as both stdin and stderr of the child; it is
    # closed as soon as the child has finished.
    devnull = open(os.devnull, 'w+')
    try:
        iconv = subprocess.Popen(cmd, env=env,
                                 stdout=subprocess.PIPE,
                                 stdin=devnull,
                                 stderr=devnull)
        listing = iconv.communicate()[0]
    finally:
        devnull.close()
    return set(line.strip('/').lower() for line in listing.splitlines())
118
def _run_iconv(from_codec, to_codec, extra_params=None):
    """Start an iconv subprocess converting from_codec to to_codec.

    extra_params, when not None, is an iterable of additional command-line
    arguments appended after the standard ``-f``/``-t``/``-s`` ones.

    Returns the subprocess.Popen object with stdin, stdout and stderr all
    attached to pipes; the caller is expected to feed and drain it, e.g.
    with communicate().
    """
    cmd = [ICONV_EXECUTABLE, '-f', from_codec, '-t', to_codec, '-s']
    if extra_params is not None:
        cmd.extend(extra_params)
    # Inherit the caller's environment so ICONV_EXECUTABLE is still resolved
    # through PATH, and force the C locale on top of it.  Passing a dict that
    # holds only LANG would replace the whole environment of the child.
    env = dict(os.environ, LANG='C', LC_ALL='C')
    return subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stdin=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            env=env)
128
def _iconv_factory(codec_name):
    """Codec search function that builds iconv-backed codecs on demand.

    It is meant to be passed to codecs.register(), which calls it with the
    name of the encoding being looked up.

    The name is handled when, compared case-insensitively, it starts with
    'iconv:' (the remainder is the charset handed to iconv) or it was
    enabled through register().  For every other name None is returned.

    Returns a codecs.CodecInfo whose encoder and decoder pipe the data
    through an iconv subprocess with UTF-8 as the intermediate form.  Only
    the 'strict' and 'ignore' error modes are supported; any other mode
    raises NotImplementedError.  Text iconv writes to stderr is reported as
    UnicodeEncodeError / UnicodeDecodeError covering the whole input.
    """
    codec_name = codec_name.lower()
    if codec_name.startswith('iconv:'):
        name = codec_name[6:]
    elif codec_name in _codecs:
        name = codec_name
    else:
        return None

    def iconv_flags(errors, encoding):
        """Translate a codec error mode into extra iconv arguments."""
        if errors == 'strict':
            return []
        if errors == 'ignore':
            # 'ignore' is mapped onto iconv's -c option.
            return ['-c']
        raise NotImplementedError("%r error handling not implemented"
                                  " for codec %r" % (errors, encoding))

    # ``encoding=name`` binds the target charset at definition time.
    def iconvencode(input, errors='strict', encoding=name):
        """Encode unicode *input*; returns (encoded data, len(input))."""
        extra = iconv_flags(errors, encoding)
        _input = input.encode('utf-8')
        iconv = _run_iconv('utf-8', encoding, extra)
        output, error = iconv.communicate(_input)
        if error:
            # Only the first stderr line is used as the failure reason.
            error = error.splitlines()[0]
            raise UnicodeEncodeError(encoding, input, 0, len(input), error)
        return output, len(input)

    def iconvdecode(input, errors='strict', encoding=name):
        """Decode *input*; returns (unicode text, len(input))."""
        extra = iconv_flags(errors, encoding)
        _input = str(input)
        iconv = _run_iconv(encoding, 'utf-8', extra)
        output, error = iconv.communicate(_input)
        if error:
            # Only the first stderr line is used as the failure reason.
            error = error.splitlines()[0]
            raise UnicodeDecodeError(encoding, input, 0, len(input), error)
        output = output.decode('utf-8')
        return output, len(input)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            return iconvencode(input, self.errors)[0]

    class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
        def _buffer_decode(self, input, errors, final):
            # The base class calls this with (data, errors, final).  Using
            # iconvdecode directly would bind ``final`` to its ``encoding``
            # parameter, so the flag is accepted here and not forwarded.
            return iconvdecode(input, errors)

    class StreamWriter(codecs.StreamWriter):
        encode = staticmethod(iconvencode)

    class StreamReader(codecs.StreamReader):
        decode = staticmethod(iconvdecode)

    return codecs.CodecInfo(
        name=codec_name,
        encode=iconvencode,
        decode=iconvdecode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
194
# Install the factory as a codec search function when the module is imported,
# so 'iconv:'-prefixed names and registered names become usable right away.
codecs.register(_iconv_factory)

if __name__ == '__main__':
    # Self-test: round-trip a non-ASCII string through iconv, then run the
    # examples embedded in the docstrings.
    sample = u'áéíóúççç'
    assert sample == sample.encode('iconv:utf-8').decode('iconv:utf-8')
    import doctest
    doctest.testmod()
202