Module iconv_codecs
[hide private]
[frames] | no frames]

Source Code for Module iconv_codecs

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  # This program is free software: you can redistribute it and/or modify 
  5  # it under the terms of the GNU General Public License as published by 
  6  # the Free Software Foundation, either version 3 of the License, or 
  7  # (at your option) any later version. 
  8   
  9  # This program is distributed in the hope that it will be useful, 
 10  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 12  # GNU General Public License for more details. 
 13   
 14  # You should have received a copy of the GNU General Public License 
 15  # along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 16   
 17   
 18  r""" 
 19  iconv_codecs: module to register python codecs to encode/decode any charset 
 20  supported by the system's iconv command. 
 21   
 22  Usage: 
 23     iconv supports codecs unsupported by python: 
 24   
 25     >>> u'testing'.encode('ansi_x3.110-1983') 
 26     Traceback (most recent call last): 
 27     ... 
 28     LookupError: unknown encoding: ansi_x3.110-1983 
 29     >>> import iconv_codecs 
 30     >>> 'ansi_x3.110-1983' in iconv_codecs.get_supported_codecs() 
 31     True 
 32      
 33     Just register the codec you want: 
 34      
 35     >>> iconv_codecs.register('ansi_x3.110-1983') 
 36      
 37     Then you can use it: 
 38      
 39     >>> u'testing'.encode('ansi_x3.110-1983') 
 40     'testing' 
 41   
 42     If you want to force iconv usage for an encoding already supported by python,  
 43     just use the encoding name with an 'iconv:' prefix (no need to register): 
 44   
 45     >>> '\x87'.decode('iconv:CP860') 
 46     u'\xe7' 
 47   
 48     To register all python unsupported codecs, just call register() without 
 49     parameters: 
 50      
 51     >>> iconv_codecs.register() 
 52     >>> u'\xe7'.encode('utf32') 
 53     '\xff\xfe\x00\x00\xe7\x00\x00\x00' 
 54      
 55     That will poll iconv for a list of codecs it supports and register the ones 
 56     python doesn't support already.    
 57   
 58   
 59  The module will look for iconv in the path. If you need a different iconv 
 60  location just set it: 
 61   
 62     >>> iconv_codecs.ICONV_EXECUTABLE = '/usr/bin/iconv' 
 63  """ 
 64   
 65  import codecs 
 66  import subprocess 
 67  import os 
 68   
 69  #: Name or path of the iconv executable to run; change this to reflect your installation path 
 70  ICONV_EXECUTABLE='iconv'  
 71   
 72  #: Global set holding the lower-cased names of the codecs registered for iconv usage 
 73  _codecs = set()  
 74   
def _get_unregistered_codecs():
    """Yields every iconv codec name that python cannot look up by itself."""
    for candidate in get_supported_codecs():
        try:
            # Encoding a sample character forces python to resolve the codec.
            u'a'.encode(candidate)
        except LookupError:
            # No python codec answers to this name: iconv has to provide it.
            yield candidate
        except UnicodeEncodeError:
            # The codec exists in python, it merely cannot represent 'a'.
            continue
84
def register(*codecs):
    """
    Add the given codec names to the set of codecs served through iconv.

    Names are stored in lower case and are added to whatever was registered
    by earlier calls.

        >>> import iconv_codecs
        >>> iconv_codecs.register('ansi_x3.110-1983')

    Then you can use it:

        >>> u'testing'.encode('ansi_x3.110-1983')
        'testing'

    If you want to register all codecs not already supported by python, just
    suppress all arguments:

        >>> iconv_codecs.register()
    """
    # With no explicit names, fall back to everything iconv offers that
    # python does not already support.
    names = codecs if codecs else _get_unregistered_codecs()
    for entry in names:
        _codecs.add(entry.lower())
106 107
def get_supported_codecs():
    """
    Returns the set of codec names supported by iconv.

    Runs ``ICONV_EXECUTABLE --list`` and collects every name it prints,
    lower-cased and with the slashes surrounding it removed.  A line holding
    several whitespace-separated aliases contributes each alias as a separate
    name, and blank lines contribute nothing.
    """
    cmd = [ICONV_EXECUTABLE, '--list']
    # Inherit the caller's environment so that PATH is still available to
    # locate the executable; only the locale is forced to C.  LC_ALL is set
    # too because an inherited LC_ALL would take precedence over LANG.
    env = dict(os.environ, LANG='C', LC_ALL='C')
    # A single devnull handle serves both stdin and stderr, and the ``with``
    # block closes it as soon as iconv has finished.
    with open(os.devnull, 'r+') as devnull:
        iconv = subprocess.Popen(cmd, env=env,
                                 stdout=subprocess.PIPE,
                                 stdin=devnull,
                                 stderr=devnull)
        output = iconv.communicate()[0]
    return set(name.strip('/').lower()
               for line in output.splitlines()
               for name in line.split())
118
def _run_iconv(from_codec, to_codec, extra_params=None):
    """
    Starts an iconv process converting from ``from_codec`` to ``to_codec``.

    ``extra_params``, when given, is a sequence of additional command line
    arguments appended after the standard ones.

    Returns the ``subprocess.Popen`` object.  Its stdin, stdout and stderr are
    all pipes, so the caller feeds the input and collects the result (for
    instance with ``communicate()``).
    """
    # -s is iconv's "silent" switch (see iconv(1)).
    cmd = [ICONV_EXECUTABLE, '-f', from_codec, '-t', to_codec, '-s']
    if extra_params is not None:
        cmd.extend(extra_params)
    # Inherit the caller's environment so that PATH is still available to
    # locate the executable; only the locale is forced to C.  LC_ALL is set
    # too because an inherited LC_ALL would take precedence over LANG.
    env = dict(os.environ, LANG='C', LC_ALL='C')
    return subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stdin=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            env=env)
128
def _iconv_factory(codec_name):
    """
    Codec search function registered with ``codecs.register``.

    ``codec_name`` is compared in lower case.  A name starting with
    ``'iconv:'`` selects the iconv encoding named after the prefix; a name
    present in ``_codecs`` selects the iconv encoding of the same name.  For
    any other name ``None`` is returned.

    Otherwise returns a ``codecs.CodecInfo`` whose encoder and decoder pipe
    the data through ``_run_iconv``, with utf-8 on the python side of the
    pipe.  Only the 'strict' and 'ignore' error handling schemes are
    implemented; using the codec with any other scheme raises
    ``NotImplementedError``.
    """
    codec_name = codec_name.lower()
    if codec_name.startswith('iconv:'):
        name = codec_name[len('iconv:'):]
    elif codec_name in _codecs:
        name = codec_name
    else:  # unsupported or unregistered codec
        return None

    def iconv_options(errors, encoding):
        """Translate a python error handling scheme into iconv switches."""
        if errors == 'strict':
            return []
        if errors == 'ignore':
            # -c tells iconv to omit characters it cannot convert (iconv(1)).
            return ['-c']
        raise NotImplementedError("%r error handling not implemented"
                                  " for codec %r" % (errors, encoding))

    def iconvencode(input, errors='strict', encoding=name):
        """
        Encode ``input`` to ``encoding``; returns (output, len(input)).

        Raises ``UnicodeEncodeError`` when iconv writes anything to stderr.
        """
        extra = iconv_options(errors, encoding)
        # Encode before spawning iconv so that a failure here does not leave
        # a child process behind.
        _input = input.encode('utf-8')
        iconv = _run_iconv('utf-8', encoding, extra)
        output, error = iconv.communicate(_input)
        if error:
            # The first stderr line becomes the reason of the exception.
            error = error.splitlines()[0]
            raise UnicodeEncodeError(encoding, input, 0, len(input), error)
        return output, len(input)

    def iconvdecode(input, errors='strict', encoding=name):
        """
        Decode ``input`` from ``encoding``; returns (output, len(input)).

        Raises ``UnicodeDecodeError`` when iconv writes anything to stderr.
        """
        extra = iconv_options(errors, encoding)
        _input = str(input)
        iconv = _run_iconv(encoding, 'utf-8', extra)
        output, error = iconv.communicate(_input)
        if error:
            # The first stderr line becomes the reason of the exception.
            error = error.splitlines()[0]
            raise UnicodeDecodeError(encoding, input, 0, len(input), error)
        output = output.decode('utf-8')
        return output, len(input)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            # Each chunk is converted by its own iconv run.
            return iconvencode(input, self.errors)[0]

    class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
        def _buffer_decode(self, input, errors, final):
            # BufferedIncrementalDecoder.decode() passes ``final`` as the
            # third positional argument.  It must not be forwarded to
            # iconvdecode, whose third parameter is the encoding name.
            return iconvdecode(input, errors)

    class StreamWriter(codecs.StreamWriter):
        encode = staticmethod(iconvencode)

    class StreamReader(codecs.StreamReader):
        decode = staticmethod(iconvdecode)

    return codecs.CodecInfo(
        name=codec_name,
        encode=iconvencode,
        decode=iconvdecode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )


codecs.register(_iconv_factory)

if __name__ == '__main__':
    x = u'áéíóúççç'
    assert x == x.encode('iconv:utf-8').decode('iconv:utf-8')
    import doctest
    doctest.testmod()