# Source code for intessa.conneg.default.text
# -*- coding: utf-8 -*-
from intessa.conneg.codec_base import Codec
from intessa.conneg.content_type import ContentType
[docs]class TextCodec(Codec):
"""Default text codec."""
[docs] def encode(media_type, string, encoding='utf-8', errors='strict'):
ur"""
Encode a unicode string as a bytestring using an encoding.
:param encoding:
The encoding to use (default: ``'utf-8'``).
:param errors:
The strategy for handling encoding errors (default: ``'strict'``).
See the documentation on the built-in ``unicode.encode()`` for more
information about this option.
>>> TextCodec.encode('text/plain', u"Héllo Wörld")
(ContentType('text/plain; charset=utf-8'), 'H\xc3\xa9llo W\xc3\xb6rld')
>>> TextCodec.encode('text/plain', u"Héllo Wörld", encoding='latin1')
(ContentType('text/plain; charset=latin1'), 'H\xe9llo W\xf6rld')
"""
encoded = string.encode(encoding, errors)
c_type = ContentType('%s; charset=%s' % (media_type, encoding))
return (c_type, encoded)
[docs] def decode(c_type, bytes):
ur"""
Decode a bytestring to unicode, using the content type's charset.
>>> TextCodec.decode(ContentType('text/plain; charset=utf-8'),
... 'H\xc3\xa9llo W\xc3\xb6rld')
u'H\xe9llo W\xf6rld'
>>> TextCodec.decode(ContentType('text/plain; charset=latin1'),
... 'H\xe9llo W\xf6rld')
u'H\xe9llo W\xf6rld'
If no charset is present, this method assumes the input is UTF-8::
>>> TextCodec.decode(ContentType('text/plain'),
... 'H\xc3\xa9llo W\xc3\xb6rld')
u'H\xe9llo W\xf6rld'
The decoder always uses 'strict' error handling::
>>> TextCodec.decode(ContentType('text/plain; charset=us-ascii'), # doctest: +ELLIPSIS
... 'H\xc3\xa9llo W\xc3\xb6rld')
Traceback (most recent call last):
...
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 1: ordinal not in range(128)
"""
return bytes.decode(c_type.params.get('charset', 'utf-8'))