# Source code for rattail.csvutil
# -*- coding: utf-8 -*-
################################################################################
#
#  Rattail -- Retail Software Framework
#  Copyright © 2010-2014 Lance Edgar
#
#  This file is part of Rattail.
#
#  Rattail is free software: you can redistribute it and/or modify it under the
#  terms of the GNU Affero General Public License as published by the Free
#  Software Foundation, either version 3 of the License, or (at your option)
#  any later version.
#
#  Rattail is distributed in the hope that it will be useful, but WITHOUT ANY
#  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
#  FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for
#  more details.
#
#  You should have received a copy of the GNU Affero General Public License
#  along with Rattail.  If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
"""
CSV File Utilities

Contains various utilities relating to CSV file processing.

.. note::
   This module is named ``csvutil`` instead of ``csv`` primarily as a
   workaround to the problem of ``PythonService.exe`` insisting on doing
   relative imports.
"""

from __future__ import unicode_literals

import csv
import codecs
from cStringIO import StringIO


class DictWriter(csv.DictWriter):
    """
    Convenience implementation of ``csv.DictWriter``.

    This exists only to provide the :meth:`writeheader()` method on Python 2.6.
    """

    def writeheader(self):
        """
        Write the field names as a row of the output file.

        Delegates to the standard library's ``csv.DictWriter.writeheader()``
        whenever that method exists (returning whatever it returns).
        Otherwise the field names are written directly through the underlying
        writer and ``None`` is returned.
        """
        native = getattr(csv.DictWriter, 'writeheader', None)
        if native is not None:
            return native(self)
        # Fallback for a standard library without writeheader().
        self.writer.writerow(self.fieldnames)


class UTF8Recoder(object):
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    Each iteration step yields one line of the wrapped stream, decoded from
    ``encoding`` and encoded again as UTF-8 bytes.

    .. note::
       This class was stolen from the Python 2.7 documentation.
    """

    def __init__(self, fileobj, encoding):
        # Wrap the byte stream in a reader which decodes from ``encoding``.
        self.reader = codecs.getreader(encoding)(fileobj)

    def __iter__(self):
        return self

    def next(self):
        # The builtin next() is used instead of calling ``reader.next()``
        # directly, so this works no matter which spelling of the iterator
        # protocol the stream reader implements.  StopIteration raised by the
        # reader at end of input simply propagates.
        return next(self.reader).encode('utf_8')

    # Alias for the newer spelling of the iterator protocol.
    __next__ = next


class UnicodeReader(object):
    """
    A CSV reader which will iterate over lines in a CSV file, which is encoded
    in the given encoding.

    Each iteration step returns one row as a list of ``unicode`` values.

    .. note::
       This class was stolen from the Python 2.7 documentation.
    """

    def __init__(self, fileobj, dialect=csv.excel, encoding='utf_8', **kwargs):
        # The csv module is fed UTF-8 bytes, whatever the source encoding is;
        # extra keyword arguments go straight to ``csv.reader``.
        fileobj = UTF8Recoder(fileobj, encoding)
        self.reader = csv.reader(fileobj, dialect=dialect, **kwargs)

    def __iter__(self):
        return self

    def next(self):
        # StopIteration from the underlying reader ends the iteration.
        row = next(self.reader)
        # Values come back as UTF-8 bytes; decode each one to unicode.
        return [unicode(x, 'utf_8') for x in row]


class UnicodeDictReader(object):
    """
    A CSV Dict reader which will iterate over lines in a CSV file, which is
    encoded in the given encoding.

    The first row of the file is consumed when the reader is constructed and
    is kept as :attr:`header`; every following row is returned as a dict which
    maps the header names to ``unicode`` values.
    """

    def __init__(self, fileobj, dialect=csv.excel, encoding='utf_8', **kwargs):
        fileobj = UTF8Recoder(fileobj, encoding)
        self.reader = csv.reader(fileobj, dialect=dialect, **kwargs)
        # NOTE(review): the header row is kept exactly as the csv reader
        # returns it, i.e. it is not decoded like the data values are in
        # next() -- confirm this is intended for non-ASCII column names.
        # An input with no rows at all raises StopIteration right here.
        self.header = next(self.reader)

    def next(self):
        row = next(self.reader)
        vals = [unicode(s, 'utf_8') for s in row]
        # Values are looked up by header position: a row with fewer values
        # than the header raises IndexError, and values beyond the header's
        # length are ignored.
        return dict((name, vals[i]) for i, name in enumerate(self.header))

    def __iter__(self):
        return self


[docs]class UnicodeWriter(object):
    """
    A CSV writer which will write rows to CSV file "f", which is encoded in the
    given encoding.

    .. note::
       This class was stolen from the Python 2.7 documentation.
    """

    def __init__(self, f, dialect='excel', encoding='utf_8', encoding_errors='strict', **kwargs):
        # Redirect output to a queue
        self.queue = StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwargs)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)(encoding_errors)

    def writerow(self, row):
        self.writer.writerow([s.encode('utf_8') for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode('utf_8')
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)


class UnicodeDictWriter(UnicodeWriter):
    r"""
    A ``DictWriter``-ish class which accepts row data as Unicode and can write
    to the file with any encoding.

    Rows are given as dicts; the ``fields`` sequence passed to the constructor
    decides which keys are written, and in what order.

    .. note::
       This logic was stolen from a `Django snippet`_.  The original docstring
       from this snippet follows ("sic" applies here; our logic uses 'utf_8'
       encoding and regular 'excel' dialect by default):

    A CSV writer that produces Excel-compatibly CSV files from unicode data.
    Uses UTF-16 and tabs as delimeters - it turns out this is the only way to
    get unicode data in to Excel using CSV.

    Usage example::

       fp = open('my-file.csv', 'wb')
       writer = UnicodeDictWriter(fp, ['name', 'age', 'shoesize'])
       writer.writerows([
           {'name': u'Bob', 'age': 22, 'shoesize': 7},
           {'name': u'Sue', 'age': 28, 'shoesize': 6},
           {'name': u'Ben', 'age': 31, 'shoesize': 8},
           # \xc4\x80 is LATIN CAPITAL LETTER A WITH MACRON
           {'name': '\xc4\x80dam'.decode('utf8'), 'age': 11, 'shoesize': 4},
       ])
       fp.close()

    Initially derived from http://docs.python.org/lib/csv-examples.html

    .. _`Django snippet`: https://djangosnippets.org/snippets/993/
    """

    def __init__(self, f, fields, dialect='excel', encoding='utf_8', **kwds):
        # Any extra keyword arguments are forwarded to the parent constructor.
        super(UnicodeDictWriter, self).__init__(f, dialect, encoding, **kwds)
        self.fields = fields

    def writerow(self, drow):
        """
        Write one row, given as a dict keyed by field name.

        Keys which are not in ``fields`` are ignored; fields which are missing
        from the dict are written as an empty string.
        """
        row = [drow.get(field, '') for field in self.fields]
        super(UnicodeDictWriter, self).writerow(row)

    def writeheader(self):
        """
        Write the field names themselves as a row.
        """
        super(UnicodeDictWriter, self).writerow(self.fields)