Source code for savReaderWriter.savHeaderReader

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import collections
import locale

from savReaderWriter import *
from header import *

@implements_to_string
class SavHeaderReader(Header):
    """
    This class contains methods that read the data dictionary of an SPSS
    data file. This yields the same information as the Spss command `DISPLAY
    DICTIONARY`. NB: do not confuse an Spss dictionary with a Python
    dictionary!

    Parameters
    ----------
    savFileName : str
        The file name of the spss data file
    ioUtf8 : bool, int, default False
        Indicates the mode in which text communicated to or from
        the I/O Module will be. See also under
        :py:meth:`savReaderWriter.Generic.ioUtf8` and under
        ``ioUtf8`` in :py:class:`savReaderWriter.SavReader`.

        .. versionchanged:: 3.4
            ``ioUtf8=UNICODE_BMODE`` was added.

    ioLocale : locale str, optional
        indicates the locale of the I/O module. Cf. `SET LOCALE`.
        (default = None, which corresponds to
        ``locale.setlocale(locale.LC_CTYPE)``)

    Examples
    --------

    Typical use::

        with SavHeaderReader(savFileName) as header:
            metadata = header.all()
            report = str(header)
            print(metadata.varLabels)

    See also
    --------
    savReaderWriter.Header : for more options to retrieve individual
        metadata items
    """

    def __init__(self, savFileName, ioUtf8=False, ioLocale=None):
        """Constructor. Opens the file in read mode and initializes all
        vars that can be recycled (variable names and types, number of
        variables, number of cases)."""
        super(SavHeaderReader, self).__init__(savFileName, b"rb", None,
                                              ioUtf8, ioLocale)
        self.fh = self.openSavFile()
        self.varNames, self.varTypes = self.varNamesTypes
        self.numVars = self.numberofVariables
        self.nCases = self.numberofCases

    def __str__(self):
        """This function returns a report of the SPSS data dictionary
        (i.e., the header), in the encoding of the spss file.

        NOTE(review): this returns *encoded* text; presumably the
        ``implements_to_string`` class decorator remaps ``__str__`` and
        ``__unicode__`` on Python 3 -- confirm before changing."""
        return unicode(self).encode(self.fileEncoding)

    def __unicode__(self):
        """This function returns a report of the SPSS data dictionary
        (i.e., the header). If ``textInfo`` is non-empty it is put on
        top of the report."""
        report = ""
        if self.textInfo:
            report += self.textInfo + os.linesep
        report += self.reportSpssDataDictionary(self.dataDictionary())
        return report

    def __enter__(self):
        """This function returns the SavHeaderReader object itself so
        its methods become available for use with context managers
        ('with' statements).

        .. warning::

            Always ensure the .sav file is properly closed, either by
            using a context manager (``with`` statement) or by using
            ``close()``"""
        return self

    def __exit__(self, type, value, tb):
        """This function closes the spss data file and does some cleaning.
        Exceptions raised inside the ``with`` block are not suppressed
        (the method returns None)."""
        self.close()

    def close(self):
        """This function closes the spss data file and does some cleaning.

        The file is closed unless the ``segfaults`` flag (defined outside
        this file) is set. Afterwards the process locale is restored; this
        also happens when closing the file raises."""
        try:
            if not segfaults:
                self.closeSavFile(self.fh, mode=b"rb")
        finally:
            try:
                locale.resetlocale()  # fails on Windows
            except Exception:
                # Best-effort fallback. ``Exception`` (rather than a bare
                # ``except``) still covers ``locale.Error`` as well as the
                # AttributeError raised where ``resetlocale`` no longer
                # exists, without swallowing KeyboardInterrupt/SystemExit.
                locale.setlocale(locale.LC_ALL, "")

    def dataDictionary(self, asNamedtuple=False):
        """This function returns all the dictionary items. It returns
        a Python dictionary based on the Spss dictionary of the given
        Spss file. This is equivalent to the Spss command 'DISPLAY
        DICTIONARY'. If asNamedtuple=True, this function returns a namedtuple,
        so one can retrieve metadata like e.g. 'metadata.valueLabels'.

        The fields of the namedtuple follow the order of the item list
        below."""
        items = ["varNames", "varTypes", "valueLabels", "varLabels",
                 "formats", "missingValues", "measureLevels",
                 "columnWidths", "alignments", "varSets", "varRoles",
                 "varAttributes", "fileAttributes", "fileLabel",
                 "multRespDefs", "caseWeightVar"]
        if self.ioUtf8:
            # A real list (not a lazy ``map``) because it is traversed
            # more than once below.
            items = [unicode(item) for item in items]
        metadata = dict((item, getattr(self, item)) for item in items)
        if asNamedtuple:
            # Derive names and values from ``items`` so the field order is
            # deterministic and does not depend on dict ordering.
            Meta = collections.namedtuple("Meta", " ".join(items))
            return Meta(*[metadata[item] for item in items])
        return metadata

    def all(self, asNamedtuple=True):
        """Returns all the metadata as a named tuple (cf. SavReader.all)
        Exactly the same as dataDictionary, but with different (nicer?)
        default"""
        return self.dataDictionary(asNamedtuple)

    @staticmethod
    def __decode(value, enc):
        """Helper: decode `value` with `enc` if it is a byte string,
        otherwise return it unchanged."""
        return value.decode(enc) if isinstance(value, bytes) else value

    @staticmethod
    def __materialize(value):
        """Helper: turn a lazy Python 3 ``map`` object into a list; any
        other value is returned unchanged."""
        try:
            return list(value) if isinstance(value, map) else value
        except TypeError:
            # Python 2: ``map`` is a function, so isinstance() raises.
            return value

    def __getEntry(self, varName, k, v, enc):
        """Helper function for reportSpssDataDictionary. Returns one report
        line of the form ``<varName>: <k> -- <v>``.

        Byte strings are decoded with `enc` (the key is also stripped);
        a list value is rendered as its items joined by ", ". Values are
        never evaluated as Python code: text read from the data file must
        not be passed to eval()."""
        if isinstance(k, bytes):
            k = k.decode(enc).strip()
        v = self.__materialize(v)
        if isinstance(v, bytes):
            v = v.decode(enc)
        elif isinstance(v, list):
            # "%s" formatting (instead of str()) keeps non-ASCII text
            # intact on Python 2 and stringifies numbers on both versions.
            v = ", ".join("%s" % (self.__decode(item, enc),) for item in v)
        return "%s: %s -- %s" % (varName, k, v)

    def reportSpssDataDictionary(self, dataDict):
        """This function reports information from the Spss dictionary
        of the active Spss dataset. The parameter 'dataDict' is the return
        value of dataDictionary().

        Each metadata item yields a ``#<ITEM NAME>`` header line followed by
        its entries; items are sorted by name. Returns the lines joined by
        ``os.linesep``."""
        report, enc = [], self.fileEncoding
        for kwd, allValues in sorted(dataDict.items()):
            report.append("#" + kwd.upper())
            if hasattr(allValues, "items"):
                for varName, values in sorted(allValues.items()):
                    varName = self.__decode(varName, enc)
                    # values may be a lazy map object, as __getEntry
                    # also anticipates
                    values = self.__materialize(values)
                    if hasattr(values, "items"):
                        for k, v in sorted(values.items()):
                            report.append(self.__getEntry(varName, k, v, enc))
                    elif isinstance(values, list):
                        # varsets; decode per element so that both byte
                        # strings and text can be joined
                        joined = ", ".join("%s" % (self.__decode(item, enc),)
                                           for item in values)
                        report.append("%s -- %s" % (varName, joined))
                    else:
                        # variable role, label, level, format, colwidth,
                        # alignment, type
                        values = self.__decode(values, enc)
                        report.append("%s -- %s" % (varName, values))
            else:
                # varname, file label
                if not allValues:
                    continue  # nothing to report (empty or None)
                if isinstance(allValues, (str, bytes, unicode)):
                    allValues = [allValues]
                for varName in allValues:
                    report.append(self.__decode(varName, enc))
        return os.linesep.join(report)