Source code for savReaderWriter.savHeaderReader
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import ast
import collections
import locale
import os

from savReaderWriter import *
from header import *
@implements_to_string
class SavHeaderReader(Header):
    """
    This class contains methods that read the data dictionary of an SPSS
    data file. This yields the same information as the Spss command `DISPLAY
    DICTIONARY`. NB: do not confuse an Spss dictionary with a Python
    dictionary!

    Parameters
    ----------
    savFileName : str
        The file name of the spss data file
    ioUtf8 : bool, int, default False
        Indicates the mode in which text communicated to or from
        the I/O Module will be. See also under
        :py:meth:`savReaderWriter.Generic.ioUtf8` and under
        ``ioUtf8`` in :py:class:`savReaderWriter.SavReader`.

        .. versionchanged:: 3.4
           ``ioUtf8=UNICODE_BMODE`` was added.
    ioLocale : locale str, optional
        indicates the locale of the I/O module. Cf. `SET LOCALE`.
        (default = None, which corresponds to
        ``locale.setlocale(locale.LC_CTYPE)``)

    Examples
    --------
    Typical use::

        with SavHeaderReader(savFileName) as header:
            metadata = header.all()
            report = str(header)
            print(metadata.varLabels)

    See also
    --------
    savReaderWriter.Header : for more options to retrieve individual
        metadata items
    """

    def __init__(self, savFileName, ioUtf8=False, ioLocale=None):
        """Constructor. Opens the file in read mode and initializes all
        vars that can be recycled (variable names and types, number of
        variables, number of cases)."""
        super(SavHeaderReader, self).__init__(savFileName, b"rb", None,
                                              ioUtf8, ioLocale)
        self.fh = self.openSavFile()
        self.varNames, self.varTypes = self.varNamesTypes
        self.numVars = self.numberofVariables
        self.nCases = self.numberofCases

    def __str__(self):
        """This function returns a report of the SPSS data dictionary
        (i.e., the header), in the encoding of the spss file"""
        return unicode(self).encode(self.fileEncoding)

    def __unicode__(self):
        """This function returns a report of the SPSS data dictionary
        (i.e., the header).

        The report starts with ``textInfo`` (when it is non-empty),
        followed by the output of :py:meth:`reportSpssDataDictionary`."""
        parts = []
        textInfo = self.textInfo
        if textInfo:
            parts.append(textInfo + os.linesep)
        parts.append(self.reportSpssDataDictionary(self.dataDictionary()))
        return "".join(parts)

    def __enter__(self):
        """This function returns the SavHeaderReader object itself so
        its methods become available for use with context managers
        ('with' statements).

        .. warning::

            Always ensure that the .sav file is properly closed, either by
            using a context manager (``with`` statement) or by using
            ``close()``"""
        return self

    def __exit__(self, type, value, tb):
        """This function closes the spss data file and does some cleaning.

        Nothing is returned, so an exception raised inside the ``with``
        block is not suppressed; it propagates after the file is closed."""
        self.close()

    def close(self):
        """This function closes the spss data file and does some cleaning.

        The file is closed unless the ``segfaults`` flag is set, and the
        locale of the process is reset afterwards. The locale is reset even
        when closing the file raises, so a failed close does not leave the
        locale changed."""
        try:
            # NOTE(review): `segfaults` is not defined in this file; it
            # presumably comes from one of the star imports and marks
            # platforms where closing crashes -- confirm.
            if not segfaults:
                self.closeSavFile(self.fh, mode=b"rb")
        finally:
            try:
                locale.resetlocale()  # fails on Windows
            except Exception:
                # Best-effort fallback. `Exception` rather than a bare
                # `except` so KeyboardInterrupt/SystemExit still propagate;
                # it also covers interpreters without `resetlocale`.
                locale.setlocale(locale.LC_ALL, "")

    def dataDictionary(self, asNamedtuple=False):
        """This function returns all the dictionary items. It returns
        a Python dictionary based on the Spss dictionary of the given
        Spss file. This is equivalent to the Spss command 'DISPLAY
        DICTIONARY'.

        Parameters
        ----------
        asNamedtuple : bool, default False
            If True, this function returns a namedtuple (named ``Meta``)
            instead of a dict, so one can retrieve metadata like e.g.
            ``metadata.valueLabels``

        Returns
        -------
        dict or namedtuple
            Maps each metadata item name to the value of the attribute of
            the same name on this object."""
        # "dateVariables" is not part of the report
        items = ["varNames", "varTypes", "valueLabels", "varLabels",
                 "formats", "missingValues", "measureLevels",
                 "columnWidths", "alignments", "varSets", "varRoles",
                 "varAttributes", "fileAttributes", "fileLabel",
                 "multRespDefs", "caseWeightVar"]
        if self.ioUtf8:
            items = [unicode(item) for item in items]
        metadata = {item: getattr(self, item) for item in items}
        if asNamedtuple:
            # keys() and values() of the same dict iterate in matching order
            Meta = collections.namedtuple("Meta", " ".join(metadata.keys()))
            return Meta(*metadata.values())
        return metadata

    def all(self, asNamedtuple=True):
        """Returns all the metadata as a named tuple (cf. SavReader.all)
        Exactly the same as dataDictionary, but with different (nicer?)
        default"""
        return self.dataDictionary(asNamedtuple)

    def __getEntry(self, varName, k, v, enc):
        """Helper function for reportSpssDataDictionary.

        Formats one ``key -- value`` pair of a nested metadata item as
        ``"<varName>: <k> -- <v>"``.

        Parameters
        ----------
        varName : str
            The (already decoded) name the entry belongs to
        k : object
            The key; decoded with `enc` and stripped unless ``ioUtf8`` is
            set. Keys without a ``decode`` method are used as-is.
        v : object
            The value; byte strings are decoded, lists are joined with
            ``", "``.
        enc : str
            Encoding used to decode byte strings"""
        try:
            k = k if self.ioUtf8 else k.decode(enc).strip()
        except AttributeError:
            pass  # no .decode(): not a byte string, keep the key as-is

        try:
            v = list(v) if isinstance(v, map) else v
        except TypeError:
            pass  # python 2: `map` is a function there, not a type

        try:
            v = v if self.ioUtf8 else v.decode(enc)
        except AttributeError:
            enc = self.fileEncoding

            def toText(x):
                return x.decode(enc) if isinstance(x, bytes) else str(x)

            v = ", ".join(map(toText, v)) if isinstance(v, list) else v

        # A value that still is (or textually looks like) a list or tuple of
        # strings is shown as "a, b, c". The text comes from the data file,
        # so it must never be executed: ast.literal_eval only accepts
        # literals, whereas eval() would run arbitrary expressions and would
        # also resolve bare words such as "enc" to local variables.
        try:
            parsed = ast.literal_eval(str(v))
            if isinstance(parsed, (list, tuple)):
                v = ", ".join(parsed)
        except Exception:
            pass  # not a literal sequence of strings: keep v unchanged
        return "%s: %s -- %s" % (varName, k, v)

    def reportSpssDataDictionary(self, dataDict):
        """This function reports information from the Spss dictionary
        of the active Spss dataset.

        Parameters
        ----------
        dataDict : dict
            The return value of :py:meth:`dataDictionary`

        Returns
        -------
        str
            One ``#<ITEM NAME>`` heading per metadata item (sorted by
            name), each followed by its entries, joined with
            ``os.linesep``."""
        report, enc = [], self.fileEncoding
        for kwd, allValues in sorted(dataDict.items()):
            report.append("#" + kwd.upper())
            if hasattr(allValues, "items"):
                for varName, values in sorted(allValues.items()):
                    varName = varName if self.ioUtf8 else varName.decode(enc)
                    if hasattr(values, "items"):
                        for k, v in sorted(values.items()):
                            report.append(self.__getEntry(varName, k, v, enc))
                    elif isinstance(values, list):
                        # varsets. Members are decoded one by one so that
                        # both byte strings and already-decoded text work.
                        members = ", ".join(
                            m.decode(enc) if isinstance(m, bytes) else m
                            for m in values)
                        report.append("%s -- %s" % (varName, members))
                    else:
                        # variable role, label, level, format, colwidth,
                        # alignment, type
                        try:
                            values = (values if self.ioUtf8
                                      else values.decode(enc))
                        except AttributeError:
                            values = str(values)
                        report.append("%s -- %s" % (varName, values))
            else:
                # varname, file label
                if isinstance(allValues, (str, bytes, unicode)) and allValues:
                    allValues = [allValues]
                # `or ()` keeps an absent (None) item from raising
                for varName in allValues or ():
                    if isinstance(varName, bytes):
                        varName = varName.decode(enc)
                    report.append(varName)
        return os.linesep.join(report)