Source code for ctmatching.dataset
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Example data set loader for the ``ctmatching`` algorithm. Intended for tests and demos.
About re78 data:
- 1978 earnings data for people in the US, by race, age, gender, and education.
- 429 control samples, 185 treatment samples. Each sample has 10 properties,
  not counting the ID.
Full description of this data: http://users.nber.org/~rdehejia/data/nswdata2.html.
If this link is not available, try this:
https://github.com/MacHu-GWU/ctmatching-project/blob/master/ctmatching/testdata/re78-readme.html
"""
import numpy as np
import site
import os
def _re78_path():
    """Return the path of the bundled ``re78.txt`` data file.

    The file is looked up next to this module first (``testdata/re78.txt``),
    then under ``ctmatching/testdata`` in every directory reported by
    ``site.getsitepackages()``. If none of the candidates exists, the first
    one is returned so that the subsequent ``open`` call fails with an error
    naming the expected location.
    """
    relative = os.path.join("testdata", "re78.txt")
    module_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = [os.path.join(module_dir, relative)]

    # ``site.getsitepackages`` is not available in every environment, and the
    # number of directories it returns varies, so never index it blindly.
    getsitepackages = getattr(site, "getsitepackages", None)
    if getsitepackages is not None:
        for directory in getsitepackages():
            candidates.append(os.path.join(directory, "ctmatching", relative))

    for path in candidates:
        if os.path.isfile(path):
            return path
    return candidates[0]


def load_re78():
    """re78 dataset loader.

    Reads the comma-separated ``re78.txt`` file, skips its header line and
    any blank lines, and converts each remaining line into a list: field 0 is
    kept as a string, fields 1-7 are converted to ``int`` and fields 8-10 to
    ``float``. A record whose field 1 is non-zero goes to the treatment
    group, otherwise to the control group.

    :returns: a ``(control, treatment)`` tuple of lists of records.
    :raises IOError: if the data file cannot be opened.
    :raises ValueError: if a field cannot be converted to a number.
    :raises IndexError: if a non-blank line has fewer than 11 fields.

    Usage::

        >>> from ctmatching import load_re78
        >>> control, treat = load_re78()
        >>> len(control)
        429
        >>> len(treat)
        185
    """
    with open(_re78_path(), "rb") as f:
        lines = f.read().decode("utf-8").split("\n")

    control = list()
    treatment = list()
    # lines[0] is the header row; only the data rows are parsed.
    for line in lines[1:]:
        line = line.strip()
        if not line:
            # A trailing newline in the file yields an empty last line.
            continue
        record = line.split(",")
        for i in range(1, 8):
            record[i] = int(record[i])
        for i in range(8, 11):
            record[i] = float(record[i])
        if record[1]:
            treatment.append(record)
        else:
            control.append(record)
    return control, treatment
if __name__ == "__main__":
    # Smoke test when run as a script: load the data set and check that the
    # two groups have the expected number of samples.
    control, treatment = load_re78()
    n_control = len(control)
    n_treatment = len(treatment)
    assert n_control == 429
    assert n_treatment == 185