In this example we use AlgoPy to compute the maximum likelihood estimates and standard errors of the parameters of a nonlinear model. It follows the statsmodels generic maximum likelihood example, which uses the medpar dataset.
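For reference, the model is the NB2 negative binomial regression used in that example. Writing the conditional mean as mu_i and the dispersion as alpha, the log-likelihood implemented in get_neg_ll below is

\ell(\beta, \alpha) = \sum_i \left[ y_i \log\frac{\alpha\mu_i}{1+\alpha\mu_i} - \frac{1}{\alpha}\log(1+\alpha\mu_i) + \log\Gamma\!\left(y_i + \tfrac{1}{\alpha}\right) - \log\Gamma(y_i + 1) - \log\Gamma\!\left(\tfrac{1}{\alpha}\right) \right], \qquad \mu_i = \exp(x_i^\top \beta),

and the standard errors reported at the end come from the observed Fisher information: with H(\hat\theta) the Hessian of the negative log-likelihood at the maximum likelihood estimate, the standard error of the j-th parameter is

\widehat{\mathrm{se}}(\hat\theta_j) = \sqrt{\left[H(\hat\theta)^{-1}\right]_{jj}}.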
Here is the python code:
"""
Negative Binomial Regression
This is an algopy implementation of the statsmodels example
http://statsmodels.sourceforge.net/devel/examples/generated/example_gmle.html
.
"""
import functools
import numpy
import scipy.optimize
import algopy
import numdifftools
import pandas
import patsy
g_url = 'http://vincentarelbundock.github.com/Rdatasets/csv/COUNT/medpar.csv'
def get_aic(y, X, theta):
return 2*len(theta) + 2*get_neg_ll(y, X, theta)
def get_neg_ll(y, X, theta):
alpha = theta[-1]
beta = theta[:-1]
a = alpha * algopy.exp(algopy.dot(X, beta))
ll = algopy.sum(
-y*algopy.log1p(1/a) +
-algopy.log1p(a) / alpha +
algopy.special.gammaln(y + 1/alpha) +
-algopy.special.gammaln(y + 1) +
-algopy.special.gammaln(1/alpha))
neg_ll = -ll
return neg_ll
def eval_grad(f, theta):
theta = algopy.UTPM.init_jacobian(theta)
return algopy.UTPM.extract_jacobian(f(theta))
def eval_hess(f, theta):
theta = algopy.UTPM.init_hessian(theta)
return algopy.UTPM.extract_hessian(len(theta), f(theta))
def main():
# read the data from the internet into numpy arrays
medpar = pandas.read_csv(g_url)
y_patsy, X_patsy = patsy.dmatrices('los~type2+type3+hmo+white', medpar)
y = numpy.array(y_patsy).flatten()
X = numpy.array(X_patsy)
# define the objective function and the autodiff gradient and hessian
f = functools.partial(get_neg_ll, y, X)
g = functools.partial(eval_grad, f)
h = functools.partial(eval_hess, f)
# init the search for max likelihood parameters
theta0 = numpy.array([
numpy.log(numpy.mean(y)),
0, 0, 0, 0,
0.5,
], dtype=float)
# do the max likelihood search
results = scipy.optimize.fmin_ncg(
f,
theta0,
fprime=g,
fhess=h,
avextol=1e-6,
)
# compute the hessian a couple of different ways
algopy_hessian = h(results)
num_hessian = numdifftools.Hessian(f)(results)
# report the results of the search including aic and standard error
print('search results:')
print(results)
print()
print('aic:')
print(get_aic(y, X, results))
print()
print('standard error using observed fisher information,')
print('with hessian computed using algopy:')
print(numpy.sqrt(numpy.diag(scipy.linalg.inv(algopy_hessian))))
print()
print('standard error using observed fisher information,')
print('with hessian computed using numdifftools:')
print(numpy.sqrt(numpy.diag(scipy.linalg.inv(num_hessian))))
print()
if __name__ == '__main__':
main()
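As an aside, the only AlgoPy machinery the script relies on is the pair of UTPM driver calls in eval_grad and eval_hess. Here is a minimal, self-contained sketch of those same drivers applied to a toy function whose derivatives are known in closed form; the function and inputs are made up purely for illustration and are not part of the example above:

import numpy
import algopy


def toy_objective(x):
    # the gradient is exp(x) + 2*x and the hessian is diag(exp(x) + 2),
    # so the automatic derivatives are easy to check by hand
    return algopy.sum(algopy.exp(x) + x*x)


x0 = numpy.array([0.1, 0.2, 0.3])

# gradient via the UTPM jacobian driver
x_utpm = algopy.UTPM.init_jacobian(x0)
grad = algopy.UTPM.extract_jacobian(toy_objective(x_utpm))

# hessian via the UTPM hessian driver
x_utpm = algopy.UTPM.init_hessian(x0)
hess = algopy.UTPM.extract_hessian(len(x0), toy_objective(x_utpm))

print(grad)  # expect exp(x0) + 2*x0
print(hess)  # expect a diagonal matrix with exp(x0) + 2 on the diagonal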
Here is the output of the regression script:
Optimization terminated successfully.
Current function value: 4797.476603
Iterations: 10
Function evaluations: 11
Gradient evaluations: 10
Hessian evaluations: 10
search results:
[ 2.31027893 0.22124897 0.70615882 -0.06795522 -0.12906544 0.44575671]
aic:
9606.95320507
standard error using observed fisher information,
with hessian computed using algopy:
[ 0.06794736 0.05059255 0.07613111 0.05326133 0.06854179 0.01981577]
standard error using observed fisher information,
with hessian computed using numdifftools:
[ 0.06794736 0.05059255 0.07613111 0.05326133 0.06854179 0.01981577]
The agreement between this output and the statsmodels example results suggests that the statsmodels caveat about numerical precision is probably not caused by numerical problems with the derivatives. Instead, the R MASS implementation may be giving less precise standard error estimates, or it may not be computing the standard errors from the observed Fisher information in the most straightforward way.
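One way to pursue this further would be to fit the same model through statsmodels' GenericLikelihoodModel, as in the example linked above, and compare its reported standard errors with the ones computed here. The sketch below is untested and only indicative: it assumes the y, X, and theta0 arrays built in main(), and it mirrors the NBin class from the statsmodels generic maximum likelihood example rather than defining anything new.

import numpy
import scipy.special
from statsmodels.base.model import GenericLikelihoodModel


def nb2_nloglikeobs(y, X, params):
    # per-observation negative log-likelihood of the same NB2 model
    alpha = params[-1]
    beta = params[:-1]
    a = alpha * numpy.exp(numpy.dot(X, beta))
    ll = (
        -y*numpy.log1p(1/a) +
        -numpy.log1p(a) / alpha +
        scipy.special.gammaln(y + 1/alpha) +
        -scipy.special.gammaln(y + 1) +
        -scipy.special.gammaln(1/alpha))
    return -ll


class NBin(GenericLikelihoodModel):
    # mirrors the NBin class of the statsmodels generic MLE example
    def nloglikeobs(self, params):
        return nb2_nloglikeobs(self.endog, self.exog, params)

    def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
        # the dispersion parameter needs a name for the results summary
        self.exog_names.append('alpha')
        return super(NBin, self).fit(start_params=start_params,
                                     maxiter=maxiter, maxfun=maxfun, **kwds)


# y, X and theta0 as constructed in main() above
res = NBin(y, X).fit(start_params=theta0)
print(res.params)  # should be close to the fmin_ncg search results
print(res.bse)     # standard errors as reported by statsmodels

If the standard errors in res.bse agree with the ones printed above, that would further support the reading that the discrepancy lies with the R MASS standard errors rather than with the derivatives.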