Homework 10 solution, Oct 2009.
===============================

> HW10Data = rlogis(5000, runif(1,2,2.5), runif(1,.7,.85))
> mean(HW10Data)
[1] 2.480233
> sqrt(var(HW10Data))
[1] 1.366586
> integrate(function(x) x^2*dlogis(x), -20,20,abs.tol=1e-9)$val
[1] 3.289866

> unix.time({xfit = nlm(function(x) -sum(log(dlogis(HW10Data,x[1],x[2]))),
     c(2,1))})
[1] 0.12 0.00 0.13 NA NA
> str(xfit)
List of 5
 $ minimum   : num 8573
 $ estimate  : num [1:2] 2.48 0.75
 $ gradient  : num [1:2] -0.00251 0.00363
 $ code      : int 1
 $ iterations: int 7
> unix.time({xfit = nlm(function(x) -sum(log(dlogis(HW10Data,x[1],x[2]))),
     c(0,1))})
[1] 0.19 0.00 0.20 NA NA
> unix.time({xfit = nlm(function(x) -sum(log(dlogis( HW10Data,x[1],x[2]))),
     c(0,1), hessian=T)})
[1] 0.18 0.02 0.20 NA NA

> unix.time({xfit2 = nlm(function(x) {
     obj = -sum(log(dlogis(HW10Data,x[1],x[2])))
     pvec=plogis(HW10Data,x[1],x[2])
     nd = length(HW10Data)
     attr(obj,"gradient") = c(nd - 2*sum(pvec),
        nd-sum((HW10Data-x[1])*(2*pvec-1))/x[2])/x[2]
     obj}, c(2,1))})
[1] 0.15 0.00 0.15 NA NA
> str(xfit2)
List of 5
 $ minimum   : num 8573
 $ estimate  : num [1:2] 2.48 0.75
 $ gradient  : num [1:2] -0.00251 0.00364
 $ code      : int 1
 $ iterations: int 7
> unix.time({xfit2 = nlm(function(x) {
     obj = -sum(log(dlogis(HW10Data,x[1],x[2])))
     pvec=plogis(HW10Data,x[1],x[2])
     nd = length(HW10Data)
     attr(obj,"gradient") = c(nd - 2*sum(pvec),
        nd-sum((HW10Data-x[1])*(2*pvec-1))/x[2])/x[2]
     obj}, c(2,1), hessian=T)})
[1] 0.18 0.00 0.17 NA NA
> unix.time({xfit2 = nlm(function(x) {
     obj = -sum(log(dlogis(HW10Data,x[1],x[2])))
     pvec=plogis(HW10Data,x[1],x[2])
     nd = length(HW10Data)
     attr(obj,"gradient") = c(nd - 2*sum(pvec),
        nd-sum((HW10Data-x[1])*(2*pvec-1))/x[2])/x[2]
     obj}, c(0,1))})
[1] 0.16 0.00 0.16

> unix.time({xfit3 = nlm(function(x) {
     dvec=dlogis(HW10Data,x[1],x[2])
     pvec=plogis(HW10Data,x[1],x[2])
     stdvec=(HW10Data-x[1])/x[2]
     obj = -sum(log(dvec))
     nd = length(HW10Data)
     attr(obj,"gradient") = c(nd - 2*sum(pvec),
        nd-sum(stdvec*(2*pvec-1)))/x[2]
     attr(obj,"hessian") =
        -matrix(c(-2*sum(dvec), rep(nd-2*sum(pvec+stdvec*dvec),2),
           nd+2*sum( stdvec*(1-2*pvec-dvec*stdvec))), ncol=2)/x[2]^2
     obj}, c(2,1))})
[1] 0.68 0.00 0.76 NA NA

## So in simple problems or maybe not-so-simple there
## might be no benefit in using the analytical
## gradients unless you want the output Hessian
## (which we almost always do want for statistical
## applications), but even then the analytical
## Hessian might be more trouble than it is worth !!

### Now for part (b): so far we have found using nlm:

> xfit$est
[1] 2.2160326 0.8122672
> xfit2$est
[1] 2.2160326 0.8122672
> xfit3$est
[1] 2.2160321 0.8122672

> unix.time({xfit4 = optim(c(2,1),function(x) {
     dvec=dlogis(HW10Data,x[1],x[2])
     pvec=plogis(HW10Data,x[1],x[2])
     stdvec=(HW10Data-x[1])/x[2]
     obj = -sum(log(dvec))
     nd = length(HW10Data)
     obj})})
   user  system elapsed
   0.23    0.00    0.23
> xfit4$par
[1] 2.2161128 0.8122323
> unix.time({cat(optim(c(2,1),function(x) {
     dvec=dlogis(HW10Data,x[1],x[2])
     pvec=plogis(HW10Data,x[1],x[2])
     stdvec=(HW10Data-x[1])/x[2]
     obj = -sum(log(dvec))
     nd = length(HW10Data)
     obj}, control=list(reltol=1e-10))$par,"\n")})
2.216024 0.8122737
   user  system elapsed
    0.3     0.0     0.3

### so it looks as though the default
### accuracy for "optim" is a little less than for "nlm"

### The "Gradient-Search" solution is done using the "Gradmat"
### and "GradSrch" functions provided in the notes.
### Listings of these same functions are given at the end of
### this Solution file for convenience.

> likfunc = function(x) {
     dvec=dlogis(HW10Data,x[1],x[2])
     pvec=plogis(HW10Data,x[1],x[2])
     stdvec=(HW10Data-x[1])/x[2]
     obj = -sum(log(dvec))
     nd = length(HW10Data)
     obj}
> Gradmat(c(2,1),likfunc)
[1] -392.566 1248.681
### so the steps in the GradSrch function
#### will be much too large if we are not careful !
> unix.time({xfit5=GradSrch(c(2,1), function(x) {
     dvec=dlogis(HW10Data,x[1],x[2])
     pvec=plogis(HW10Data,x[1],x[2])
     stdvec=(HW10Data-x[1])/x[2]
     obj = -sum(log(dvec))
     nd = length(HW10Data)
     obj}, .05, unitfac=T)})
   user  system elapsed
   0.73    0.00    0.75
> xfit5
$nstep
[1] 25   ### ran up to iteration limit
$initial
[1] 2 1
$final
[1] 2.2159634 0.7853559
$funcval
[1] 8964.238
> Gradmat(xfit5$final,likfunc)
[1]   -0.788279 -312.608450
## still very far from convergence
unix.time({xfit5=GradSrch(xfit5$final, function(x) {
     dvec=dlogis(HW10Data,x[1],x[2])
     pvec=plogis(HW10Data,x[1],x[2])
     stdvec=(HW10Data-x[1])/x[2]
     obj = -sum(log(dvec))
     nd = length(HW10Data)
     obj}, 1e-4, unitfac=F)})
   user  system elapsed
   0.14    0.00    0.14
#### Now do it again using small gradient steps
### with this "final" point as new starting point
> unix.time({xfit5=GradSrch(xfit5$final, function(x) {
     dvec=dlogis(HW10Data,x[1],x[2])
     pvec=plogis(HW10Data,x[1],x[2])
     stdvec=(HW10Data-x[1])/x[2]
     obj = -sum(log(dvec))
     nd = length(HW10Data)
     obj}, .001)})
> Gradmat(xfit5$final,likfunc)
[1] -0.001060471  0.001962690
### much better !!
> unlist(xfit5[c(1,3)])
    nstep    final1    final2
5.0000000 2.2160322 0.8122674

---------- Function Listings follow ------------

> Gradmat
function(parvec, infcn, eps = 1e-06) {
   # Function to calculate the difference-quotient approx gradient
   # (matrix) of an arbitrary input (vector) function infcn
   # Now recoded to use central differences !
   # Each coordinate is perturbed by +eps/2 and -eps/2, so the
   # total spread is eps and the quotient below divides by eps.
   dd = length(parvec)            # number of parameters (columns of gmat)
   aa = length(infcn(parvec))     # length of the function value (rows of gmat)
   epsmat = (diag(dd) * eps)/2    # column i = half-step perturbation of coord i
   gmat = array(0, dim = c(aa, dd))
   for(i in 1:dd)
      gmat[, i] = (infcn(parvec + epsmat[, i]) -
         infcn(parvec - epsmat[, i]))/eps
   # Return a matrix for vector-valued infcn, a plain vector otherwise
   if(aa > 1) gmat else c(gmat)
}

> GradSrch
function(inipar, infcn, step, nmax = 25, stoptol = 1e-05,
   unitfac = F, eps = 1e-06, gradfunc = NULL) {
   # Function to implement Steepest-descent. The unitfac
   # condition indicates whether or not the supplied step-length
   # factor(s) multiply the negative gradient itself, or
   # the unit vector in the same direction.
   # step may be a single factor (recycled for all nmax iterations)
   # or a vector of per-iteration factors.  gradfunc defaults to the
   # numerical central-difference gradient Gradmat above.
   if(is.null(gradfunc))
      gradfunc = function(x) Gradmat(x, infcn, eps)
   steps = if(length(step) > 1) step else rep(step, nmax)
   newpar = inipar
   oldpar = newpar - 1    # offset start point so the loop runs at least once
   ctr = 0
   # Stop after nmax iterations, or when successive iterates move
   # less than stoptol in Euclidean norm.
   # NOTE(review): `&` is elementwise; `&&` would be the scalar idiom
   # here -- behavior is identical for these length-1 conditions.
   while(ctr < nmax & sqrt(sum((newpar - oldpar)^2)) > stoptol) {
      ctr = ctr + 1
      oldpar = newpar
      newstep = gradfunc(oldpar)
      newstep = if(unitfac) newstep/sqrt(sum(
         newstep^2)) else newstep
      ### use unit vector in gradient direction if unitfac=T
      newpar = oldpar - steps[ctr] * newstep
   }
   # Return the iteration count, start/end points, and final objective value.
   list(nstep = ctr, initial = inipar, final = newpar,
      funcval = infcn(newpar))
}