HW 11 solution. ============== Xvec = 1.5*rnorm(150) ### IQR from -1.012 to 1.012 ### could instead have used ### Xvec = 0.91*rlogis(150) Yarr = array(rbinom(15000,1,rep(pnorm(Xvec),1000)), dim=c(150,1000)) Mcoef = array(0,c(1000,2)) Mcoef2 = array(0,c(1000,2)) ## METHOD 1 for maximizations: > for (i in 1:1000) Mcoef[i,] = glm(cbind(Yarr[,i],1-Yarr[,i])~Xvec, family=binomial(link="probit"))$coef ## METHOD 2 > logL = function(par,.Xv,.Yv) { aux = pnorm(par[1]+.Xv*par[2]) -sum(ifelse(.Yv==1, log(aux), log(1-aux))) } for (i in 1:1000) Mcoef2[i,] = nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i])$est > summary(Mcoef-Mcoef2) V1 V2 Min. :-2.949e-06 Min. :-6.785e-06 1st Qu.: 1.306e-07 1st Qu.:-1.473e-06 Median : 5.589e-07 Median :-4.636e-08 Mean : 5.259e-07 Mean :-6.270e-07 3rd Qu.: 9.113e-07 3rd Qu.: 3.907e-07 Max. : 4.803e-06 Max. : 2.394e-06 ### so the two methods give essentially identical answers !! ### another way to check would be to tally the convergence codes ### and/or gradient and or min eigenvalue of Hessian ##------ Next comes steps to verify these were the max-likelihood values ! auxv = numeric(1000) for (i in 1:1000) auxv[i] = nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i])$code auxv 1 ### this is the best convergence code ! 1000 for (i in 1:1000) { tmp = nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i], hessian=T)$hess sumev = sum(diag(tmp)) prodev = det(tmp) auxv[i] = 0.5*(sumev-sqrt(sumev^2-4*prodev)) } > summary(auxv) Min. 1st Qu. Median Mean 3rd Qu. Max. 13.88 30.45 36.33 38.00 44.83 74.71 ### all eigenvalues pos. for(i in 1:1000) auxv[i] = max(abs(nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i])$grad)) Min. 1st Qu. Median Mean 3rd Qu. Max. 7.105e-08 1.410e-06 4.238e-06 1.245e-05 1.990e-05 5.792e-05 ### remember these are for sums of 1000 iid terms ! ##-------- Now finally compare histograms with normal --- for (i in 1:1000) Mcoef2[i,] = diag(solve(nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i], hessian=T)$hess)) ### est'd var's > hist(Mcoef[,1], nclass=20, prob=T) ### intercept coef's curve(dnorm(x,0,sqrt(mean(Mcoef2[,1]))), add=T) ### rough agreement, not great, compare the following quantiles rbind(empir = round(quantile(Mcoef[,1], prob=c(.01, .05, .25, .6, .75, .9, .95, .99)) , 3), normal = round(sqrt(mean(Mcoef2[,1]))*qnorm(c(.01, .05, .25, .6, .75, .9, .95, .99)), 3)) 1% 5% 25% 60% 75% 90% 95% 99% empir -0.245 -0.156 -0.062 0.037 0.083 0.191 0.209 0.328 normal -0.304 -0.215 -0.088 0.033 0.088 0.167 0.215 0.304 > hist(Mcoef[,2], nclass=20, prob=T) ### slope coef's curve(dnorm(x,1,sqrt(mean(Mcoef2[,2]))), add=T) ### now there is a definite difference in lower tail !! ### compare the quantiles rbind(empir = round(quantile(Mcoef[,2], prob=c(.01, .05, .25, .6, .75, .9, .95, .99)) , 3), normal = round(1+ sqrt(mean(Mcoef2[,2]))*qnorm(c(.01, .05, .25, .6, .75, .9, .95, .99)), 3)) 1% 5% 25% 60% 75% 90% 95% 99% empir 0.774 0.813 0.953 1.087 1.135 1.236 1.352 1.511 normal 0.608 0.723 0.886 1.043 1.114 1.216 1.277 1.392