HW 11 solution.
==============

Xvec = 1.5*rnorm(150)  ### IQR from -1.012 to 1.012

### could instead have used
###   Xvec = 0.91*rlogis(150)

Yarr = array(rbinom(15000,1,rep(pnorm(Xvec),1000)),
              dim=c(150,1000))

Mcoef = array(0,c(1000,2))
Mcoef2 = array(0,c(1000,2))

## METHOD 1 for maximizations:
> for (i in 1:1000) Mcoef[i,] = glm(cbind(Yarr[,i],1-Yarr[,i])~Xvec, 
         family=binomial(link="probit"))$coef

## METHOD 2
> logL = function(par,.Xv,.Yv) {
           aux = pnorm(par[1]+.Xv*par[2])
           -sum(ifelse(.Yv==1, log(aux), log(1-aux))) }
  for (i in 1:1000) Mcoef2[i,] = nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i])$est

> summary(Mcoef-Mcoef2)
       V1                   V2            
 Min.   :-2.949e-06   Min.   :-6.785e-06  
 1st Qu.: 1.306e-07   1st Qu.:-1.473e-06  
 Median : 5.589e-07   Median :-4.636e-08  
 Mean   : 5.259e-07   Mean   :-6.270e-07  
 3rd Qu.: 9.113e-07   3rd Qu.: 3.907e-07  
 Max.   : 4.803e-06   Max.   : 2.394e-06    
     ### so the two methods give essentially identical answers !!
   ### another way to check would be to tally the convergence codes
   ### and/or gradient and or min eigenvalue of Hessian

##------ Next comes steps to verify these were the max-likelihood values !

auxv = numeric(1000)
 for (i in 1:1000) auxv[i] = nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i])$code
auxv
   1                    ### this is the best convergence code !
1000 

 for (i in 1:1000) {
      tmp = nlm(logL, c(0,1), .Xv=Xvec, .Yv=Yarr[,i], hessian=T)$hess
      sumev = sum(diag(tmp))
      prodev = det(tmp)
      auxv[i] = 0.5*(sumev-sqrt(sumev^2-4*prodev)) }
> summary(auxv)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  13.88   30.45   36.33   38.00   44.83   74.71    ### all eigenvalues pos.

for(i in 1:1000) auxv[i] = max(abs(nlm(logL, c(0,1), .Xv=Xvec, 
         .Yv=Yarr[,i])$grad))
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
7.105e-08 1.410e-06 4.238e-06 1.245e-05 1.990e-05 5.792e-05
             ### remember these are for sums of 1000 iid terms !

##-------- Now finally compare histograms with normal ---

for (i in 1:1000) 
      Mcoef2[i,] = diag(solve(nlm(logL, c(0,1), .Xv=Xvec, 
                     .Yv=Yarr[,i], hessian=T)$hess))     ### est'd var's

> hist(Mcoef[,1], nclass=20, prob=T)               ### intercept coef's
  curve(dnorm(x,0,sqrt(mean(Mcoef2[,1]))), add=T)
      ### rough agreement, not great, compare the following quantiles
  rbind(empir = round(quantile(Mcoef[,1], prob=c(.01, .05, .25, 
                      .6, .75, .9, .95, .99)) , 3),
        normal = round(sqrt(mean(Mcoef2[,1]))*qnorm(c(.01, .05, .25, 
                      .6, .75, .9, .95, .99)), 3))
           1%     5%    25%   60%   75%   90%   95%   99%
empir  -0.245 -0.156 -0.062 0.037 0.083 0.191 0.209 0.328
normal -0.304 -0.215 -0.088 0.033 0.088 0.167 0.215 0.304

> hist(Mcoef[,2], nclass=20, prob=T)               ### slope coef's
  curve(dnorm(x,1,sqrt(mean(Mcoef2[,2]))), add=T)
      ### now there is a definite difference in lower tail !! 
      ###       compare the quantiles
  rbind(empir = round(quantile(Mcoef[,2], prob=c(.01, .05, .25, 
                      .6, .75, .9, .95, .99)) , 3),
        normal = round(1+ sqrt(mean(Mcoef2[,2]))*qnorm(c(.01, .05, .25, 
                      .6, .75, .9, .95, .99)), 3))
          1%    5%   25%   60%   75%   90%   95%   99%
empir  0.774 0.813 0.953 1.087 1.135 1.236 1.352 1.511
normal 0.608 0.723 0.886 1.043 1.114 1.216 1.277 1.392