Homework Solutions to Problems 15--16.
=====================================
[NB solutions are in Splus 6.0]

(15) Simulated using b0=3.6, 

> nvec <- c(100,50, 120,80)
  pvec <- 0.1*log(1+3.6*(1:4))
  mvec <- rbinom(4,nvec,pvec)
> cbind(nvec=nvec, pvec=pvec, xv=1:4, mvec=mvec, 
      Emvec=round(nvec*pvec,2))
     nvec      pvec xv mvec Emvec 
[1,]  100 0.1526056  1   19 15.26
[2,]   50 0.2104134  2   13 10.52
[3,]  120 0.2468100  3   30 29.62
[4,]   80 0.2734368  4   24 21.87

### (exp(10*mvec[3]/nvec[3])-1)/3 
[1] 3.727498   ### will use this as starting value
    ### Other possible choices would have been:
> (exp(10*mvec/nvec)-1)/(1:4)
[1] 5.685894 6.231869 3.727498 4.771384
### NB all are greater than the true value 3.6,
###  because it just happens that all observed 
###  numbers of successes are larger than their 
###  corresponding expected numbers.

> lLikHW15 <- function(b) {
    pvb <- .1*log(1+b*(1:4))  ## vector
    sum(mvec*log(pvb)+(nvec-mvec)*log(1-pvb)) }
## Note that this function does not make sense for
##  b greater than  (exp(10)-1)/4 = 5506.366

> optimize(lLikHW15, c(0,5000), max=T)[c(1,2,5)]$maximum:
[1] 4.820766

$objective:
[1] -193.9393

$message:
[1] "normal termination"

> c(lLikHW15(4.8208), lLikHW15(3.6))
[1] -193.9393 -194.6267

### The likelihood is greater at the MLE of 4.82
###   than at the true value, but not MUCH greater.

> NRaph2 <- function(x0, gfcn, gprimfcn, tol = 1e-06, maxit = 15)
{
        xout <- x0
        g.old <- gfcn(x0)
        ctr <- 0
        while(ctr < maxit & abs(g.old) > tol) {
                xout <- xout - g.old/gprimfcn(xout)
                ctr <- ctr + 1
                g.old <- gfcn(xout)
        }
        list(xout=xout, niter=ctr, logLik=lLikHW15(xout),
          Hess=gprimfcn(xout))
}

> tmpo <- NRaph2(3.7275, function(b) {
         pv <- .1*log(1+b*(1:4))
         sum((0.1*(1:4)/(1+b*(1:4)))*(mvec-nvec*pv)/(pv*(1-pv)))},
      function(b) {
         pv <- .1*log(1+b*(1:4))
         -0.1* sum(((1:4)/(1+b*(1:4)))^2*( (mvec-nvec*pv)/
             (pv*(1-pv))+0.1*(mvec/pv^2+(nvec-mvec)/(1-pv)^2)))})
$xout:
[1] 4.820771

$niter:
[1] 4

$logLik:
[1] -193.9393

$Hess:
[1] -0.6785414

> xt <- seq(2,7,by=.05)
  llk <- numeric(101)
  for(i in 1:101) llk[i] <- lLikHW15(xt[i])
  plot(xt,llk)
  abline(h=tmpo$logLik - 0.5*qnorm(.975)^2, lty=3)
### This line shows that all values b within (2.9,8)
###   would lie within the "confidence interval"

## Another way to view this is through the Wald CI 
> tmpo$xout - c(-1,1)*qnorm(.975) / tmpo$Hess
[1] 1.932275 7.709266
### We can see that this "interval" is skewed wrt
###  LRT test-based region because the logLik function
###  is asymmetric near the max !!

### Chi-square goodness of fit:
> pvopt <- .1*log(1+tmpo$xout*(1:4))
  sum((mvec-nvec*pvopt)^2/(nvec*pvopt*(1-pvopt)))
[1] 0.6289064   ### compare with chisq 3df 

**************************************************************

(16) Misspecified Model  Dataset Dset3D.asc for HW16.
    ================================================

The model: y_i ~ binom(1, g(a + dmat %*% b))
where  g(x) = exp(x)
## For this, we want to make sure that scores a + dmat %*% b
##    are always < 0
 
## SIMULATION: Start with correlated normal observations.

> xvec <- rnorm(1000)*0.4
  dmat <- cbind(rep(1,1000), xvec, -3*xvec+rnorm(1000)*0.7)
> apply(dmat,2,summary)
                  xvec             
   Min. 1 -1.156850840 -4.83032127
1st Qu. 1 -0.259528092 -0.97472353
 Median 1  0.022094641 -0.09772142
   Mean 1  0.007300602 -0.05061261
3rd Qu. 1  0.271476445  0.84269223
   Max. 1  1.194744110  4.82130422

> b1vec <- c(-1.3, .5, .25)
  scors <- c(dmat %*% b1vec)
  summary(exp(scors))
      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.1480496 0.2360536 0.2704901 0.2757545 0.3074749 0.5199690

> ydat <- rbinom(1000,1,exp(scors))

> outmat <- cbind(round(dmat,6), ydat)
  write(t(outmat),file="/home1/evs/public_html/s798c/Data/Dset3D.asc",
     ncolumns=4)

 Analysis of "outmat" imported from Data/Dset3D.asc
---------------------------------------------------

### True beta is c(-1.3, .5, .25)

> dfram <- data.frame(outmat[,2:4])
  names(dfram) <- c("x1","x2","y")
> glmtmp <- glm(cbind(y,1-y) ~ . , family=binomial, 
      data=dfram)
> glmtmp
..
Coefficients:
 (Intercept)        x1       x2 
   -0.967169 0.3817471 0.307846

Degrees of Freedom: 1000 Total; 997 Residual
Residual Deviance: 1161.625 

> c(lLikC(b1vec), lLikC(glmtmp$coef))
[1] -593.2391 -580.8126
### NOTE that the logLik difference is much larger than would be
###    expected (one half of a chi-square 3df rv) if the model
###    were properly specified !!!

> tmpmax <- nlmin(function(b) -lLikC(b), rep(0,3))
> tmpmax
$x:
[1] -0.9671691  0.3817473  0.3078461

$converged:
[1] T

$conv.type:
[1] "relative function convergence"

==============================================================
### Goodness of fit 

CI's 
> rbind(b1vec, fitted=glmtmp$coef, s.dev=summary(glmtmp)$coef[,2])
       (Intercept)        x1         x2 
 b1vec -1.30000000 0.5000000 0.25000000
fitted -0.96716901 0.3817471 0.3078460
 s.dev  0.07163044 0.3490234 0.1012989

> rbind(ulim = glmtmp$coef + 1.96*summary(glmtmp)$coef[,2],
 llim = glmtmp$coef - 1.96*summary(glmtmp)$coef[,2])
     (Intercept)         x1        x2 
ulim  -0.8267733  1.0658331 0.5063918
llim  -1.1075647 -0.3023388 0.1093002

These give CI's based on "properly specified" Iobs-based analysis.
The comparison below with Jobs shows that this is reasonable.

Other, test-based, CI's: for this we need to re-calculate
log-likelihoods with maximized values substituted for all 
coordinates other than the one of interest. 

### Begin with intercept:
> likAlt1 <- function(b0) {
    auxmin <- nlminb(glmtmp$coef[2:3], function(b2dim,B0) 
          -lLikC(c(B0,b2dim)), B0=b0)
    c(rMLE = auxmin$param, lLik= -auxmin$obj) }
> likAlt1(glmtmp$coef[1])
   rMLE.x1  rMLE.x2      lLik 
 0.3817471 0.307846 -580.8126
> likAlt1(-.7)
  rMLE.x1   rMLE.x2      lLik 
 0.375994 0.2926115 -588.0464
> likAlt1(-1.2)
   rMLE.x1   rMLE.x2      lLik 
 0.3964614 0.3298038 -585.8997
>  xt <- seq(-1.3,-.7, length=100)
   yt <- numeric(100)
   for(i in 1:100) yt[i] <- likAlt1(xt[i])[3]
   plot(xt, yt)
   abline(h=-580.813-1.92)  ## -582.733 threshold
   ### graphically read off CI for intercept: (-1.11, -0.83)

> uniroot(function(b) likAlt1(b)[3]+582.733, 
       c(-1.2,glmtmp$coef[1]))$root
[1] -1.10913
> uniroot(function(b) likAlt1(b)[3]-(-580.813-1.92), 
       c(glmtmp$coef[1],-0.7))$root
[1] -0.8282462
### This reproduces the Wald interval above for Intercept 

*** Now for x1 interval by test-based method:
> likAlt2 <- function(b1) {
    auxmin <- nlminb(glmtmp$coef[c(1,3)], function(b2dim,B1) 
          -lLikC(c(b2dim[1],B1,b2dim[2])), B1=b1)
    c(rMLE = auxmin$param, lLik= -auxmin$obj) }
> c(uniroot(function(b) likAlt2(b)[3]+582.733, 
       c(-.6,glmtmp$coef[2]))$root, uniroot(function(b) 
    likAlt2(b)[3]+582.733, c(glmtmp$coef[2],1.5))$root)
[1] -0.3019839  1.0673748	    ### OK, very close !!

*** Now for x2 interval by test-based method:
> likAlt3 <- function(b2) {
    auxmin <- nlminb(glmtmp$coef[1:2], function(b2dim,B2) 
          -lLikC(c(b2dim[1:2],B2)), B2=b2)
    c(rMLE = auxmin$param, lLik= -auxmin$obj) }
> c(uniroot(function(b) likAlt3(b)[3]+582.733, 
       c(-.2,glmtmp$coef[3]))$root, uniroot(function(b) 
    likAlt3(b)[3]+582.733, c(glmtmp$coef[3],.8))$root)
[1] 0.1102483 0.5076699       ### OK, close again !!


(I) Comparison of 2 ways to calculate Iobs
### The first is the one inverted above:
> Iobs1 <- t(dmat) %*% (dlogis(c(dmat %*% glmtmp$coef))*dmat)
> round(Iobs1,4)
                   xvec           
      196.5368  -5.8305   24.7926
xvec   -5.8305  31.7733  -94.4104
       24.7926 -94.4104  378.2660

> Iobs2 <- t(dmat) %*% ((dfram[,3] - plogis(c(dmat %*% 
      glmtmp$coef)))^2*dmat)
  round(Iobs2,4)
                   xvec           
      196.4871  -6.4059   25.1193
xvec   -6.4059  31.2392  -93.8838
       25.1193 -93.8838  382.9509


(II) Goodness of fit statistics:

> sum( (dfram[,3] - plogis(c(dmat %*% glmtmp$coef)))^2 )/
  sum( dlogis(c(dmat %*% glmtmp$coef)) ) 
[1] 0.9997468          ### Ratio of variances SHOULD be 1 !

> sum( (dfram[,3] - plogis(c(dmat %*% glmtmp$coef)))^2 /
     dlogis(c(dmat %*% glmtmp$coef)) ) 
[1] 1000.947   ### Fine for chi-squared 300 df!

> mtabl <- table(dfram[,1]>0, dfram[,2] > 0, dfram[,3])
  mtabl
		### These are the observed counts
, , 0
      FALSE TRUE 
FALSE    61  267
 TRUE   349   46

, , 1
      FALSE TRUE 
FALSE    18  135
 TRUE   100   24

> etabl <- aggregate.data.frame(plogis(c(dmat %*% 
    glmtmp$coef)), by= list(dfram[,1]>0, dfram[,2] > 0), sum)
> etabl
  Group.1 Group.2         x 
1   FALSE   FALSE  19.26567
2    TRUE   FALSE 103.74966
3   FALSE    TRUE 131.91261
4    TRUE    TRUE  22.07206

> sum((etabl[,3]-mtabl[,,2])^2/etabl[,3])
[1] 0.4593281		### Very small chi-square value !!