Homework Solutions to Problems 17--18.
=====================================
[NB solutions are in Splus 6.0]

(17) Log to Simulate and Begin Analysis of Thoroughly 
Misspecified Linear Model Dataset Dset17.asc for HW17.
=====================================================

The model: y_i ~  N(a + Xmat %*% b), c^2) 
is an ordinary linear regression model with scalar 
intercept a, error-variance c^2, and 3-dim regression 
coefficients b, altogether giving 5-dim unknown parameter.

> Xmat <- cbind(rnorm(350)*0.4+0.5, rnorm(350)*(-1.2), rexp(350)*0.6)
> dimnames(Xmat)[[2]] <- c("X1","X2","X3")
> Yvec <- -1.6 + Xmat[,1]^2 - 0.5*Xmat[,2] + 
      sqrt(Xmat[,3]) + rnorm(350)*.4
> Lfram <- data.frame(cbind(Xmat[,c(1,3)], Y=Yvec))
> write.table(Lfram,"/home1/evs/public_html/s798c/Data/Dset17.asc",
    sep="  ")
## NB transformed two variables and omitted the third !

> lm1 <- lm(formula= Y ~ . , data=Lfram)
Call:
lm(formula = Y ~ ., data = Lfram)

Coefficients:
 (Intercept)      X1        X3 
   -1.387107 1.06529 0.5260654

*** So for part (i), get this for the b0,b1,b2 coeff's and
***   the estimated variance parameter is
> summary(lm1)$sigma^2 
[1] 0.581183


*** For part (ii), we re-do the estimation using nlminb:
> lLikNR <- function(betsigsq) 
    -(nrow(Lfram)/2)*log(betsigsq[4]) - (0.5/betsigsq[4])*
    sum((-betsigsq[1] + c(data.matrix(Lfram) %*% 
    c(betsigsq[2:3],1)))^2)
> nlminb(c(rep(0,3),1), function(x4) -lLikNR(x4), lower=c(
    rep(-Inf,3),1e-5), upper=rep(Inf,4))$param
[1] -1.3871072 -1.0652906 -0.5260648  0.5762019   ## OK !!

------------------------------------------------------
But NOTE: there is a discrepancy between the estimated 
sigma^2 in the lm function and the ML estimated one in nlmin.
This is not a numerical error: the lm function uses the 
unbiased (also "REML") estimator equal to the residual sum 
of squares divided by (n-p) while the ML variance estimator 
is the residual sum of squares divided by n. Thus in our 
example, 0.5762019 * 350/347 = 0.5811835 exactly reproduces
the value estimated by lm.
--------------------------------------------------------

## Recover cov.unscaled from  summary(lm1)$sigma^2 and
##    summary(lm1)$cov.unscaled

> I0 <- solve(summary(lm1)$cov.unscaled)
> round(I0,4)
            (Intercept)       X1       X3 
(Intercept)    350.0000 178.7370 231.0422
         X1    178.7370 151.4908 124.0789
         X3    231.0422 124.0789 293.3373

> Xm <- cbind(rep(1,350), data.matrix(Lfram[,1:2]))
  t(Xm) %*% Xm 
                  X1       X3 
   350.0000 178.7370 231.0422
X1 178.7370 151.4908 124.0789
X3 231.0422 124.0789 293.3373

> round(summary(lm1)$coef,3)
             Value Std. Error t value Pr(>|t|) 
(Intercept) -1.387      0.076 -18.353        0
         X1  1.065      0.098  10.820        0
         X3  0.526      0.064   8.171        0

> sqrt(diag(summary(lm1)$cov.unscaled))*summary(lm1)$sigma
[1] 0.07558005 0.09846005 0.06438342

*** So for part (iii), we use these estimates and SE's
***  to get Wald-type CI's:
> rbind(lower = lm1$coef-1.96*summary(lm1)$coef[,2],
        upper = lm1$coef+1.96*summary(lm1)$coef[,2])
      (Intercept)        X1        X3 
lower   -1.535244 0.8723081 0.3998739
upper   -1.238971 1.2582715 0.6522569

### Now proceed to compare Iobs=Iobs1 vss Jobs=Iobs2

> Iobs1 <- t(Xm) %*% Xm /summary(lm1)$sigma^2

### Full Iobs1 matrix also has sigma^2 row and column:
> Iobs1 <- cbind(rbind(Iobs1,rep(0,3)), c(0,0,0,
      175/summary(lm1)$sigma^4))
  Iobs1
                  X1       X3          
   602.2199 307.5399 397.5377   0.0000
X1 307.5399 260.6594 213.4936   0.0000
X3 397.5377 213.4936 504.7244   0.0000
     0.0000   0.0000   0.0000 518.0983

> tm <- c(Lfram$Y - Xm %*% lm1$coef)
  Iobs2 <- t((tm^2*Xm)) %*% Xm / summary(lm1)$sigma^4

> Iobs2
                  X1       X3 
   597.0580 274.7184 344.1382
X1 274.7184 278.5695 184.6571
X3 344.1382 184.6571 407.6772

### This is like our Jobs in class, but so far just the 
###    upper-left 3x3  block.

> baux <- c(t(tm*Xm) %*% (tm^2/summary(lm1)$sigma^2-1)/
    (2*summary(lm1)$sigma^4))
> Iobs2 <- cbind(rbind(Iobs2,baux), c(baux, sum((tm^2/
    summary(lm1)$sigma^2-1)^2/(4*summary(lm1)$sigma^4))))
> Iobs2
                     X1        X3           
     597.0580 274.71839 344.13823 125.75393
  X1 274.7184 278.56954 184.65713  31.72295
  X3 344.1382 184.65713 407.67716  32.08578
baux 125.7539  31.72295  32.08578 524.76716

## Recall the raw standard errors of coefficients:
> round(sqrt(diag(summary(lm1)$cov.unscaled))*
      summary(lm1)$sigma,5)
[1] 0.07558 0.09846 0.06438   
> sqrt(diag(solve(Iobs1)))
[1] 0.07558005 0.09846005 0.06438342 0.04393331

### Now we have the "robust sandwich" version:
> RS.SE <- round(sqrt(diag(solve(Iobs1) %*% Iobs2 
      %*% solve(Iobs1))),5)
[1] 0.08905 0.11936 0.05996 0.04422

### So the "robust" calculation makes a noticeable difference,
    although not a large one, even in the asymptotic standard
    error for  sigma^2.

*** Part (iv) Robust CI's for each of Intercept, X1, X2:

> rbind(lower=lm1$coef-1.96*RS.SE[1:3],
        upper=lm1$coef+1.96*RS.SE[1:3])
      (Intercept)        X1        X3 
lower   -1.561645 0.8313442 0.4085438
upper   -1.212569 1.2992354 0.6435870


(18) (i) Example as in class. begin by maximizing logLik directly.

> Tvec <- scan(file="Dset3B.dat")  ## after saving data into file
> lLikmix <- function(plam) 
    sum(log((1-plam[1])*plam[2]*exp(-(plam[2]-2)*Tvec)+2*plam[1]))
> nlminb(c(.5,2), function(pl) -lLikmix(pl), lower=c(1e-6,1e-5),
    upper=c(1-1e-6,1e2))[1:4]
$parameters:
[1] 0.3998813 0.3663359

$objective:
[1] -2133.742

$message:
[1] "RELATIVE FUNCTION CONVERGENCE"

$grad.norm:
[1] 0.0129577		### took 18 iterations ...

> plam <- .Last.value[[1]]

## Let's check this, roughly. The mixture density should
##  have mean  p/2 + (1-p)/lambda
> c(plam[1]/2 + (1-plam[1])/plam[2], mean(Tvec))
[1] 1.838106 1.839836          #### OK !!

(ii) EM algorithm:  (poor starting point .5, 2 as above)

> plamEM <- function(plam0) {
    probs <- 1/(1+(1/plam0[1]-1)*0.5*plam0[2]*exp(
               -(plam0[2]-2)*Tvec))
    mp <- mean(probs)
    c(mp,(1-mp)/mean(Tvec*(1-probs))) }

Next step: recursively calculate, and print results:
> plam0 <- c(.5,2)
  for(i in 1:100) {
     plam0 <- plamEM(plam0) 
     paux <- round(plam0,5)
     cat("iter",i,",p=", paux[1],",lam=",paux[2]," ")
     if(i %% 2 == 0) cat("\n") }
... 
iter 41 ,p= 0.39988 ,lam= 0.36634  iter 42 ,p= 0.39988 ,lam= 0.36634  
### Reach this value at iteration 41 and didn't change ...
>  lLikmix(c(.5,2))
[1] 693.1472
>  lLikmix(c(0.39988, 0.36634))
[1] 2133.742

### Let's confirm that the logLik never decreased:
> plamLik <- numeric(101)
  plam0 <- c(.5,2)
  plamLik[1] <- lLikmix(plam0)
  for(i in 2:101) {
     plam0 <- plamEM(plam0) 
     plamLik[i] <- lLikmix(plam0) }
  sum(diff(plamLik) < 0)
> min(diff(plamLik))
[1] -5.911716e-12