Example Logs, STAT 705, Fall 2015
=================================
PART II   October 2015
======================

10/7/2015
We saw in class that the method of "control variates" involves linear regression.
In the setting of the example in the log that we covered last time:
   http://www.math.umd.edu/~slud/s705/Rlogs/Antith_Contr09

> Control.fr = cbind.data.frame(X1 = rexp(1e5)*0.8, X2 = runif(1e5,2,4),
        X3 = rlogis(1e5), X4 = rgamma(1e5,1.5))
  Y = c(data.matrix(Control.fr) %*% rep(1,4))
> Control.fr = cbind.data.frame(Control.fr, Y=Y, Yctr = Y-5.3,
        Yhi = as.numeric(Y>5.5))
  rm(Y)
  tmp = lm(Yhi ~ Yctr, data=Control.fr)
> names(tmp)
 [1] "coefficients"  "residuals"     "effects"       "rank"
 [5] "fitted.values" "assign"        "qr"            "df.residual"
 [9] "xlevels"       "call"          "terms"         "model"
> names(summary(tmp))
 [1] "call"          "terms"         "residuals"     "coefficients"
 [5] "aliased"       "sigma"         "df"            "r.squared"
 [9] "adj.r.squared" "fstatistic"    "cov.unscaled"
> summary(tmp)$coef[,1:2]
             Estimate   Std. Error
(Intercept) 0.4440900 0.0009788089
Yctr        0.1623008 0.0004088673

## Note that the SE 0.00098 is essentially the same as
> sqrt(1-cor(Control.fr[,"Y"],Control.fr[,"Yhi"])^2)*sd(Control.fr$Yhi)/sqrt(1e5)
[1] 0.0009788011

====================================================================
## Generating multivariate densities by conditional prob. integral transform
----------------------------------------------------------------------------

Want to generate a random point  x1 < x2 < ... < x6  in [0,1]^6.  Several methods:

> sort(runif(6))

> tmparr = array(runif(6*10000), c(10000,6))
  inds = which(apply(tmparr, 1, function(row) min(diff(row)))>0)
  length(inds)
[1] 19
> tmparr[inds[1],]
[1] 0.01267986 0.02371346 0.22257134 0.58500541 0.62999614 0.92351900

> vec = c(rep(0,6),1)
  for(i in 6:1) vec[i]=vec[i+1]*rbeta(1,i,1)

### To do it in an array of many rows:
> ordmat = cbind(array(0, c(1e4,6)), rep(1,1e4))
  for(i in 6:1) ordmat[,i] = ordmat[,i+1]*rbeta(1e4,i,1)

### For example, can check that the 4th order-statistic has Beta(4,3) density:
  hist(ordmat[,4], nclass=40, prob=T)
  curve(dbeta(x,4,3),0,1, add=T, col="green")     ### OK

## NB this is an instance of successive conditional univariate densities

### Other methods ?  ...
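One further possibility, sketched here as an aside (not covered above), uses the
standard representation of uniform order statistics as normalized cumulative sums
of iid Exponential(1) spacings; the object name ordmat2 is introduced just for
this illustration.

### Sketch: if E_1,...,E_7 are iid Exp(1) and S_k = E_1+...+E_k, then
### (S_1/S_7, ..., S_6/S_7) is distributed as the order statistics of 6 iid Unif(0,1) points.
  Emat = array(rexp(1e4*7), c(1e4,7))
  Smat = t(apply(Emat, 1, cumsum))
  ordmat2 = Smat[,1:6]/Smat[,7]        ### 1e4 rows of increasing points in (0,1)
### same check as before: the 4th order statistic should have a Beta(4,3) density
  hist(ordmat2[,4], nclass=40, prob=TRUE)
  curve(dbeta(x,4,3), 0, 1, add=TRUE, col="blue")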
### Another R topic:  Aggregation, splitting and lists
-----------------------------------------------------

Suppose we want to consider a ("mixture" or "stratified") model in which there
are H = 6 population strata with respective sizes or probabilities
(0.3, 0.1, 0.2, 0.15, 0.15, 0.1) and within stratum h,   Y_i ~ N(mu_h, sig_h).

Let's generate a large population (N=1e5) of this sort:

  pvec = c(0.3, 0.1, 0.2, 0.15, 0.15, 0.1)
  muvec = c(4, 2, 1, 5, 3, 6)
  sigvec = sqrt(1:6)
  hvec = sample(1:6, 1e5, prob=pvec, replace=T)
> rbind(Emp = table(hvec)/1e5, True=pvec)
           1       2       3       4       5       6
Emp  0.29853 0.09915 0.20125 0.15079 0.15072 0.09956
True 0.30000 0.10000 0.20000 0.15000 0.15000 0.10000
  Yvec = rnorm(1e5, muvec[hvec], sigvec[hvec])

### Now want to display stratum properties.  Could do:
  gp1 = which(hvec==1)
> c(mean=mean(Yvec[gp1]), sd=sd(Yvec[gp1]))
     mean        sd
3.9977241 0.9993129
   etcetera, or else:
> tmplist = split(Yvec,hvec)
> class(tmplist)
[1] "list"
> sapply(tmplist,length)
    1     2     3     4     5     6
29853  9915 20125 15079 15072  9956
> sapply(tmplist, function(gp) c(mean(gp),sd(gp)))
             1        2        3        4        5        6
[1,] 3.9977241 1.986277 1.006401 5.008875 2.969467 6.009526
[2,] 0.9993129 1.412418 1.720414 1.994702 2.246038 2.444997
   or else
> tmpfr = aggregate(Yvec, by=list(hvec), function(gp) c(mean(gp),sd(gp)))
> class(tmpfr)
[1] "data.frame"
> tmpfr
  Group.1       x.1       x.2
1       1 3.9977241 0.9993129
2       2 1.9862770 1.4124179
3       3 1.0064005 1.7204142
4       4 5.0088751 1.9947024
5       5 2.9694670 2.2460384
6       6 6.0095257 2.4449966

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Log related to simulation and MLE behavior        10/14/2015

### Let's start with a model
      Z ~ N(a,b^2)
      X ~ Poisson( exp(nu+tau*Z) )
      Y ~ Binom(1, plogis(beta1 + beta2*X + beta3*Z))

## Generate data n=400 based on vectors of parameters

  par = c(a=4.1, lb=log(0.8), nu=-2, tau = 0.2, beta=c(-.5,.8,.1))

  DataGen = function(N, par) {
      Zvec = rnorm(N,par[1],exp(par[2]))
      Xvec = rpois(N,exp(par[3]+par[4]*Zvec))
      Yvec = rbinom(N,1,plogis(par[5]+par[6]*Xvec+par[7]*Zvec))
      cbind(Y=Yvec, X=Xvec, Z=Zvec) }

  tmp = cbind(DataGen(400, par),One=rep(1,400))    #### 400 x 4

### Now a negative log-likelihood (averaged over the n=400 observations)

  nlogL = function(thet, Dat)
      -mean(log(dnorm(Dat[,"Z"],thet[1],exp(thet[2])))) -
       mean(log(dpois(Dat[,"X"],exp(c(Dat[,c(4,3)]%*%thet[3:4]))))) -
       mean(log(dbinom(Dat[,"Y"],1,plogis(c(Dat[,c(4,2,3)]%*% thet[5:7])))))

### Now MLE calculation

  MLE = nlm(nlogL, c(5,log(2),rep(0,5)), hessian=T, Dat=tmp)
> MLE
$minimum
[1] 2.579204

$estimate
[1]  4.1044118 -0.2136951 -2.5665736  0.3401607 -0.7309071  0.5007728  0.2446937

$gradient
[1] -8.035650e-08  3.794565e-07  1.018789e-07  2.237255e-07  4.363399e-08
[6]  9.181322e-08  1.613332e-07

$hessian
              [,1]          [,2]          [,3]          [,4]          [,5]
[1,]  1.533251e+00 -6.290957e-04 -1.081980e-08  1.081980e-08  0.000000e+00
[2,] -6.290957e-04  1.999599e+00 -4.440892e-08  0.000000e+00 -4.440892e-08
[3,] -1.081980e-08 -4.440892e-08  3.225323e-01  1.398468e+00 -4.440892e-08
[4,]  1.081980e-08  0.000000e+00  1.398468e+00  6.290156e+00  4.440892e-08
[5,]  0.000000e+00 -4.440892e-08 -4.440892e-08  4.440892e-08  2.327332e-01
[6,]  0.000000e+00 -4.440892e-08 -4.440892e-08  0.000000e+00  6.167920e-02
[7,]  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  9.449793e-01
              [,6]      [,7]
[1,]  0.000000e+00 0.0000000
[2,] -4.440892e-08 0.0000000
[3,] -4.440892e-08 0.0000000
[4,]  0.000000e+00 0.0000000
[5,]  6.167920e-02 0.9449793
[6,]  7.749374e-02 0.2604345
[7,]  2.604345e-01 3.9821295

$code
[1] 1

$iterations
[1] 41
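Because nlogL is a per-observation mean, the inverse of the reported Hessian
estimates n times the variance matrix of the MLE.  A quick sketch (not in the
original log) of the "theoretical" standard errors from this single fit, using
the same scaling by n = 400 that appears in the simulation summary below:

### divide the inverse Hessian by n = 400 before taking square roots
  round( sqrt(diag(solve(MLE$hessian))/400), 5 )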
### CONTINUE IN CLASS with an actual simulation (say of 1000 data-batches)

Example:

  ThetMat = array(0, c(1000,7,2))   ### This array will contain the parameter
      ### estimators and their individual estimated "theoretical" SE's derived
      ### from the diagonal of the inverted observed-information matrix.
  ConvCod = logical(1000)
  for(r in 1:1000) {
      tmp = cbind(DataGen(400, par),One=rep(1,400))
      MLE = nlm(nlogL, par, hessian=T, Dat=tmp)
      ThetMat[r,,1] = MLE$est
      ConvCod[r] = (MLE$code ==1 & sqrt(sum(MLE$grad^2))/7 < 1.e-6 &
                      min(eigen(MLE$hess)$values)>0)
      ThetMat[r,,2] = sqrt(diag(solve(MLE$hess)))  }

> hist(ThetMat[,6,1], nclass=25, prob=T)
  curve(dnorm(x, par[6], sd(ThetMat[,6,1])), add=T, col="red")
      ### really nice agreement

> round(rbind(Theor.SD = sqrt(apply(ThetMat[,,2]^2,2,mean)/400),
              Empir.SD = apply(ThetMat[,,1],2,sd)),5)
            [,1]    [,2]    [,3]    [,4]    [,5]    [,6]    [,7]
Theor.SD 0.03994 0.03536 0.48483 0.11265 0.54209 0.21052 0.13034
Empir.SD 0.03999 0.03560 0.49943 0.11617 0.56854 0.21452 0.13754
      ### ALSO VERY NICE AGREEMENT !

#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
10/19/2015

### Another useful form of matrix indexing (applies also to more general arrays)

> tmat = array(1:20,c(5,4))
> tmat
     [,1] [,2] [,3] [,4]
[1,]    1    6   11   16
[2,]    2    7   12   17
[3,]    3    8   13   18
[4,]    4    9   14   19
[5,]    5   10   15   20
> tmat[cbind(1:5,c(3,2,1,4,1))]
[1] 11  7  3 19  5
> tarr = array(1:60, c(5,4,3))
> tarr
, , 1

     [,1] [,2] [,3] [,4]
[1,]    1    6   11   16
[2,]    2    7   12   17
[3,]    3    8   13   18
[4,]    4    9   14   19
[5,]    5   10   15   20

, , 2

     [,1] [,2] [,3] [,4]
[1,]   21   26   31   36
[2,]   22   27   32   37
[3,]   23   28   33   38
[4,]   24   29   34   39
[5,]   25   30   35   40

, , 3

     [,1] [,2] [,3] [,4]
[1,]   41   46   51   56
[2,]   42   47   52   57
[3,]   43   48   53   58
[4,]   44   49   54   59
[5,]   45   50   55   60

> tarr[cbind(1:5,c(2,3,2,4,1),c(1,3,2,1,3))]
[1]  6 52 28 19 45

##--------------------------------------------------------------------------------
### Contrasting MLEs and variances from correctly and incorrectly specified models

## Suppose our data comes from lognormal(mu,sigma^2) and we analyze it as Gamma(a,lam)

> Xdat = exp(rnorm(400, 0.5, 0.8))

## "Correct" ML analysis gives parameter estimates
> MLtru = c(mean(log(Xdat)), var(log(Xdat)))
[1] 0.5355010 0.6796627   ### NB 2nd parameter is the variance, to be compared with 0.8^2
   and "empirical" or "observed" information matrix (per observation)
> diag(c(1/MLtru[2]^2, 1/MLtru[2]^4))
         [,1]    [,2]
[1,] 2.164777 0.00000
[2,] 0.000000 4.68626

#### Now let's analyze the data via ML as Gamma(a,lam)

  alam.ini = (mean(Xdat)/var(Xdat))*c(mean(Xdat),1)
> alam.ini
[1] 0.9997862 0.4183704    ### not ultimately meaningful, but not too wild

  Gfit = optim(alam.ini, function(alam, data) -sum(log(dgamma(data,alam[1],alam[2]))),
               data=Xdat, hessian=T)      ### 49 iterations, conv=0
> round( unlist(Gfit[1:2]), 6)
      par1       par2      value
  1.636170   0.684693 723.132589
> Gfit$hess
          [,1]       [,2]
[1,]  333.4763  -584.2035
[2,] -584.2035  1396.0407

  Gfit2 = nlm(function(alam, data) -sum(log(dgamma(data,alam[1],alam[2]))),
              alam.ini, data=Xdat, hessian=T)      ### only 7 iterations
> Gfit2$est
[1] 1.6363411 0.6847426
> Gfit2$hess
          [,1]       [,2]
[1,]  333.3872  -584.1184
[2,] -584.1184  1395.5721
> Gfit2$grad
[1] -4.029622e-05  8.742518e-05

> nlm(function(alam, data) -sum(log(dgamma(data,alam[1],alam[2]))),
      alam.ini, data=Xdat, hessian=T, gradtol=1e-6)$est
[1] 1.6363411 0.6847426
> nlm(function(alam, data) -sum(log(dgamma(data,alam[1],alam[2]))),
      alam.ini, data=Xdat, hessian=T, gradtol=1e-9)$est
[1] 1.6363475 0.6847457

### Analytical gradient of logL contributions for each X_i, as an n x 2 matrix
  grlogL = function(alam, data)
      cbind(log(data)+log(alam[2])-digamma(alam[1]), alam[1]/alam[2] - data)
  tmp = grlogL(Gfit2$est,Xdat)
> c(t(tmp) %*% rep(1,400))
[1] 0.0003131229 0.0006105371   ### 0 to sufficient precision, after dividing by 400
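As a sanity check (added here, not in the original log), the analytic scores in
grlogL can be compared against a finite-difference gradient of the summed
negative log-likelihood; the helper name negL is introduced just for this check.

  negL = function(alam, data) -sum(log(dgamma(data, alam[1], alam[2])))
  eps = 1e-6
  numgrad = c( negL(Gfit2$est + c(eps,0), Xdat) - negL(Gfit2$est - c(eps,0), Xdat),
               negL(Gfit2$est + c(0,eps), Xdat) - negL(Gfit2$est - c(0,eps), Xdat) )/(2*eps)
### -numgrad should agree with the summed analytic scores computed above
  round(cbind(numeric = -numgrad, analytic = c(t(tmp) %*% rep(1,400))), 6)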
> Mis.Info = t(tmp) %*% tmp
          [,1]       [,2]
[1,]  271.1854  -642.7003
[2,] -642.7003  2279.0728
#### compare with the Observed Info, above
> Gfit2$hess
          [,1]       [,2]
[1,]  333.3872  -584.1184
[2,] -584.1184  1395.5721

### As shown in class, a "robustified" variance for the ML estimators via
### the mis-specified model is
> Rob.Var = solve(Mis.Info) %*% Gfit$hess %*% solve(Mis.Info)
            [,1]         [,2]
[1,] 0.014215887 0.0030794743
[2,] 0.003079474 0.0008750884
   compared with the "naive" variance
> solve(Gfit2$hess)
            [,1]        [,2]
[1,] 0.011248119 0.004707914
[2,] 0.004707914 0.002687055

### The corresponding SE's for the gamma-based MLE's are:
> round(rbind(Naive= sqrt(diag(solve(Gfit2$hess))),
              Robust= sqrt(diag(Rob.Var)) ), 4)
         [,1]   [,2]
Naive  0.1061 0.0518
Robust 0.1192 0.0296

### This means that it might take a large simulation to feel the effects of
### incorrect CI coverage for the shape-parameter a , but the estimates
### for lambda will actually have a lot smaller variance than the naive
### variance matrix would tell us.

#------------------------------------------------------------------
10/21/2015        "Classic" EM Algorithm Problem

Contingency Table with Missing Cells.   Consider the following data problem.

  Xv = sample(1:3, 500, prob=c(.45,.25,.3), rep=T)
  Yv = sample(1:4, 500, prob=c(.22,.32, .18, .28), rep=T)
> XYtab0 = table(Xv,Yv)
  XYtab0
   Yv
Xv    1  2  3  4
  1  45 76 40 65
  2  29 39 21 33
  3  40 47 28 37
 NB
> apply(XYtab0,1,sum)/500
    1     2     3
0.452 0.244 0.304          ### close to .45,.25,.3 as expected
> apply(XYtab0,2,sum)/500
    1     2     3     4
0.228 0.324 0.178 0.270    ### close to .22, .32, .18, .28 as expected

### Now suppose that the (3,3), (2,4), and (1,1) cells are blanked out,
### ie treated as missing, with only the number 45+33+28 = 106 of such
### missing observations known.  In that view, the missing data is the
### information of exactly which (or how many of) those 106 observations
### fall into each of the (X=1,Y=1), (X=3,Y=3) and (X=2,Y=4) cells.

  XYtab = XYtab0;  XYtab0[cbind(1:3,c(1,4,3))] = NA
  XYtab0
   Yv
Xv    1  2  3  4
  1     76 40 65
  2  29 39 21
  3  40 47    37
   along with the table total N=500.   Data is XYtab0.

  sum(c(XYtab0), na.rm=T)     ### 394, so 106 fell in the missing cells

We apply the EM algorithm, in the form:

  EMiter = function(pvec, qvec) {
      denom = pvec[1]*qvec[1]+pvec[3]*qvec[3]+pvec[2]*qvec[4]
      ptmp = c(181+106*pvec[1]*qvec[1]/denom, 89+106*pvec[2]*qvec[4]/denom,
               124+106*pvec[3]*qvec[3]/denom)
      qtmp = c(69+106*pvec[1]*qvec[1]/denom, 162, 61+106*pvec[3]*qvec[3]/denom,
               102+106*pvec[2]*qvec[4]/denom)
      list(pout = ptmp/sum(ptmp), qout = qtmp/sum(qtmp)) }

### So EMiter gives a mapping from one pair of 3- and 4-dimensional probability
### vectors to another such pair, and we want to iterate it to convergence.

> pv = rep(.333,3); qv = rep(.25,4)
  listvec = list(pv,qv)
> for(i in 1:10) listvec = EMiter(listvec[[1]], listvec[[2]])
  round(unlist(listvec),4)
 pout1  pout2  pout3  qout1  qout2  qout3  qout4
0.4699 0.2357 0.2944 0.2459 0.3240 0.1684 0.2617
> for(i in 1:10) listvec = EMiter(listvec[[1]], listvec[[2]])
  round(unlist(listvec),4)
 pout1  pout2  pout3  qout1  qout2  qout3  qout4
0.4701 0.2356 0.2943 0.2461 0.3240 0.1683 0.2616
> listvec0 = listvec
  for(i in 1:10) listvec = EMiter(listvec[[1]], listvec[[2]])
  round(unlist(listvec),4)
 pout1  pout2  pout3  qout1  qout2  qout3  qout4
0.4701 0.2356 0.2943 0.2461 0.3240 0.1683 0.2616
> unlist(listvec)-unlist(listvec0)
        pout1         pout2         pout3         qout1         qout2
 5.886801e-07 -3.658261e-07 -2.228540e-07  5.886801e-07  0.000000e+00
        qout3         qout4
-2.228540e-07 -3.658261e-07
### So good convergence has happened !
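A further check one might add (a sketch, not in the original log): monitor the
observed-data log-likelihood along the EM path, which EM theory says can never
decrease.  The helper name obsLL is introduced just for this illustration; the
106 pooled observations enter through the total probability p1*q1 + p2*q4 + p3*q3
of the three blanked cells.

  obsLL = function(pvec, qvec)
      sum(XYtab0 * log(outer(pvec,qvec)), na.rm=TRUE) +
        106*log(pvec[1]*qvec[1] + pvec[2]*qvec[4] + pvec[3]*qvec[3])
### track it over 20 iterations, starting again from the uniform initial values
  listvec = list(rep(.333,3), rep(.25,4));  llpath = numeric(20)
  for(i in 1:20) { listvec = EMiter(listvec[[1]], listvec[[2]])
                   llpath[i] = obsLL(listvec[[1]], listvec[[2]]) }
  round(diff(llpath), 8)   ### all >= 0 (up to rounding): monotone EM property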
##--------------------------------------------------------------
## Now a continuous distributional example -- random-effects ANOVA

 Let  Y_{ij} = mu_i + a[j] + eps[i,j],   i=1,..,5,  j=1,..,4
   where eps[i,j] ~ N(0,sig^2) iid, independent of a[j] ~ N(0,sigA^2) iid,
   and the mu_i are unknown parameters along with sig, sigA.

 NB  a_j given Dobs is N( gam*(Ybar_{.j}-mubar), 0.2*sig^2*gam ),
     where gam = sigA^2/(sigA^2+0.2*sig^2) and mubar = sum_{i=1}^5 mu_i/5.
     NOTE THAT THIS CONDITIONAL DISTRIBUTION WAS WRITTEN WRONGLY WHEN I DID
     THIS MUCH IN CLASS.  IT IS CORRECTED HERE.

 Data:
  avec = rnorm(4,0,1.5)
  eps = array(rnorm(20,0,0.8), c(5,4))
  Ymat = outer(c(6,3,5,2,4),avec,"+") + eps
        ## correct mu = (6,3,5,2,4), sigAsq = 1.5^2, sigsq = 0.8^2
  Yidot = apply(Ymat,1,mean)
  Ydotj = apply(Ymat,2,mean)
  Ybar = mean(Yidot)

  listpar = list(mu=rep(1,5), sigAsq=2, sigsq=1)    ### Initial values

  EMiter2 = function(listpar) {
      mubar = mean(listpar[[1]])
      gam = listpar[[2]]/(listpar[[2]]+0.2*listpar[[3]])
      avar = 0.2*listpar[[3]]*gam
      newEaj = gam*(Ydotj-mubar)
      newmuvec = Yidot - mean(newEaj)
      newsgAsq = avar + mean(newEaj^2)
      newsgsq = avar + mean(c(Ymat - outer(newmuvec,newEaj,"+"))^2)
      list(mu =newmuvec, sigAsq=newsgAsq, sigsq=newsgsq) }

> listpar = EMiter2(listpar)     ### first EM iterate
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 3.340  0.907  3.037 -1.714  0.662  7.682  0.720
> listpar = EMiter2(listpar)
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 3.385  0.952  3.082 -1.669  0.707  7.658  0.666
> listpar = EMiter2(listpar)
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 3.427  0.993  3.124 -1.627  0.749  7.454  0.655
> listpar = EMiter2(listpar)
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 3.468  1.034  3.165 -1.586  0.790  7.258  0.653
      ### will require many more iterates ...
> for (i in 1:50) listpar = EMiter2(listpar)
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 5.524  3.091  5.221  0.470  2.846  1.697  0.654
      ### Stabilizing, but not quite there yet !
      ### Recall that the "true" parameter vector is
      ###    (6,3,5,2,4, 1.5^2, 0.8^2)
> for (i in 1:50) listpar = EMiter2(listpar)
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 5.797  3.364  5.494  0.743  3.119  1.605  0.655
> for (i in 1:50) listpar = EMiter2(listpar)
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 5.803  3.369  5.500  0.749  3.125  1.605  0.655
      ### You can see appropriate convergence, but it is slow.
> for (i in 1:100) listpar = EMiter2(listpar)
  round(unlist(listpar),3)
   mu1    mu2    mu3    mu4    mu5 sigAsq  sigsq
 5.803  3.369  5.500  0.749  3.125  1.605  0.655
      #### But we seem to have gotten there !!
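Since the convergence is slow, instead of running fixed batches of iterations one
might wrap EMiter2 in a simple tolerance-based loop; a minimal sketch (the names
tol, maxit, niter are introduced here just for illustration):

### iterate EMiter2 until the parameter vector stops changing
  tol = 1e-8;  maxit = 5000
  listpar = list(mu=rep(1,5), sigAsq=2, sigsq=1)
  for (niter in 1:maxit) {
      oldpar = unlist(listpar)
      listpar = EMiter2(listpar)
      if (max(abs(unlist(listpar) - oldpar)) < tol) break }
  c(niter = niter, round(unlist(listpar),3))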
=========================================================
10/26/2015
Material from the Slog4.txt log, updated:

> steamdat <- matrix(c(10.98, 35.3, 11.13, 29.7, 12.51, 30.8, 8.40, 58.8,
     9.27, 61.4, 8.73, 71.3, 6.36, 74.4, 8.50, 76.7, 7.82, 70.7, 9.14, 57.5,
     8.24, 46.4, 12.19, 28.9, 11.88, 28.1, 9.57, 39.1, 10.94, 46.8, 9.58, 48.5,
     10.09, 59.3, 8.11, 70.0, 6.83, 70.0, 8.88, 74.5, 7.68, 72.1, 8.47, 58.1,
     8.86, 44.6, 10.36, 33.4, 11.08, 28.6), ncol=2, byrow=T,
     dimnames=list(NULL,c("steamuse","temp")))
> plot(steamdat[,2],steamdat[,1], xlab="Temp", ylab="Use",
       main="Draper-Smith Steam Data Example")
> lmtmp = lm(steamuse ~ . , data=data.frame(steamdat))
> identify(steamdat[,2],steamdat[,1])
[1]  3  7 11 15 17 19
> summary(lmtmp)

Call:
lm(formula = steamuse ~ ., data = data.frame(steamdat))

Residuals:
    Min      1Q  Median      3Q     Max
-1.6789 -0.5291 -0.1221  0.7988  1.3457

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 13.62299    0.58146  23.429  < 2e-16 ***
temp        -0.07983    0.01052  -7.586 1.05e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8901 on 23 degrees of freedom
Multiple R-squared:  0.7144,  Adjusted R-squared:  0.702
F-statistic: 57.54 on 1 and 23 DF,  p-value: 1.055e-07

> names(summary(lmtmp))
 [1] "call"          "terms"         "residuals"     "coefficients"
 [5] "aliased"       "sigma"         "df"            "r.squared"
 [9] "adj.r.squared" "fstatistic"    "cov.unscaled"

#### It is a good idea to test your model knowledge so that you can compute
#### "by hand" (in R) as many as possible of the lm output list components, e.g.:

> c(summary(lmtmp)$sigma^2, sum(lmtmp$residuals^2)/23)
[1] 0.7923217 0.7923217
> summary(lmtmp)$cov.unscaled
             (Intercept)          temp
(Intercept)  0.426720377 -0.0073520984
temp        -0.007352098  0.0001397737
> mat = model.matrix(lmtmp)
  solve(t(mat) %*% mat)
             (Intercept)          temp
(Intercept)  0.426720377 -0.0073520984
temp        -0.007352098  0.0001397737
> c(summary(lmtmp)$r.sq, cor(steamdat[,1], lmtmp$fitted)^2)
[1] 0.7144375 0.7144375

> rvar = { mtmp = model.matrix(lmtmp)
      summary(lmtmp)$sigma^2*diag(diag(25)-mtmp %*% solve(t(mtmp) %*% mtmp, t(mtmp))) }
  stdrsd = lmtmp$resid/sqrt(rvar)
> plot(steamdat[,2],steamdat[,1], xlab="Temp", ylab="Use",
       main="Draper-Smith Steam Data Example")
  identify(steamdat[,2],steamdat[,1], labels=round(stdrsd,2))
[1]  3  7 11 15 17 19 20

> anova(lmtmp)
Analysis of Variance Table

Response: steamuse
          Df Sum Sq Mean Sq F value    Pr(>F)
temp       1 45.592  45.592  57.543 1.055e-07 ***
Residuals 23 18.223   0.792
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## NB
> c(sum(lmtmp$resid^2), sum((steamdat[,1]-mean(steamdat[,1]))^2))
[1] 18.2234 63.8158      ## 2nd number is the "total (corrected) sum of squares"

#### Continue using RlogF09.LinRegr.txt or StepExmp.Log

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

Model syntax:     y ~ x + z        y ~ x*z

> data(attitude)
> dim(attitude)
[1] 30  7
> tmpfit = lm( rating ~ . , data=attitude)
  update( )
  update(tmpfit, .~ complaints)
  stpfit1 = step(tmpfit, rating ~ (complaints + privileges + learning + raises +
                 critical + advance)^2, direction = "forw")

### Further discussion about factors, contrasts ...

10/28/2015        GAUSSIAN QUADRATURE

So far our numerical integration formulas (Trapezoid, Simpson's) apply to finite
ranges of integration and are taken from a large class of numerical quadrature
formulas called "Newton-Cotes formulas".
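For reference, here is a minimal sketch of the composite trapezoid and Simpson
rules just mentioned (the function names trap and simp are introduced only for
this illustration):

### composite Newton-Cotes rules on [a,b] with n subintervals
  trap = function(g, a, b, n=100) {      ### composite trapezoid rule
      x = seq(a, b, length=n+1);  h = (b-a)/n
      h*( sum(g(x)) - (g(a)+g(b))/2 ) }
  simp = function(g, a, b, n=100) {      ### composite Simpson rule (n even)
      x = seq(a, b, length=n+1);  h = (b-a)/n
      (h/3)*sum( g(x) * c(1, rep(c(4,2), n/2-1), 4, 1) ) }
### quick check against a known value
  c(trap(dnorm, -3, 3), simp(dnorm, -3, 3), pnorm(3)-pnorm(-3))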
Another approach, called "Gaussian Quadrature", applies also to infinite ranges
and approximates integrals   \int g(x) f(x) dx   for functions g (smooth,
well-approximated by polynomials) by sums   \sum_{k=1}^n g(p_k) w_k,   when f(x)
is any of a number of special families of "kernels" (mostly, probability
densities after scaling).

The most important example for us is Gaussian quadrature with respect to the
"error-function" kernel  f(x) = exp(-x^2)  on (-Infty,Infty), which we view as
the N(0,1/2) density scaled up by a factor sqrt(pi).

> GHquad.wt =        #### adapted from the "Numerical Recipes" book
function (N)
{
    bvec <- sqrt((1:(N - 1))/2)
    elist <- eigen(matrix(replace(rep(0, N * N), (N + 1) * (1:(N - 1)),
        bvec) + replace(rep(0, N * N), seq(2, N * (N - 1), N + 1),
        bvec), ncol = N), symm = T)
    list(pts = elist$values, wts = sqrt(pi) * c(elist$vectors[1, ])^2)
}

Here N is the desired number of points (placed symmetrically around the origin).
In order to calculate E g(Z) for Z ~ N(0,1), view Z = sqrt(2)*Z' where Z' ~ N(0, 1/2):

  tmp = GHquad.wt(20)    ### N may be taken larger, but sometimes not necessary!
  sum( g(sqrt(2)*tmp$pts)*tmp$wts )/sqrt(pi)

### Example
> tmp=GHquad.wt(20);  tmpB=GHquad.wt(100)
> sum( (sqrt(2)*tmp$pts)^2*tmp$wts ) /sqrt(pi) - 1
[1] 2.220446e-15
> sum( (sqrt(2)*tmpB$pts)^2*tmpB$wts ) /sqrt(pi) - 1
[1] -6.994405e-15
> sum( (sqrt(2)*tmp$pts)^4*tmp$wts ) /sqrt(pi) - 3
[1] 4.884981e-15
> sum( (sqrt(2)*tmpB$pts)^4*tmpB$wts ) /sqrt(pi) - 3
[1] -3.330669e-14

### Now let's try a function g that is not a low-order polynomial
> A1 = sum( log(1+2*3.5*tmp$pts^2) * tmp$wts )/sqrt(pi)
> A2 = sum( log(1+2*3.5*tmpB$pts^2) * tmpB$wts )/sqrt(pi)
> c(A1=A1, A2=A2, diff=A1-A2)
         A1          A2        diff
1.094413002 1.089033938 0.005379065
> integrate(function(x) log(1+3.5*x^2)*dnorm(x),-Inf,Inf)
1.089027 with absolute error < 4.8e-08
> integrate(function(x) log(1+3.5*x^2)*dnorm(x),-Inf,Inf, rel.tol=1.e-9)
1.089027 with absolute error < 2.4e-11
> tmpC = GHquad.wt(160)
> sum( log(1+2*3.5*tmpC$pts^2) * tmpC$wts )/sqrt(pi)
[1] 1.089028

### Suppose we want integrals of log(1+b*x^2)*dnorm(x) for all b in seq(1,10,by=.01)

  bvec = seq(1,10,by=.01)
  vec1 = vec2 = numeric(901)
  unix.time({for(i in 1:901) vec1[i] =
      2*integrate(function(x,b) log(1+b*x^2)*dnorm(x),0,Inf,rel.tol=1.e-10,
                  b=bvec[i])$val })
   user  system elapsed
   0.11    0.00    0.10       ### 901 high-accuracy integrals, ~0.1 sec.

> unix.time( { tmp = GHquad.wt(100)
      mat = log(1+outer(2*bvec, tmp$pts^2,"*"))
      vec2 = c(mat %*% tmp$wts)/sqrt(pi) } )
   user  system elapsed
   0.01    0.00    0.02
> summary(vec1-vec2)      ### errors range from 1e-9 near b=1
                          ####   to 1e-4 near b=10
> unix.time( { tmp = GHquad.wt(200)
      mat = log(1+outer(2*bvec, tmp$pts^2,"*"))
      vec2 = c(mat %*% tmp$wts)/sqrt(pi) } )
   user  system elapsed
   0.01    0.00    0.02
> summary(vec1-vec2)
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max.
-2.431e-05 -7.312e-06 -1.122e-06 -4.661e-06 -3.171e-08  0.000e+00
> unix.time( { tmp = GHquad.wt(400)
      mat = log(1+outer(2*bvec, tmp$pts^2,"*"))
      vec2 = c(mat %*% tmp$wts)/sqrt(pi) } )
   user  system elapsed
   0.11    0.00    0.11
> summary(vec1-vec2)
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max.
-4.229e-07 -7.689e-08 -5.371e-09 -6.133e-08 -3.380e-11  0.000e+00

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>