Homework Solution to Problem 21.
=================================

> library(MASS)
> rubberlm = lm(loss ~ ., data=Rubber)
> rubberlm$coef
(Intercept)        hard        tens
   885.1611    -6.57083   -1.374312

### The simplest rule based on the linear regression is to predict each
### (hard,tens) observation as above-median if
###      885.1611 - 6.57083*hard - 1.374312*tens > 165.
### On the original data this yields

> table(rubberlm$fit > 165, Rubber$loss > 165)
        FALSE TRUE
  FALSE    14    1
  TRUE      1   14

### so only 2 are misclassified.

## The second method is logistic regression:

> Above = as.numeric(Rubber$loss > 165)
> rubberlgs <- glm(cbind(Above, 1-Above) ~ hard+tens, data=Rubber,
                   family=binomial)
> rubberlgs$coef
(Intercept)        hard        tens
   57.24511  -0.5125454  -0.1120316

> table(rubberlgs$fit > 0.5, Rubber$loss > 165)
        FALSE TRUE
  FALSE    14    2
  TRUE      1   13

### now 3 are misclassified.

## Third method: predict based on kernel NPR value > 165

> kerRubber = outer(rubberlm$fit, rubberlm$fit, function(x,y) dlogis(x,y,40))
  ## 30x30 matrix
> kerRubfit = c(kerRubber %*% Rubber$loss)/c(kerRubber %*% rep(1,30))
> table(kerRubfit > 165, Rubber$loss > 165)
        FALSE TRUE
  FALSE    14    3
  TRUE      1   12

### now 4 are misclassified.

> RubbAlt = cbind.data.frame(Rubber, binvec = as.numeric(Rubber$loss > 165))
> Prdct3 = function(trainfram, testfram) {
     trainlm = lm(loss ~ hard+tens, data=trainfram)
     testlmfit = trainlm$coef[1] +
          c(data.matrix(testfram[,2:3]) %*% trainlm$coef[2:3])
     trainlgs = glm(cbind(binvec, 1-binvec) ~ hard+tens, data=trainfram,
                    family=binomial)
     testlgsc = trainlgs$coef[1] +
          c(data.matrix(testfram[,2:3]) %*% trainlgs$coef[2:3])
     kertrain = outer(testlmfit, trainlm$fit, function(x,y) dlogis(x,y,40))
     kertest = c(kertrain %*% trainfram$loss)/c(kertrain %*% rep(1,nrow(trainfram)))
     list(lm.mis  = sum( (testlmfit > 165) != (testfram$loss > 165) ),
          glm.mis = sum( (testlgsc > 0)    != (testfram$loss > 165) ),
          ker.mis = sum( (kertest > 165)   != (testfram$loss > 165) )) }

> unlist(Prdct3(RubbAlt, RubbAlt))
 lm.mis glm.mis ker.mis
      2       3       4
## Reproduces the previous calculations.

### This completes part (i). NOTE that these estimates of mis-
### classification rates out of 30 will turn out to be too low!

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

#### Another way to do the kernel-density NPR modelling is by means of
#### ksmooth [which is just like the Nadaraya-Watson kernel-based
#### nonparametric regression estimate coded above] or by locpoly in
#### package KernSmooth, as follows:

> RubKsmth = ksmooth(rubberlm$fit, Rubber$loss, bandwidth=40,
                     kernel="normal", x.points=rubberlm$fit)
> table(RubKsmth$y > 165, Rubber$loss > 165)
        FALSE TRUE
  FALSE     9    7
  TRUE      6    8
### Not too good ?!  (Quite possibly because ksmooth returns its fit at the
### sorted x.points, so RubKsmth$y need not be aligned with Rubber$loss here.)

> library(KernSmooth)
> RubKsmth2 = locpoly(rubberlm$fit, Rubber$loss, degree=0, bandwidth=40)
> RubKsmth3 = locpoly(rubberlm$fit, Rubber$loss, degree=1, bandwidth=40)
> RubKsmth4 = locpoly(rubberlm$fit, Rubber$loss, degree=2, bandwidth=40)
### RubKsmth2 and RubKsmth3 are almost identical, nearly a straight-line fit,
### but RubKsmth4 has a nonlinear `hook' at the upper end.
### One must do some coding, though, to recover predictions linearly
### interpolated between the locpoly grid points (a sketch is given below,
### after the loess fit).

> RubKsmth5 = loess(loss ~ hard + tens, data=Rubber)
> table(RubKsmth5$fitted > 165, Rubber$loss > 165)
        FALSE TRUE
  FALSE    15    1
  TRUE      0   14
### This fit is much better, but costs more in terms of parameters!
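### The sketch promised just above, for recovering predictions from the
### locpoly fits: locpoly returns its estimate on an equally spaced grid
### (components x and y), and approx() linearly interpolates that grid at the
### observed rubberlm$fit values, after which the predictions can be tabulated
### against the true classes as before.  This is only a minimal sketch, not
### run here (so no output is shown), and the object name RubKsm4pred is
### illustrative.

> RubKsm4pred = approx(RubKsmth4$x, RubKsmth4$y, xout = rubberlm$fit)$y
    ## linear interpolation of the degree-2 locpoly fit at the fitted values
> table(RubKsm4pred > 165, Rubber$loss > 165)
    ## confusion table for the interpolated kernel predictions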
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

### Now resume the main thread of the HW Problem.

### NOTE: it turns out in further testing with this Prdct3 function that --
### as pointed out to me by a former student, Sam Younkin -- there are
### several cases of cross-validation training samples in which the logistic
### fit is "unstable": these are fits in which the (not-quite-converged)
### model is trying to produce essentially 0-or-1 predictions but still gets
### some cases wrong.  (A small diagnostic sketch for spotting such fits is
### given at the end.)

> Misclass = array(0, dim=c(1000,3),
        dimnames=list(NULL, c("lm.mis","glm.mis","ker.mis")))
> for(i in 1:1000) {
     lvout = sample(30,5)
     lvin = setdiff(1:30, lvout)
     Misclass[i,] = unlist(Prdct3(RubbAlt[lvin,], RubbAlt[lvout,])) }
> apply(Misclass,2,sum)/5000
    ## 1000 splits x 5 left-out points = 5000 predictions
 lm.mis glm.mis ker.mis
 0.1118  0.1162  0.1272

### These are the estimated misclassification rates, suggesting that
### "linear" works best for these data and that roughly one point in 8
### is misclassified.
### (Actually, we would need a considerably larger simulation study to
### confirm this, and such a study is constrained by the fact that there
### are only {30 choose 5} = 142506 distinct leave-5-out subsets.)
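### The diagnostic sketch promised above: one way (not part of the original
### solution, and not run here) to gauge how often the unstable logistic fits
### arise is to refit the training-sample glm over repeated leave-5-out
### splits and record whether the fit failed to converge, landed on the
### boundary, or produced fitted probabilities numerically equal to 0 or 1.
### The names Unstable and nrep below are illustrative only.

> nrep = 1000
> Unstable = logical(nrep)
> for(i in 1:nrep) {
     lvout = sample(30,5)
     trainlgs = suppressWarnings( glm(cbind(binvec, 1-binvec) ~ hard+tens,
                      data=RubbAlt[-lvout,], family=binomial) )
     ## flag non-convergence, boundary fits, or numerically 0/1 probabilities
     Unstable[i] = (!trainlgs$converged) || trainlgs$boundary ||
                   any(fitted(trainlgs) < 1e-8) || any(fitted(trainlgs) > 1 - 1e-8) }
> mean(Unstable)    ## fraction of splits with a suspect logistic fit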