HW18 solution steps
===================                      12/12/2015

library(MASS)
Traff = Traffic

table(Yr1961=Traffic$limit[1:92], Yr1962=Traffic$limit[93:184])
      Yr1962
Yr1961 no yes
   no  28  43
   yes 16   5

### So we view the days in 1961 and 1962 with same day-index as
## "paired" ie comparable except possibly for whether there was
## speed-limit and enforcement on the given day.

Traff2 = cbind.data.frame(Traff[1:92,3:4], Traff[93:184,3:4])
names(Traff2) = c("1961-lim","y1","1962-lim","y2")
Traff2[,1] = ifelse(Traff2[,1]=="yes",1,-1)
Traff2[,3] = ifelse(Traff2[,3]=="yes",1,-1)

(a) 
## If we really think that different (non-paired) days may be 
##  quite different, then all we can use in estimating average 
##  differences E(y|no-limit) - E(y|limit) are the discordant days
##  with limit in one year but not the other. Then the estimate 
##  would be:  
> mean((Traff2$y1*Traff2[,1]+Traff2$y2*Traff2[,3])[Traff2[,1]!=Traff2[,3]])
[1] -6.440678

## That is the point estimate: if we want 90% CI, then we want to allow
## all permutations of signs between the 1,-1 pairs in rows among
## the 59 discordant-pair observations. But we also want the null 
## distribution to reflect the residuals from lim and no-lim group means

tmpfr = Traff2[Traff2[,1]!=Traff2[,3],]
avy.lim = (sum(tmpfr$y1[tmpfr[,1]==1])+sum(tmpfr$y2[tmpfr[,3]==1]))/59
avy.nolim = (sum(tmpfr$y1[tmpfr[,1]==-1])+sum(tmpfr$y2[tmpfr[,3]==-1]))/59
> c(with.lim=avy.lim, without.lim=avy.nolim)
   with.lim without.lim 
   18.67797    25.11864     ### difference is the -6.44 found above
tmpfr$y1[tmpfr[,1]==1] = tmpfr$y1[tmpfr[,1]==1]-avy.lim
tmpfr$y1[tmpfr[,1]==-1] = tmpfr$y1[tmpfr[,1]==-1]-avy.nolim
tmpfr$y2[tmpfr[,3]==1] = tmpfr$y2[tmpfr[,3]==1]-avy.lim
tmpfr$y2[tmpfr[,3]==-1] = tmpfr$y2[tmpfr[,3]==-1]-avy.nolim

> mean(tmpfr$y1*tmpfr[,1]+tmpfr$y2*tmpfr[,3])
[1] -2.950324e-15   ### OK, =0

## the "permutations" are the selections of signs +/-

> indarr = array(2*rbinom(59*1e4,1,0.5)-1, c(1e4,59))
  ### these will be the +/- factors to apply to the single-index
  ### contributions to the statistic
  aux =  tmpfr$y1*tmpfr[,1]+tmpfr$y2*tmpfr[,3]
  
  permstat = c(indarr %*% aux)/59
> hist(permstat, nclass=50, prob=T)
  curve(dnorm(x,mean(permstat),sd(permstat)), c(-3.2,3.2), add=T, col="blue")
     ### visually perfect agreement
  c(mean(permstat)-qnorm(0.95)*sd(permstat), quantile(permstat, prob=0.05))
                 5% 
-1.608765 -1.605947 

  c(mean(permstat)+qnorm(0.95)*sd(permstat), quantile(permstat, prob=0.95))  
              95% 
1.608247 1.611663 


The CI is then:

> mean((Traff2$y1*Traff2[,1]+Traff2$y2*Traff2[,3])[
       Traff2[,1]!=Traff2[,3]]) - c(1.611663, -1.605947)
[1] -8.052341 -4.834731

OTHER COHERENTLY ARGUED SYSTEMS OF PERMUTATIONS MAY ALSO GIVE
ACCEPTABLE HW SOLUTIONS

(b) ## Now try statistic based on mean y over all cases with limit minus
## mean of y over all cases without limit.