HW18 solution steps =================== 12/12/2015 library(MASS) Traff = Traffic table(Yr1961=Traffic$limit[1:92], Yr1962=Traffic$limit[93:184]) Yr1962 Yr1961 no yes no 28 43 yes 16 5 ### So we view the days in 1961 and 1962 with same day-index as ## "paired" ie comparable except possibly for whether there was ## speed-limit and enforcement on the given day. Traff2 = cbind.data.frame(Traff[1:92,3:4], Traff[93:184,3:4]) names(Traff2) = c("1961-lim","y1","1962-lim","y2") Traff2[,1] = ifelse(Traff2[,1]=="yes",1,-1) Traff2[,3] = ifelse(Traff2[,3]=="yes",1,-1) (a) ## If we really think that different (non-paired) days may be ## quite different, then all we can use in estimating average ## differences E(y|no-limit) - E(y|limit) are the discordant days ## with limit in one year but not the other. Then the estimate ## would be: > mean((Traff2$y1*Traff2[,1]+Traff2$y2*Traff2[,3])[Traff2[,1]!=Traff2[,3]]) [1] -6.440678 ## That is the point estimate: if we want 90% CI, then we want to allow ## all permutations of signs between the 1,-1 pairs in rows among ## the 59 discordant-pair observations. But we also want the null ## distribution to reflect the residuals from lim and no-lim group means tmpfr = Traff2[Traff2[,1]!=Traff2[,3],] avy.lim = (sum(tmpfr$y1[tmpfr[,1]==1])+sum(tmpfr$y2[tmpfr[,3]==1]))/59 avy.nolim = (sum(tmpfr$y1[tmpfr[,1]==-1])+sum(tmpfr$y2[tmpfr[,3]==-1]))/59 > c(with.lim=avy.lim, without.lim=avy.nolim) with.lim without.lim 18.67797 25.11864 ### difference is the -6.44 found above tmpfr$y1[tmpfr[,1]==1] = tmpfr$y1[tmpfr[,1]==1]-avy.lim tmpfr$y1[tmpfr[,1]==-1] = tmpfr$y1[tmpfr[,1]==-1]-avy.nolim tmpfr$y2[tmpfr[,3]==1] = tmpfr$y2[tmpfr[,3]==1]-avy.lim tmpfr$y2[tmpfr[,3]==-1] = tmpfr$y2[tmpfr[,3]==-1]-avy.nolim > mean(tmpfr$y1*tmpfr[,1]+tmpfr$y2*tmpfr[,3]) [1] -2.950324e-15 ### OK, =0 ## the "permutations" are the selections of signs +/- > indarr = array(2*rbinom(59*1e4,1,0.5)-1, c(1e4,59)) ### these will be the +/- factors to apply to the single-index ### contributions to the statistic aux = tmpfr$y1*tmpfr[,1]+tmpfr$y2*tmpfr[,3] permstat = c(indarr %*% aux)/59 > hist(permstat, nclass=50, prob=T) curve(dnorm(x,mean(permstat),sd(permstat)), c(-3.2,3.2), add=T, col="blue") ### visually perfect agreement c(mean(permstat)-qnorm(0.95)*sd(permstat), quantile(permstat, prob=0.05)) 5% -1.608765 -1.605947 c(mean(permstat)+qnorm(0.95)*sd(permstat), quantile(permstat, prob=0.95)) 95% 1.608247 1.611663 The CI is then: > mean((Traff2$y1*Traff2[,1]+Traff2$y2*Traff2[,3])[ Traff2[,1]!=Traff2[,3]]) - c(1.611663, -1.605947) [1] -8.052341 -4.834731 OTHER COHERENTLY ARGUED SYSTEMS OF PERMUTATIONS MAY ALSO GIVE ACCEPTABLE HW SOLUTIONS (b) ## Now try statistic based on mean y over all cases with limit minus ## mean of y over all cases without limit.