Homework Solutions to Problems 3--4. =================================== [NB solutions are in Splus 6.0] (3) (a) > matcols <- function(xvec, outcols) { outmat <- cbind(rep(1,length(xvec)), xvec) if(outcols > 1) for(i in 2:outcols) outmat <- cbind(outmat, xvec^i) outmat } > matcols(1:3,3) xvec [1,] 1 1 1 1 [2,] 1 2 4 8 [3,] 1 3 9 27 > matcols(2:5,1) xvec [1,] 1 2 [2,] 1 3 [3,] 1 4 [4,] 1 5 (b) > convtype <- function(infram, charac=NULL, numer=NULL, bool=NULL){ tmpfram <- infram if(!is.null(charac) && charac == "factor") { charcols <- unlist(lapply(tmpfram, is.character)) if(sum(charcols)) for(i in (1:ncol(tmpfram))[charcols]) tmpfram[,i] <- factor(tmpfram[,i]) } if(!is.null(numer) && numer == "charac") { numcols <- unlist(lapply(tmpfram, is.numeric)) if(sum(numcols)) for(i in (1:ncol(tmpfram))[numcols]) tmpfram[,i] <- as.character(tmpfram[,i]) } if(!is.null(bool) && (bool=="charac" | bool=="numer")) { boolcols <- unlist(lapply(tmpfram, is.logical)) if(bool=="charac") for(i in (1:ncol(tmpfram))[boolcols]) tmpfram[,i] <- as.character(tmpfram[,i]) else for(i in (1:ncol(tmpfram))[boolcols]) tmpfram[,i] <- as.numeric(tmpfram[,i]) } tmpfram } > auxfram <- cbind.data.frame(factor(as.character(1:10)), rep(c(3,5),5), rep(c(3,5),5) < 4, letters[12:21], stringsAsFactors=F) > auxfram X1 X2 X3 X4 1 1 3 T l 2 2 5 F m 3 3 3 T n 4 4 5 F o 5 5 3 T p 6 6 5 F q 7 7 3 T r 8 8 5 F s 9 9 3 T t 10 10 5 F u > unlist(lapply(auxfram,class)) X1 X2 X3 X4 "factor" "integer" "logical" "character" > convtype(auxfram, charac="factor") X1 X2 X3 X4 1 1 3 T l 2 2 5 F m 3 3 3 T n 4 4 5 F o 5 5 3 T p 6 6 5 F q 7 7 3 T r 8 8 5 F s 9 9 3 T t 10 10 5 F u > unlist(lapply(.Last.value,class)) X1 X2 X3 X4 "factor" "integer" "logical" "factor" > convtype(auxfram, numer="charac", bool="numer") X1 X2 X3 X4 1 1 3 1 l 2 2 5 0 m 3 3 3 1 n 4 4 5 0 o 5 5 3 1 p 6 6 5 0 q 7 7 3 1 r 8 8 5 0 s 9 9 3 1 t 10 10 5 0 u > unlist(lapply(.Last.value,class)) X1 X2 X3 X4 "factor" "character" "numeric" "character" (4) (a) > SimLin <- function(errsd, coef, xvec=NULL, simx = NULL) { ### NB: required arguments are: ### errsd = error standard deviation ### coef = vector with 2 components (intercept,slope) ### Either vector xvec or sim=(mean,sd,length) ### parameter vector must be given non-null if(is.null(xvec)) xvec <- rnorm(simx[3],simx[1],simx[2]) list( xvec = xvec, coef = coef, errsd = errsd, simx = simx, yvec =coef[1]+coef[2]*xvec+rnorm(length(xvec))*errsd) } ### eg > SimLin(0.5, c(-1,1), 1:5) -> slst1 > round(slst1$yvec,3) [1] -0.010 0.494 2.458 2.309 3.765 (b) > slst2 <- SimLin(0.5, c(-1,1), xvec=runif(1000)) slst3 <- SimLin(1, c(2,.2), simx= c(0,1,1000)) ## For first case, show the numbers of xvec obs in each interval ## (k/10, (k+1)/10], and theoretical and observed mean, variance, ## covariance among xvec and yvec values > table(floor(slst2$xvec*10)) 0 1 2 3 4 5 6 7 8 9 110 93 104 100 104 99 113 101 105 71 ## Fairly even distribution: BUT probability for a count <= 71 in a ## specific interval is: > pbinom(71,1000,.1) [1] 0.0008603278 ### This is too small !! Bad random numbers! ## The next 10 batches of runif(1000) numbers gave smallest interval ## counts resp. equal to 85, 79, 93, 88, 93, 86, 79, 87, 85, 90 > round(c(mean(slst2$xvec), mean(slst2$yvec), var(cbind(slst2$xvec, slst2$yvec))), 4) [1] 0.4871 -0.5194 0.0796 0.0724 0.0724 0.3273 ### cf. .5, -.5 .0833 .0833 .0833 .3333 Theor. values ### Now exhibit residuals epsilon for slst3 , through fractions ### falling in ranges (-10,-1.5), (-1.5,-.5), (-.5,.5), (.5,1.5), (1.5,10) > eps3 <- slst3$yvec - 2 - .2*slst3$xvec hist(eps3, breaks=c(-10,-1.5,-.5,.5,1.5,10), plot=F) .. $counts: [1] 72 234 374 241 79 ### compare with theoretical expected values > round(diff(pnorm(c(-10,-1.5,-.5,.5,1.5,10)))*1000,2) [1] 66.81 241.73 382.92 241.73 66.81 ### OK !!! ### And again: means and (co)variances followed by theoretical values > round(c(mean(slst3$xvec), mean(slst3$yvec), var(cbind(slst3$xvec, slst3$yvec))), 4) [1] 0.0137 2.0281 1.0110 0.1638 0.1638 1.0816 ### 0 2 1 .2 .2 1.04 Theor. values