R Log for Extended Data Analysis of Southern Oscillation Index (SOI)
and Recruitment (new fish population) Time Series
========================================================== 4/16/17 ff.

### http://www.stat.pitt.edu/stoffer/tsa2/data/soi.dat.txt and
### http://www.stat.pitt.edu/stoffer/tsa2/data/recruit.dat.txt

> SOI = ts(scan("http://www.stat.pitt.edu/stoffer/tsa2/data/soi.dat.txt"),
             start=1950, frequency=12)            ### read 453 items

  Rec = ts(scan("http://www.stat.pitt.edu/stoffer/tsa2/data/recruit.dat.txt"),
             start=1950, frequency=12) 
 
## Example 1.5 plotting
>   par(mfrow = c(2,1), mar=c(3, 4, 3, 2))         # set up the graphics
    plot(SOI, ylab="", xlab="", main="Southern Oscillation Index")
    plot(Rec, ylab="", xlab="", main="Recruitment")

> par(mfrow=c(3,1))
 acf(SOI, 60)
 acf(Rec, 60)
 ccf(SOI, Rec, 60)       ### so far, as in book except took lags out to 5yr

### Very slowly decaying --- SOI has strong yearly cycle !!! This immediately 
###  suggests we should want to plot the periodogram to check this and any other 
###  "hidden perodicities"

### functions to call, "fft" (indirectly) via "spec.pgram" or "spectrum"
### Note that
> tmp = spec.pgram(SOI)
  tmp = spec.pgram(SOI, spans=c(2,2), taper=0)   ## Daniell window/smoother
  tmp = spec.pgram(SOI, spans=c(2,4), taper=0)   ## Daniell window/smoother

## Clear peak at 1,2, etc., perhaps another near 1/4 eg 4+ year cycle
## Note that the scaling of the spec.pgram output is for [0,frequency/2], 
##    where frequency is the 3rd tsp parameter, 12/yr in this case. 
##  (See the "help" documentation for "spectrum" to confirm this.) So the 
##  number 6 is identified with the frequency pi in the interval [-pi,pi].
##  The values near 1/4 say, actually correspond to lambda = (pi/6)*1/4 = 2*pi/48
##  which is roughly a 4-year period. The peaks near 1,2,3,3.8,4.8 correspond to
##    lambda resp. equal to (pi/6)*c(1:3,3.9,5.8) ~ 2*pi*c(1/12,1/6,1/4,1/3,1/2)
 abline(v=1/4, lty="dotted")
 abline(v=1, lty="dotted")
 abline(v=2, lty="dotted")
 abline(v=3, lty="dotted")

> tmp = spec.pgram(Rec)
  tmp = spec.pgram(Rec, spans=c(3,3))   ## Daniell window/smoother
  tmp = spec.pgram(Rec, spans=c(3,5))   ## Daniell window/smoother
## Even STRONGER PEAK NEAR 1/4

## Installed package "astsa" from CRAN, already contained data "rec", "soi"
> attributes(rec)
$tsp
[1] 1950.000 1987.667   12.000

$class
[1] "ts"

### Another preliminary plot, "lag-plot" can be used to show scatterplot behavior
###  of SOI versus lagged versions of it.

> lag.plot(soi, lags=12, layout=c(3,4), diag=FALSE)
### In example 2.6, the book has us do lagged-plots of SOI vs Rec, but we will
###   return to these questions of series dependence later ...

### Now try some models

         [,1]     [,2] [,3]
[1,] 1950.000 1987.667   12
[2,] 1949.917 1987.583   12
[3,] 1950.083 1987.750   12

> SOIlagmat = array(NA, c(453,50))
  for(i in 1:50) SOIlagmat[(i+1):453,i] = SOI[1:(453-i)]

> SOIlagmat[1:6,1:6]
       [,1]  [,2]  [,3]  [,4]  [,5] [,6]
[1,]     NA    NA    NA    NA    NA   NA
[2,]  0.377    NA    NA    NA    NA   NA
[3,]  0.246 0.377    NA    NA    NA   NA
[4,]  0.311 0.246 0.377    NA    NA   NA
[5,]  0.104 0.311 0.246 0.377    NA   NA
[6,] -0.016 0.104 0.311 0.246 0.377   NA

> tmpfit = lm(SOI ~ SOIlagmat)

> round(summary(tmpfit)$coef[,3],3)
(Intercept)  SOIlagmat1  SOIlagmat2  SOIlagmat3  SOIlagmat4  SOIlagmat5  SOIlagmat6 
      0.627       5.556       1.191       3.034       2.329       0.966       0.211 
 SOIlagmat7  SOIlagmat8  SOIlagmat9 SOIlagmat10 SOIlagmat11 SOIlagmat12 SOIlagmat13 
     -0.511      -1.202      -0.342       0.710       0.912       1.025      -0.004 
SOIlagmat14 SOIlagmat15 SOIlagmat16 SOIlagmat17 SOIlagmat18 SOIlagmat19 SOIlagmat20 
     -2.165      -1.491       0.020       0.539      -0.534       0.044       0.187 
SOIlagmat21 SOIlagmat22 SOIlagmat23 SOIlagmat24 SOIlagmat25 SOIlagmat26 SOIlagmat27 
     -0.013       0.671       1.656       0.512      -0.628      -1.433       0.221 
SOIlagmat28 SOIlagmat29 SOIlagmat30 SOIlagmat31 SOIlagmat32 SOIlagmat33 SOIlagmat34 
      0.279      -0.056      -2.114      -0.923       0.446      -0.637       0.752 
SOIlagmat35 SOIlagmat36 SOIlagmat37 SOIlagmat38 SOIlagmat39 SOIlagmat40 SOIlagmat41 
      1.969       2.407       0.744      -0.214      -1.977      -1.035      -0.763 
SOIlagmat42 SOIlagmat43 SOIlagmat44 SOIlagmat45 SOIlagmat46 SOIlagmat47 SOIlagmat48 
      1.134      -0.400      -0.602       0.539      -0.627       1.992       0.491 
SOIlagmat49 SOIlagmat50 
      0.481       0.369 

### Suggests that lags 1:4,14:15,35:36,47 might have significant coefs

> par(mfrow=c(1,1))
  pacf(SOI,60)

### This does suggest lag effects dying off after a couple of years, with 
###   some real peaks near 1/4 to 1/3, 1, 2, 3, 4

> SOIfr = data.frame(cbind(SOI,SOIlagmat)[51:453,])
  names(SOIfr)[2:51] = paste("SOI",1:50,sep="")
  tmpfit0 = lm(SOI~SOI1, data=SOIfr)
  tmpfitA = lm(SOI~., data=SOIfr)
  tmpfit2 = step(tmpfit0, formula(tmpfitA), direction="forward", data=SOIfr)
### series of "steps" using default AIC k=2 indicate 
###   initial AIC = -948.72 with single variable SOI1
###   successive steps add:  SOI47, SOI30, SOI35, SOI3, SOI23, SOI14, 
###                  SOI39, DSOI36, SOI4, AOI11, SOI31, SOI15, SOI12
> anova(tmpfit2)

Analysis of Variance Table

Response: SOI
           Df  Sum Sq Mean Sq  F value    Pr(>F)    
SOI1        1 20.9481 20.9481 312.9506 < 2.2e-16 ***
SOI47       1  5.0217  5.0217  75.0203 < 2.2e-16 ***
SOI30       1  1.4705  1.4705  21.9680 3.844e-06 ***
SOI35       1  1.5435  1.5435  23.0596 2.247e-06 ***
SOI3        1  0.5795  0.5795   8.6578 0.0034522 ** 
SOI23       1  0.7490  0.7490  11.1901 0.0009025 ***
SOI14       1  0.6301  0.6301   9.4132 0.0023055 ** 
SOI39       1  0.3609  0.3609   5.3921 0.0207446 *  
SOI36       1  0.5061  0.5061   7.5613 0.0062424 ** 
SOI4        1  0.3755  0.3755   5.6104 0.0183430 *  
SOI11       1  0.2244  0.2244   3.3519 0.0678939 .  
SOI31       1  0.1750  0.1750   2.6141 0.1067318    
SOI15       1  0.1541  0.1541   2.3028 0.1299540    
SOI2        1  0.1344  0.1344   2.0079 0.1572824    
Residuals 388 25.9717  0.0669                       
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> c(extractAIC(tmpfit0), extractAIC(tmpfitA), extractAIC(tmpfit2))
[1]     2.0000  -948.7225    51.0000 -1019.6893    15.0000 -1074.9972

> round(t(summary(tmpfit2)$coef[,c(1,3)]),4)
         (Intercept)   SOI1  SOI47   SOI30  SOI35   SOI3  SOI23   SOI14   SOI39
Estimate      0.0125 0.3232 0.1272 -0.1399 0.1272 0.1590 0.1121 -0.1149 -0.1157
t value       0.8254 6.6742 2.9804 -3.1611 2.6326 3.1091 2.7889 -2.4565 -2.7247
          SOI36   SOI4  SOI11   SOI31   SOI15   SOI2
Estimate 0.1350 0.1161 0.0856 -0.0731 -0.0730 0.0714
t value  2.9238 2.4631 2.0000 -1.6073 -1.5706 1.4170

###  t-distribution p-values correspond to residual.df = 453-50-15        

> c(2*(1-pt(abs(summary(tmpfit2)$coef[15,3]),388)), summary(tmpfit2)$coef[15,4])
[1] 0.1572824 0.1572824
> c(2*(1-pt(abs(summary(tmpfit2)$coef[14,3]),388)), summary(tmpfit2)$coef[14,4])
[1] 0.1170939 0.1170939

### Now let's try it using and ARMA model fitting technique
### later we can compare coefficients

> tmpfitT0 = arima0(SOI,order=c(50,0,0), method="CSS")
> round(tmpfitT0$coef/sqrt(diag(tmpfitT0$var.coef)),3)
      ar1       ar2       ar3       ar4       ar5       ar6       ar7       ar8 
    7.015     1.584     3.525     2.329     0.400    -0.645    -0.939    -1.178 
      ar9      ar10      ar11      ar12      ar13      ar14      ar15      ar16 
   -0.295     1.615     1.423     1.596     0.104    -2.742    -1.512    -0.305 
     ar17      ar18      ar19      ar20      ar21      ar22      ar23      ar24 
    0.402    -0.413    -0.388     0.121    -0.214     1.080     1.805     0.443 
     ar25      ar26      ar27      ar28      ar29      ar30      ar31      ar32 
   -0.936    -1.146     0.283     0.495     0.472    -2.341    -0.893     0.404 
     ar33      ar34      ar35      ar36      ar37      ar38      ar39      ar40 
   -0.678     0.726     1.389     2.610     0.597    -0.208    -2.393    -1.277 
     ar41      ar42      ar43      ar44      ar45      ar46      ar47      ar48 
   -0.137     1.906    -0.057    -0.705     0.395    -1.107     2.162     0.346 
     ar49      ar50 intercept 
    0.166     0.384     1.613 

> round(rbind(tmpfitT0$coef, tmpfit$coef[c(2:51,1)]),3)
       ar1   ar2   ar3   ar4   ar5    ar6    ar7    ar8    ar9  ar10  ar11  ar12
[1,] 0.330 0.079 0.176 0.118 0.020 -0.033 -0.048 -0.060 -0.015 0.082 0.073 0.081
[2,] 0.296 0.066 0.170 0.132 0.055  0.012 -0.029 -0.068 -0.019 0.040 0.052 0.058
      ar13   ar14   ar15   ar16  ar17   ar18   ar19  ar20   ar21  ar22  ar23  ar24
[1,] 0.005 -0.140 -0.077 -0.016 0.021 -0.021 -0.020 0.006 -0.011 0.055 0.093 0.023
[2,] 0.000 -0.123 -0.084  0.001 0.030 -0.030  0.002 0.011 -0.001 0.038 0.092 0.029
       ar25   ar26  ar27  ar28   ar29   ar30   ar31  ar32   ar33  ar34  ar35  ar36
[1,] -0.048 -0.059 0.015 0.026  0.024 -0.121 -0.046 0.021 -0.036 0.038 0.073 0.137
[2,] -0.035 -0.080 0.012 0.016 -0.003 -0.119 -0.052 0.025 -0.036 0.043 0.112 0.136
      ar37   ar38   ar39   ar40   ar41  ar42   ar43   ar44  ar45   ar46  ar47  ar48
[1,] 0.031 -0.011 -0.125 -0.067 -0.007 0.100 -0.003 -0.037 0.021 -0.059 0.115 0.018
[2,] 0.042 -0.012 -0.111 -0.058 -0.043 0.064 -0.023 -0.034 0.030 -0.035 0.112 0.027
      ar49  ar50 intercept
[1,] 0.009 0.019     0.072
[2,] 0.027 0.020     0.011

> .Last.value[,which(abs(tmpfitT0$coef/sqrt(diag(tmpfitT0$var.coef)))>1.645)]
       ar1   ar3   ar4   ar14  ar23   ar30  ar36   ar39  ar42  ar47
[1,] 0.330 0.176 0.118 -0.140 0.093 -0.121 0.137 -0.125 0.100 0.115
[2,] 0.296 0.170 0.132 -0.123 0.092 -0.119 0.136 -0.111 0.064 0.112

## Compare these with those identified in stepwise regression:
##   1, 3, 4, 11, 12, 14, 15, 23, 30, 31, 35, 36, 39, 47

## The set found in stepwise regression with k=5:

> tmpfit2B = step(tmpfit0, formula(tmpfitA), direction="forward", 
        k=5, data=SOIfr)
> names(tmpfit2B$coef)
 [1] "(Intercept)" "SOI1"        "SOI47"       "SOI30"       "SOI35"      
 [6] "SOI3"        "SOI23"       "SOI14"       "SOI39"       "SOI36"      
[11] "SOI4" 

### Next consider residuals in these stepwise regression fits (based on 51:453)

> hist(tmpfit2B$residuals, prob=T, nclass=32)
  xres = sort(tmpfit2B$residuals)
> c(mean(xres), sum(xres^2)/tmpfit2B$df.residual)
[1] 5.174039e-18 6.800921e-02

> lines(xres, dnorm(xres,0,sqrt(.Last.value[2])), col="red")
### So normality not too bad with this fitted AR(47) model !

> tmpfitT1 = arima0(SOI,order=c(47,0,2), method="ML")
               #### VERY slow to converge !!?


> rbind(ar50=extractAIC(tmpfit),
        step2=extractAIC(tmpfit2),
        step2B=extractAIC(tmpfit2B))
       [,1]      [,2]
ar50     51 -1019.689
step2    15 -1074.997
step2B   11 -1072.462

> logLik(tmpfit)
'log Lik.' -10.98759 (df=52)
> logLik(tmpfit2)
'log Lik.' -19.33364 (df=16)
> logLik(tmpfit2B)
'log Lik.' -24.60119 (df=12)
> tmpfitT0$loglik
[1] -20.53249
> tmpfitT1$loglik
[1] -17.54536

> rbind(coef=tmpfitT1$coef[48:49], tscor=tmpfitT1$coef[48:49]/
            sqrt(tmpfitT1$var.coef[cbind(48:49,48:49)]))
             ma1         ma2
coef  -0.3185705 -0.03261856
tscor -0.4840470 -0.08436763

### So let's proceed with an AR(47) model
### NOTE: the model could be refitted with the ar R-function

> tmp = spec.pgram(SOI, spans=c(3,5))  
   ### this periodogram was a bit smoothed, plotted before
  ### now overlay with AR-model-based periodogram
  spec.ar(SOI, 200, order = 47, add=T, col="blue", 
        na.action = na.fail, method = "yule-walker")
### VERY NICE OVERLAY, ALTHOUGH SOME OF THE DIPS ARE NOT
###   QUITE AS DEEP AS WITH THE SMOOTHED PERIODOGRAM ...

>  k = kernel("daniell",4)
> names(k)
[1] "coef" "m"   
> k
Daniell(4) 
coef[-4] = 0.1111
coef[-3] = 0.1111
coef[-2] = 0.1111
coef[-1] = 0.1111
coef[ 0] = 0.1111
coef[ 1] = 0.1111
coef[ 2] = 0.1111
coef[ 3] = 0.1111
coef[ 4] = 0.1111

> soi.ave = spec.pgram(SOI, k, taper=0)
 abline(v=1, lty="dotted")
 abline(v=2, lty="dotted")
 abline(v=3, lty="dotted")
 abline(v=1/4, lty="dotted")

### Other modifications in examples 4.11, 4.12 ...

#----------------------------------------------------
# Try forecasting: from "predict" with an arima fit

> aux = predict(tmpfitT0, n.ahead=5)
       ### can also use predict.ar for model fitted with "ar"

> tmp400 = ar.yw(SOI[1:400], AIC=F, order=47)
  tims = time(SOI)
  plot(tims,SOI, type="l")
> prd400 = predict(tmp400, n.ahead=53)

> plot(tims[401:453], SOI[401:453], xlab="",ylab="SOI",main=
     "Multistep Model-Based Prediction of SOI")
  lines(tims[401:453], prd400$pred, col="red")

## We might do better with a more parsimonious model, e.g. the AR 
##   model coming from the stochastic regression tmpfit2B

> formula(tmpfit2B)
SOI ~ SOI1 + SOI47 + SOI30 + SOI35 + SOI3 + SOI23 + SOI14 + SOI39 + 
    SOI36 + SOI4
> coef(tmpfit2B)
(Intercept)        SOI1       SOI47       SOI30       SOI35        SOI3 
 0.00836317  0.38043494  0.15365533 -0.18981562  0.13741423  0.16962452 
      SOI23       SOI14       SOI39       SOI36        SOI4 
 0.12454273 -0.11415515 -0.13083063  0.13577173  0.11034122 

> aux2B = update(tmpfit2B, data=SOIfr[1:400,])
> aux2B$coef
 (Intercept)         SOI1        SOI47        SOI30        SOI35         SOI3 
 0.006397479  0.379166219  0.157492612 -0.184067534  0.136278366  0.176400176 
       SOI23        SOI14        SOI39        SOI36         SOI4 
 0.120575252 -0.112390142 -0.133060104  0.143285172  0.106106028

### These will serve as AR coefficients, the others 0; to find the mean:
> mean2B = aux2B$coef[1]/(1-sum(aux2B$coef[-1]))
  mean2B
(Intercept) 
  0.03043318  
> phi2B = numeric(47)
  phi2B[c(1,47,30,35,3,23,14,39,36,4)] = aux2B$coef[-1]

## Now do the forecasting another way:

> prd2B =  numeric(53)
  xvec = SOI[(400-46):400]
  for(i in 1:53) {
      prd2B[i] = mean2B + sum(xvec[1:47]*phi2B)
      xvec = c(xvec[2:47],prd2B) }

> lines(tims[401:453], prd2B, col="blue")
  legend(locator(), legend=c("AR(47) full model","tmpfit2B"),
         col=c("red","blue"), lty=1)
  ## saved as SOIforecast.pdf                          WE STOPPED HERE IN CLASS 4/19/17
                                                       RESUMED   4/26/2017
###---------------------------------------------------------------------
### Still to do with this data application: Q goodness of fit statistic,
###   and some indication of coherency and dependence of Rec on SOI.

> Box.test(tmpfitA$resid, fitdf=51, lag=75)

        Box-Pierce test

data:  tmpfitA$resid
X-squared = 23.379, df = 24, p-value = 0.4975

> Box.test(tmpfit2$resid, fitdf=15, lag=50)

        Box-Pierce test

data:  tmpfit2$resid
X-squared = 23.09, df = 35, p-value = 0.9388

> Box.test(tmpfit2B$resid, fitdf=11, lag=50)

        Box-Pierce test

data:  tmpfit2B$resid
X-squared = 30.365, df = 39, p-value = 0.8374

### So the fit of any of these models is `adequate' by the Q test
###---------------------------------------------------------------

> par(mfrow=c(1,1))
  ccf(SOI, Rec, 60)   
  
> SOI.Rec = ts(cbind(SOI,Rec), freq=12)  
>    class(SOI.Rec)
[1] "mts"    "ts"     "matrix"

> tmp = spec.pgram(SOI.Rec, k, taper=0)
 abline(v=1, lty="dotted") 
 abline(v=2, lty="dotted")
 abline(v=3, lty="dotted")
 abline(v=1/4, lty="dotted")
 
> plot(tmp$freq,tmp$coh,type="l", xlab="frequency", 
     ylab="Sq.Coherence", main=
	 paste("Smoothed-Periodogram Coherence Plot",
	 "\n for SOI and Recruitment Series",sep="")) 
 abline(v=1, lty="dotted") 
 abline(v=2, lty="dotted")
 abline(v=3, lty="dotted")
 abline(v=1/4, lty="dotted")

### Return to lag-plots and other exhibits for these datasets at 
###   http://www.stat.pitt.edu/stoffer/tsa2/textRcode.htm

### Remaining: FURTHER INTERPRETATION OF COHERENCY AND LAGGED DEPENDENT 
###  OF RECRUITMENT ON  SOI , quantified by STOCHASTIC REGRESSION

par(mfrow=c(2,1))
plot(SOI.Rec)

par(mfrow=c(1,1))
plot(tmp)
 plot(tmp$spec[,1], type="l")
 abline(v=(1/6)*240, lty="dotted") 
 abline(v=(2/6)*240, lty="dotted")
 abline(v=(3/6)*240, lty="dotted")
 abline(v=(1/24)*240, lty="dotted")
 
 plot(tmp$freq, tmp$phase, xlab="frequency", ylab="Phase of Coherence", 
     main=  paste("Plot of Phase of f_{XY}(lambda) \n",
	   "for SOI and Recruitment Series",sep=""))
 
### This plot shows that for lambda in (-pi,pi] the phase is something like 
###   arg( f_{SOI,Rec}(lambda) = exp(2i*pi*(lambda*6/(1.1*pi) - 1))
###   which is roughly associated with Y_t being best approximated by c*X_{t-1}
###   (among time-invariant linear filters of X)

## We can see from the previous ccf plot
>  ccf(SOI, Rec, 60)

### that the correlation is negative, so the SOI depresses Recruitment
### with lag 0.5 yr to 1.5 yr
### Let's confirm this with a stochastic-regession approach:

> names(SOIfr)[53:70] = paste("Rec",1:18,sep="")
> names(SOIfr)
 [1] "SOI"   "SOI1"  "SOI2"  "SOI3"  "SOI4"  "SOI5"  "SOI6"  "SOI7"  "SOI8" 
[10] "SOI9"  "SOI10" "SOI11" "SOI12" "SOI13" "SOI14" "SOI15" "SOI16" "SOI17"
[19] "SOI18" "SOI19" "SOI20" "SOI21" "SOI22" "SOI23" "SOI24" "SOI25" "SOI26"
[28] "SOI27" "SOI28" "SOI29" "SOI30" "SOI31" "SOI32" "SOI33" "SOI34" "SOI35"
[37] "SOI36" "SOI37" "SOI38" "SOI39" "SOI40" "SOI41" "SOI42" "SOI43" "SOI44"
[46] "SOI45" "SOI46" "SOI47" "SOI48" "SOI49" "SOI50" "Rec"   "Rec1"  "Rec2" 
[55] "Rec3"  "Rec4"  "Rec5"  "Rec6"  "Rec7"  "Rec8"  "Rec9"  "Rec10" "Rec11"
[64] "Rec12" "Rec13" "Rec14" "Rec15" "Rec16" "Rec17" "Rec18"

> Recfit0 = lm(Rec ~ ., data=SOIfr)
> Recfit0$coef[abs(summary(Recfit0)$coef[,3]) > 1.96]
(Intercept)        SOI5        SOI6       SOI13       SOI22       SOI29 
 23.3790633 -19.5901418   9.9313342   4.3658260   4.2519692  -4.7800181 
      SOI31        Rec1        Rec2        Rec9 
  3.8325738   1.1833913  -0.3124311  -0.1856093 
               #### these are the significant coefficients
> Recfit1 = step(Recfit0, k=5, direction="backward")			   
> anova(Recfit1)
Analysis of Variance Table

Response: Rec
           Df Sum Sq Mean Sq   F value    Pr(>F)    
SOI5        1  79761   79761 1717.5733 < 2.2e-16 ***
SOI6        1  37619   37619  810.0900 < 2.2e-16 ***
SOI9        1  44787   44787  964.4391 < 2.2e-16 ***
SOI29       1   5533    5533  119.1466 < 2.2e-16 ***
SOI31       1     30      30    0.6546  0.418970    
Rec1        1 118772  118772 2557.6353 < 2.2e-16 ***
Rec2        1   4353    4353   93.7386 < 2.2e-16 ***
Rec4        1    149     149    3.2092  0.073997 .  
Rec5        1    255     255    5.4835  0.019698 *  
Rec7        1   1611    1611   34.6872 8.351e-09 ***
Rec16       1    340     340    7.3112  0.007152 ** 
Residuals 391  18157      46                        
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

>  extractAIC(Recfit1, k=5)
[1]   12.000 1594.582
>  extractAIC(Recfit0, k=5)
[1]   70.000 1820.338

> Recfit2 = update(Recfit1, formula = .~. - SOI31-Rec4)
>  extractAIC(Recfit2, k=5)
[1]   10.000 1610.984
>  c(logLik(Recfit0), logLik(Recfit1), logLik(Recfit2))
[1] -1307.001 -1339.123 -1352.324

> round(t(summary(Recfit1)$coef[,1:2]),4)
           (Intercept)     SOI5   SOI6    SOI9   SOI29  SOI31   Rec1    Rec2
Estimate       15.0180 -20.0130 9.1309 -5.2130 -4.7081 3.6628 1.2109 -0.3087
Std. Error      1.7379   1.1811 1.4742  1.4045  1.1349 1.0885 0.0451  0.0489
              Rec4   Rec5    Rec7   Rec16
Estimate   -0.2341 0.2726 -0.1218 -0.0370
Std. Error  0.0536 0.0492  0.0220  0.0137

### Let's try a 5-step ahead model:

> Recfit3 = step(lm(Rec ~ ., data=SOIfr[,c(6:52,57:70)]), 
         k=5, direction="backward")   ### left out SOI and all lags < 5
> extractAIC(Recfit3, k=5) 
[1]   15.000 2208.644