NOTES and R Log for Lecture 5/3/2017                    STAT 730  

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

Additional notes related to ARIMA Model-Building and Diagnostics

SEC 3.8.

(1) Introduce   idea  X_t = m_t + S_t + Z_t  with  

      m_t the (long-term) "signal" (essentially, provided by low-pass filter

      S_t the "seasonal" part with known structural period d (usually 

          corresp. to 1 yr, so 4 for quarterly series, 12 for monthly, etc.)

      Z_t stationary residuals, modeled as ARMA(p,q)


(2) Example as in book with GNP data:

> GNP = ts(read.table("http://www.stat.pitt.edu/stoffer/tsa2/data/gnp96.dat.txt")[,2],
             start=1947.25, frequency=4)        

> tsp(GNP)
[1] 1947.25 2002.75    4.00      ### 223 quarterly observations (in $10^6)

### Consider series of year-average observations through end of 2001


> YrGnp = ts(apply(matrix(GNP[1:220], c(55,4), byrow=T),1,mean), 
             start=1947, freq=1)

> tsp(diff(YrGnp))
[1] 1948 2001    1     ### differencing yields a "time series" result

plot(YrGnp, main="GNP series, Yr avgs, 1947-2001")

abline(lm(YrGnp ~ time(YrGnp))$coef, col="red")  

tmp0 = diff(YrGnp)

plot(tmp0, main="1st Diff of GNP series Yr avgs")

abline(lm(tmp0 ~ time(tmp0))$coef, col="red")  


timY = c(time(YrGnp))

plot(YrGnp, main="GNP series, Yr avgs, 1947-2001")

lines(c(time(YrGnp)), lm(YrGnp ~ timY+ I(timY^2) )$fit, col="blue")

     ### suggests that 2nd differencing on the original scale removes trend
    ### but that is not necessarily the best way to get to stationarity


par(mfrow=c(2,1))

plot(c(time(YrGnp)), lm(YrGnp ~ timY+ I(timY^2) )$resid, xlab="Time",
       ylab="Residuals", main="Residuals from a Quadratic Fit to YrAvg GNP")

plot(diff(tmp0), xlab="Time",
       ylab="2nd diff of GNP", main="Twiced-differenced GNP")


timG = c(time(GNP))

par(mfrow=c(1,1))

plot(timG, lm(GNP ~ timG + I(timG^2))$resid,  xlab="Time",
       ylab="Residuals", main="Residuals from a Quadratic Fit to GNP")


## slight  modification: residuals from YrAvg fit indexed to mid-year

cf = lm(YrGnp ~ I(timG+.5) + I((timG+.5)^2))$coef

plot(timG, GNP-cf[1]-cf[2]*(timG+.5)-cf[3]*(timG+.5)^2,  xlab="Time",
       ylab="Residuals", main="Residuals from a Quadratic Fit to GNP")

    ### Not so successful


GnpL = log(GNP)

plot(GnpL)

plot(diff(GnpL))


DGnpL = diff(log(GNP))

 par(mfrow=c(2,1))

 acf(DGnpL, 24)

 pacf(DGnpL, 24)


  DGnpL.arma = arima(DGnpL, order = c(2, 0, 2)))

> DGnpL.arma$coef/sqrt(diag(DGnpL.arma$var.coef))

      ar1       ar2       ma1       ma2 intercept 
 9.798346 -4.792280 -5.678659  2.851789 10.412051 

> tmpGnp = tsdiag(DGnpL.arma, gof.lag=20)

  DGnpL.ma = arima(DGnpL, order = c(0, 0, 2))

> DGnpL.ma$coef

       ma1        ma2  intercept 
0.30280854 0.20351890 0.00832671 

> DGnpL.ma$coef/sqrt(diag(DGnpL.ma$var))

      ma1       ma2 intercept 
 4.627238  3.159361  8.717756 


 tsdiag(DGnpL.ma, gof.lag=20)  

     ### interesting plots including standardized residuals

> tmpG = residuals(DGnpL.arma)   ### length 222

  sum(abs(tmpG - DGnpL.arma$resid))
[1] 0

> prdG = predict(DGnpL.arma,1, se.fit=T)

> hist(tmpG, nclass=24, prob=T)
  curve(dnorm(x,mean(tmpG),sd(tmpG)), add=T, col="green")

        ### interestingly non-normal