Ohio Rural Two Lane – Daily Speed (ARIMA)

Subasish Das and Choalun Ma

2018-11-12

# Step 1: Load R Packages 
### options(repos='http://cran.rstudio.com/')
#install.packages("astsa")
#install.packages('ggplot2')
#install.packages('forecast')
#install.packages('tseries')
#install.packages("data.table")

library(astsa)
library(forecast)
library(tseries)
library(zoo)
library(tseries)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)


mytype = 'R2'
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")
load("./two-lane_undivided_OH_reduce_withCrash.rData")

setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/",mytype))
df_R2 <- OH_2un_nomed
df_R2$spd_av = 3600*df_R2$DISTANCE/df_R2$Travel_TIME_ALL_VEHICLES
df_R2$spd_pv = 3600*df_R2$DISTANCE/df_R2$Travel_TIME_PASSENGER_VEHICLES
df_R2$spd_ft = 3600*df_R2$DISTANCE/df_R2$Travel_TIME_FREIGHT_TRUCKS

### Month, Day
df_R2$date <- as.character(df_R2$DATE)
df_R2$date <- str_pad(df_R2$DATE, 8, pad = "0")
df_R2$Month <- substr(df_R2$date, start = 1, stop = 2)
df_R2$Day   <- substr(df_R2$date, start = 3, stop = 4)
df_R2$Year  <- substr(df_R2$date, start = 5, stop = 8)
df_R2$MonthDay <- paste0(df_R2$Month,"_", df_R2$Day)
head(df_R2)
##            TimeStamp       TMC    DATE EPOCH1h Travel_TIME_ALL_VEHICLES
## 1:  108N05389_0101_0 108N05389 1012015       0                       NA
## 2:  108N05389_0101_1 108N05389 1012015       1                       NA
## 3: 108N05389_0101_10 108N05389 1012015      10                    201.5
## 4: 108N05389_0101_11 108N05389 1012015      11                       NA
## 5: 108N05389_0101_12 108N05389 1012015      12                    186.0
## 6: 108N05389_0101_13 108N05389 1012015      13                    181.0
##    Travel_TIME_PASSENGER_VEHICLES Travel_TIME_FREIGHT_TRUCKS ADMIN_LEVE
## 1:                             NA                         NA        USA
## 2:                             NA                         NA        USA
## 3:                             NA                      201.5        USA
## 4:                             NA                         NA        USA
## 5:                             NA                      186.0        USA
## 6:                             NA                      181.0        USA
##    ADMIN_LE_1 ADMIN_LE_2 DISTANCE ROAD_NUMBE ROAD_NAME LATITUDE LONGITUDE
## 1:       Ohio     Fulton  2.78045      US-20            41.6733 -84.35867
## 2:       Ohio     Fulton  2.78045      US-20            41.6733 -84.35867
## 3:       Ohio     Fulton  2.78045      US-20            41.6733 -84.35867
## 4:       Ohio     Fulton  2.78045      US-20            41.6733 -84.35867
## 5:       Ohio     Fulton  2.78045      US-20            41.6733 -84.35867
## 6:       Ohio     Fulton  2.78045      US-20            41.6733 -84.35867
##    ROAD_DIREC  ORN_FID COUNTY divided SURF_TYP NHS_CDE HPMS ACCESS AADT_YR
## 1:  Eastbound 21204.78    FUL       U        G       N           N      14
## 2:  Eastbound 21204.78    FUL       U        G       N           N      14
## 3:  Eastbound 21204.78    FUL       U        G       N           N      14
## 4:  Eastbound 21204.78    FUL       U        G       N           N      14
## 5:  Eastbound 21204.78    FUL       U        G       N           N      14
## 6:  Eastbound 21204.78    FUL       U        G       N           N      14
##    FED_FACI PK_LANES MED_TYPE FED_MEDW BEGMP ENDMP   SEG_LNG cnty_rte
## 1:        2       NA        1       NA     0 22.32 0.4074587 FUL0020R
## 2:        2       NA        1       NA     0 22.32 0.4074587 FUL0020R
## 3:        2       NA        1       NA     0 22.32 0.4074587 FUL0020R
## 4:        2       NA        1       NA     0 22.32 0.4074587 FUL0020R
## 5:        2       NA        1       NA     0 22.32 0.4074587 FUL0020R
## 6:        2       NA        1       NA     0 22.32 0.4074587 FUL0020R
##    rte_nbr     aadt  aadt_bc  aadt_pt surf_wid no_lanes func_cls rodwycls
## 1:   0020R 2506.222 623.6426 1882.579       24        2        2        8
## 2:   0020R 2506.222 623.6426 1882.579       24        2        2        8
## 3:   0020R 2506.222 623.6426 1882.579       24        2        2        8
## 4:   0020R 2506.222 623.6426 1882.579       24        2        2        8
## 5:   0020R 2506.222 623.6426 1882.579       24        2        2        8
## 6:   0020R 2506.222 623.6426 1882.579       24        2        2        8
##    Total K A B C O DAYMTH Crash   spd_av spd_pv   spd_ft     date Month
## 1:     2 0 0 1 0 1   0101     0       NA     NA       NA 01012015    01
## 2:     2 0 0 1 0 1   0101     0       NA     NA       NA 01012015    01
## 3:     2 0 0 1 0 1   0101     0 49.67553     NA 49.67553 01012015    01
## 4:     2 0 0 1 0 1   0101     0       NA     NA       NA 01012015    01
## 5:     2 0 0 1 0 1   0101     0 53.81516     NA 53.81516 01012015    01
## 6:     2 0 0 1 0 1   0101     0 55.30177     NA 55.30177 01012015    01
##    Day Year MonthDay
## 1:  01 2015    01_01
## 2:  01 2015    01_01
## 3:  01 2015    01_01
## 4:  01 2015    01_01
## 5:  01 2015    01_01
## 6:  01 2015    01_01
day1<- df_R2[,-c(1)] %>% group_by(MonthDay) %>% summarize(Speed_All_Mean=mean(spd_av, na.rm=TRUE))
day1
## # A tibble: 365 x 2
##    MonthDay Speed_All_Mean
##    <chr>             <dbl>
##  1 01_01              47.7
##  2 01_02              46.1
##  3 01_03              44.2
##  4 01_04              45.6
##  5 01_05              45.2
##  6 01_06              41.9
##  7 01_07              42.9
##  8 01_08              43.5
##  9 01_09              41.5
## 10 01_10              45.7
## # ... with 355 more rows
# Step 2: Examine Data
speed_clean <- tsclean(ts(day1$Speed_All_Mean))
plot.ts(speed_clean)

# ggplot() + geom_line(data = Q1, aes(x = TimeStamp, y = speed_clean)) + ylab('Cleaned Speed Records')

day1$cnt_ma = ma(speed_clean, order=7) # using the clean count with no outliers
day1$cnt_ma30 = ma(speed_clean, order=30)
# Step 3: Decompose Your Data
count_ma = ts(na.omit(speed_clean), frequency=30)
decomp = stl(count_ma, s.window="periodic")
deseasonal_cnt <- seasadj(decomp)
plot(decomp)

# Step 4: Stationarity
# statinary test
adf.test(count_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  count_ma
## Dickey-Fuller = -3.6804, Lag order = 7, p-value = 0.02548
## alternative hypothesis: stationary
adf.test(deseasonal_cnt, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  deseasonal_cnt
## Dickey-Fuller = -3.645, Lag order = 7, p-value = 0.02889
## alternative hypothesis: stationary
d1 = diff(deseasonal_cnt)
adf.test(d1, alternative = "stationary")
## Warning in adf.test(d1, alternative = "stationary"): p-value smaller than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  d1
## Dickey-Fuller = -10.672, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# Step 5: Autocorrelations and Choosing Model Order
# check ACF and PACF
acf2(count_ma)

##          ACF  PACF
##   [1,]  0.42  0.42
##   [2,]  0.14 -0.05
##   [3,]  0.10  0.07
##   [4,]  0.17  0.13
##   [5,]  0.16  0.04
##   [6,]  0.27  0.23
##   [7,]  0.45  0.32
##   [8,]  0.27 -0.05
##   [9,]  0.07 -0.05
##  [10,]  0.02 -0.05
##  [11,] -0.01 -0.17
##  [12,]  0.05  0.02
##  [13,]  0.30  0.25
##  [14,]  0.49  0.29
##  [15,]  0.23 -0.06
##  [16,]  0.03 -0.03
##  [17,]  0.08  0.08
##  [18,]  0.10 -0.02
##  [19,]  0.06 -0.09
##  [20,]  0.23  0.06
##  [21,]  0.47  0.20
##  [22,]  0.32  0.07
##  [23,]  0.07 -0.03
##  [24,]  0.00 -0.05
##  [25,] -0.01 -0.10
##  [26,]  0.06 -0.02
##  [27,]  0.23  0.01
##  [28,]  0.37  0.05
##  [29,]  0.18 -0.02
##  [30,] -0.03 -0.08
##  [31,] -0.08 -0.13
##  [32,] -0.09 -0.08
##  [33,] -0.06 -0.03
##  [34,]  0.13 -0.01
##  [35,]  0.37  0.14
##  [36,]  0.19  0.02
##  [37,] -0.01  0.06
##  [38,] -0.05  0.02
##  [39,]  0.01  0.03
##  [40,]  0.02 -0.05
##  [41,]  0.14 -0.08
##  [42,]  0.31  0.02
##  [43,]  0.16 -0.06
##  [44,] -0.06 -0.09
##  [45,] -0.14 -0.04
##  [46,] -0.10  0.03
##  [47,] -0.09 -0.04
##  [48,]  0.06  0.00
##  [49,]  0.27  0.10
##  [50,]  0.13  0.02
##  [51,] -0.12 -0.08
##  [52,] -0.15  0.00
##  [53,] -0.11 -0.03
##  [54,] -0.09 -0.02
##  [55,]  0.04 -0.01
##  [56,]  0.23  0.01
##  [57,]  0.10 -0.04
##  [58,] -0.12 -0.01
##  [59,] -0.17  0.00
##  [60,] -0.14 -0.04
##  [61,] -0.09  0.01
##  [62,]  0.04  0.00
##  [63,]  0.22  0.01
##  [64,]  0.09 -0.02
##  [65,] -0.13  0.02
##  [66,] -0.15  0.04
##  [67,] -0.12  0.02
##  [68,] -0.12 -0.02
##  [69,]  0.00  0.03
##  [70,]  0.17 -0.01
##  [71,]  0.05 -0.03
##  [72,] -0.12  0.08
##  [73,] -0.16  0.03
##  [74,] -0.12  0.03
##  [75,] -0.11 -0.03
##  [76,]  0.02 -0.04
##  [77,]  0.20  0.00
##  [78,]  0.09 -0.01
##  [79,] -0.12 -0.04
##  [80,] -0.15  0.04
##  [81,] -0.17 -0.06
##  [82,] -0.13  0.02
##  [83,]  0.00  0.03
##  [84,]  0.15 -0.02
##  [85,]  0.05 -0.02
##  [86,] -0.14 -0.01
##  [87,] -0.18 -0.02
##  [88,] -0.16 -0.04
##  [89,] -0.14 -0.04
##  [90,] -0.01 -0.02
##  [91,]  0.14 -0.01
##  [92,]  0.06  0.03
##  [93,] -0.13  0.01
##  [94,] -0.16  0.04
##  [95,] -0.14  0.01
##  [96,] -0.11 -0.02
##  [97,]  0.00  0.03
##  [98,]  0.13 -0.02
##  [99,]  0.05 -0.02
## [100,] -0.11  0.01
## [101,] -0.15  0.00
## [102,] -0.13  0.01
## [103,] -0.09  0.07
## [104,]  0.01 -0.01
## [105,]  0.14  0.01
## [106,]  0.05 -0.04
## [107,] -0.12 -0.01
## [108,] -0.16  0.00
## [109,] -0.15 -0.05
## [110,] -0.12 -0.01
## [111,] -0.01  0.02
## [112,]  0.12 -0.03
## [113,]  0.05 -0.03
## [114,] -0.09  0.05
## [115,] -0.14 -0.02
## [116,] -0.14 -0.03
## [117,] -0.11 -0.01
## [118,]  0.00  0.01
## [119,]  0.14  0.03
## [120,]  0.06 -0.01
acf2(deseasonal_cnt)

##          ACF  PACF
##   [1,]  0.44  0.44
##   [2,]  0.15 -0.06
##   [3,]  0.11  0.08
##   [4,]  0.18  0.13
##   [5,]  0.16  0.04
##   [6,]  0.28  0.24
##   [7,]  0.46  0.32
##   [8,]  0.28 -0.05
##   [9,]  0.07 -0.07
##  [10,]  0.02 -0.05
##  [11,]  0.00 -0.16
##  [12,]  0.05  0.01
##  [13,]  0.31  0.27
##  [14,]  0.50  0.29
##  [15,]  0.26 -0.04
##  [16,]  0.03 -0.05
##  [17,]  0.09  0.09
##  [18,]  0.10 -0.04
##  [19,]  0.06 -0.08
##  [20,]  0.24  0.06
##  [21,]  0.48  0.21
##  [22,]  0.33  0.05
##  [23,]  0.06 -0.04
##  [24,]  0.00 -0.03
##  [25,] -0.01 -0.11
##  [26,]  0.06 -0.01
##  [27,]  0.23  0.01
##  [28,]  0.38  0.05
##  [29,]  0.19 -0.03
##  [30,] -0.04 -0.10
##  [31,] -0.08 -0.11
##  [32,] -0.09 -0.09
##  [33,] -0.06 -0.01
##  [34,]  0.14  0.01
##  [35,]  0.38  0.14
##  [36,]  0.19 -0.01
##  [37,] -0.01  0.08
##  [38,] -0.04  0.02
##  [39,]  0.00  0.01
##  [40,]  0.02 -0.04
##  [41,]  0.15 -0.09
##  [42,]  0.32 -0.01
##  [43,]  0.16 -0.07
##  [44,] -0.06 -0.05
##  [45,] -0.13 -0.03
##  [46,] -0.11  0.03
##  [47,] -0.09 -0.02
##  [48,]  0.06 -0.02
##  [49,]  0.27  0.10
##  [50,]  0.13  0.02
##  [51,] -0.11 -0.06
##  [52,] -0.15 -0.02
##  [53,] -0.12 -0.03
##  [54,] -0.09 -0.01
##  [55,]  0.04 -0.04
##  [56,]  0.24  0.03
##  [57,]  0.10 -0.04
##  [58,] -0.12 -0.01
##  [59,] -0.17  0.00
##  [60,] -0.16 -0.06
##  [61,] -0.10  0.01
##  [62,]  0.03 -0.01
##  [63,]  0.23  0.03
##  [64,]  0.09 -0.03
##  [65,] -0.13  0.02
##  [66,] -0.16  0.04
##  [67,] -0.12  0.04
##  [68,] -0.12 -0.02
##  [69,]  0.00  0.01
##  [70,]  0.17  0.00
##  [71,]  0.05 -0.05
##  [72,] -0.12  0.07
##  [73,] -0.16  0.03
##  [74,] -0.12  0.04
##  [75,] -0.11 -0.02
##  [76,]  0.02 -0.04
##  [77,]  0.20  0.00
##  [78,]  0.08 -0.03
##  [79,] -0.13 -0.02
##  [80,] -0.16  0.04
##  [81,] -0.16 -0.04
##  [82,] -0.13  0.01
##  [83,] -0.01  0.03
##  [84,]  0.16 -0.02
##  [85,]  0.05 -0.04
##  [86,] -0.14  0.01
##  [87,] -0.18 -0.04
##  [88,] -0.17 -0.04
##  [89,] -0.14 -0.04
##  [90,] -0.03 -0.04
##  [91,]  0.14  0.00
##  [92,]  0.06  0.03
##  [93,] -0.13  0.03
##  [94,] -0.16  0.04
##  [95,] -0.15  0.02
##  [96,] -0.12 -0.02
##  [97,]  0.00  0.04
##  [98,]  0.14 -0.02
##  [99,]  0.05 -0.04
## [100,] -0.11  0.01
## [101,] -0.15 -0.02
## [102,] -0.13  0.01
## [103,] -0.09  0.08
## [104,]  0.00  0.01
## [105,]  0.15  0.02
## [106,]  0.04 -0.06
## [107,] -0.12 -0.01
## [108,] -0.17 -0.02
## [109,] -0.16 -0.05
## [110,] -0.12  0.00
## [111,]  0.00  0.04
## [112,]  0.12 -0.04
## [113,]  0.04 -0.01
## [114,] -0.09  0.07
## [115,] -0.14 -0.05
## [116,] -0.14 -0.01
## [117,] -0.11 -0.02
## [118,]  0.00  0.00
## [119,]  0.15  0.04
## [120,]  0.04 -0.04

Seasonility Not in Consideration

# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=FALSE)
## Series: deseasonal_cnt 
## ARIMA(2,1,5) 
## 
## Coefficients:
##           ar1      ar2     ma1      ma2      ma3      ma4     ma5
##       -0.9368  -0.4127  0.3544  -0.5112  -0.7357  -0.0547  0.1788
## s.e.   0.2875   0.1670  0.2872   0.2314   0.1274   0.0924  0.0886
## 
## sigma^2 estimated as 0.2206:  log likelihood=-238.87
## AIC=493.73   AICc=494.14   BIC=524.91
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=FALSE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality not considered]')

# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)

Seasonility in Consideration

# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=TRUE)
## Series: deseasonal_cnt 
## ARIMA(2,1,3)(2,0,1)[30] 
## 
## Coefficients:
##           ar1      ar2      ma1     ma2      ma3     sar1     sar2
##       -0.5010  -0.9052  -0.2214  0.2989  -0.7969  -0.1350  -0.3966
## s.e.   0.0423   0.0366   0.0431  0.0477   0.0487   0.1476   0.0694
##          sma1
##       -0.0958
## s.e.   0.1635
## 
## sigma^2 estimated as 0.1864:  log likelihood=-213.94
## AIC=445.87   AICc=446.38   BIC=480.95
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=TRUE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality considered]')

# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)