North Carolina Rural Interstate – Daily Speed (ARIMA)

Subasish Das and Choalun Ma

2018-11-12

# Step 1: Load R Packages 
### options(repos='http://cran.rstudio.com/')
#install.packages("astsa")
#install.packages('ggplot2')
#install.packages('forecast')
#install.packages('tseries')
#install.packages("data.table")

library(astsa)
library(forecast)
library(tseries)
library(zoo)
library(tseries)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)


setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")
load("NC_Rural_Principle_Arterial_Interstate_1_TMC_TT_SI_reduce_withCrash.rData")
mytype = 'RI'
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/",mytype))

df_RI <- b02a
dim(df_RI)
## [1] 2261664      30
### Calculating Speed
df_RI$spd_av = 3600*df_RI$TMC_length/df_RI$Travel_TIME_ALL_VEHICLES/5280
df_RI$spd_pv = 3600*df_RI$TMC_length/df_RI$Travel_TIME_PASSENGER_VEHICLES/5280
df_RI$spd_ft = 3600*df_RI$TMC_length/df_RI$Travel_TIME_FREIGHT_TRUCKS/5280

### Month, Day
df_RI$date <- as.character(df_RI$DATE)
df_RI$date <- str_pad(df_RI$DATE, 8, pad = "0")
df_RI$Month <- substr(df_RI$date, start = 1, stop = 2)
df_RI$Day   <- substr(df_RI$date, start = 3, stop = 4)
df_RI$Year  <- substr(df_RI$date, start = 5, stop = 8)
df_RI$MonthDay <- paste0(df_RI$Month,"_", df_RI$Day)
head(df_RI)
##            TimeStamp       TMC    DATE EPOCH15 Travel_TIME_ALL_VEHICLES
## 1:  125N04870_0101_0 125N04870 1012015       0                       NA
## 2:  125N04870_0101_1 125N04870 1012015       1                       72
## 3: 125N04870_0101_10 125N04870 1012015      10                       NA
## 4: 125N04870_0101_11 125N04870 1012015      11                       89
## 5: 125N04870_0101_12 125N04870 1012015      12                       84
## 6: 125N04870_0101_13 125N04870 1012015      13                       78
##    Travel_TIME_PASSENGER_VEHICLES Travel_TIME_FREIGHT_TRUCKS TMC_length
## 1:                             NA                         NA   7851.174
## 2:                             72                         NA   7851.174
## 3:                             NA                         NA   7851.174
## 4:                             NA                         89   7851.174
## 5:                             NA                         84   7851.174
## 6:                             73                         83   7851.174
##    ave_aadt ave_wtdsgspd ave_medwid ave_peaklane ave_row ave_sur_wid
## 1: 115852.4           NA    10.5365           NA     160          36
## 2: 115852.4           NA    10.5365           NA     160          36
## 3: 115852.4           NA    10.5365           NA     160          36
## 4: 115852.4           NA    10.5365           NA     160          36
## 5: 115852.4           NA    10.5365           NA     160          36
## 6: 115852.4           NA    10.5365           NA     160          36
##    ave_no_lanes ave_spd_limt ave_rodwycls ave_rshldwid FC TER ACC MED
## 1:            2           45            8      5.61569  1   2   F  St
## 2:            2           45            8      5.61569  1   2   F  St
## 3:            2           45            8      5.61569  1   2   F  St
## 4:            2           45            8      5.61569  1   2   F  St
## 5:            2           45            8      5.61569  1   2   F  St
## 6:            2           45            8      5.61569  1   2   F  St
##    Total K A B C  O DAYMTH Crash   spd_av   spd_pv   spd_ft     date Month
## 1:    34 0 2 0 8 24   0101     0       NA       NA       NA 01012015    01
## 2:    34 0 2 0 8 24   0101     0 74.34824 74.34824       NA 01012015    01
## 3:    34 0 2 0 8 24   0101     0       NA       NA       NA 01012015    01
## 4:    34 0 2 0 8 24   0101     0 60.14689       NA 60.14689 01012015    01
## 5:    34 0 2 0 8 24   0101     0 63.72706       NA 63.72706 01012015    01
## 6:    34 0 2 0 8 24   0101     0 68.62914 73.32977 64.49486 01012015    01
##    Day Year MonthDay
## 1:  01 2015    01_01
## 2:  01 2015    01_01
## 3:  01 2015    01_01
## 4:  01 2015    01_01
## 5:  01 2015    01_01
## 6:  01 2015    01_01
day1<- df_RI[,-c(1)] %>% group_by(MonthDay) %>% summarize(Speed_All_Mean=mean(spd_av, na.rm=TRUE))
day1
## # A tibble: 365 x 2
##    MonthDay Speed_All_Mean
##    <chr>             <dbl>
##  1 01_01              49.8
##  2 01_02              43.5
##  3 01_03              47.4
##  4 01_04              50.9
##  5 01_05              43.1
##  6 01_06              42.8
##  7 01_07              42.7
##  8 01_08              43.2
##  9 01_09              42.9
## 10 01_10              46.9
## # ... with 355 more rows
# Step 2: Examine Data
speed_clean <- tsclean(ts(day1$Speed_All_Mean))
plot.ts(speed_clean)

# ggplot() + geom_line(data = Q1, aes(x = TimeStamp, y = speed_clean)) + ylab('Cleaned Speed Records')

day1$cnt_ma = ma(speed_clean, order=7) # using the clean count with no outliers
day1$cnt_ma30 = ma(speed_clean, order=30)
# Step 3: Decompose Your Data
count_ma = ts(na.omit(speed_clean), frequency=30)
decomp = stl(count_ma, s.window="periodic")
deseasonal_cnt <- seasadj(decomp)
plot(decomp)

# Step 4: Stationarity
# statinary test
adf.test(count_ma, alternative = "stationary")
## Warning in adf.test(count_ma, alternative = "stationary"): p-value smaller
## than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  count_ma
## Dickey-Fuller = -4.6151, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
adf.test(deseasonal_cnt, alternative = "stationary")
## Warning in adf.test(deseasonal_cnt, alternative = "stationary"): p-value
## smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  deseasonal_cnt
## Dickey-Fuller = -4.6218, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
d1 = diff(deseasonal_cnt)
adf.test(d1, alternative = "stationary")
## Warning in adf.test(d1, alternative = "stationary"): p-value smaller than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  d1
## Dickey-Fuller = -9.5432, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# Step 5: Autocorrelations and Choosing Model Order
# check ACF and PACF
acf2(count_ma)

##          ACF  PACF
##   [1,]  0.31  0.31
##   [2,] -0.17 -0.29
##   [3,] -0.20 -0.05
##   [4,] -0.22 -0.21
##   [5,] -0.20 -0.15
##   [6,]  0.25  0.35
##   [7,]  0.81  0.72
##   [8,]  0.23 -0.29
##   [9,] -0.21 -0.15
##  [10,] -0.21  0.02
##  [11,] -0.23  0.01
##  [12,] -0.21  0.02
##  [13,]  0.24  0.06
##  [14,]  0.77  0.28
##  [15,]  0.22 -0.11
##  [16,] -0.23 -0.13
##  [17,] -0.23 -0.04
##  [18,] -0.23  0.08
##  [19,] -0.21  0.01
##  [20,]  0.23  0.00
##  [21,]  0.75  0.21
##  [22,]  0.22 -0.04
##  [23,] -0.22 -0.01
##  [24,] -0.23 -0.09
##  [25,] -0.25  0.00
##  [26,] -0.20  0.11
##  [27,]  0.23  0.02
##  [28,]  0.75  0.14
##  [29,]  0.23  0.02
##  [30,] -0.21 -0.01
##  [31,] -0.22 -0.04
##  [32,] -0.24  0.01
##  [33,] -0.20  0.00
##  [34,]  0.21 -0.05
##  [35,]  0.72  0.07
##  [36,]  0.20 -0.12
##  [37,] -0.22  0.06
##  [38,] -0.24 -0.04
##  [39,] -0.21  0.14
##  [40,] -0.18  0.00
##  [41,]  0.22  0.00
##  [42,]  0.71  0.03
##  [43,]  0.19 -0.03
##  [44,] -0.23  0.00
##  [45,] -0.24 -0.03
##  [46,] -0.24 -0.03
##  [47,] -0.19 -0.03
##  [48,]  0.20 -0.01
##  [49,]  0.67 -0.01
##  [50,]  0.18 -0.01
##  [51,] -0.23  0.03
##  [52,] -0.24 -0.04
##  [53,] -0.24  0.02
##  [54,] -0.20 -0.04
##  [55,]  0.19  0.02
##  [56,]  0.66  0.03
##  [57,]  0.17 -0.06
##  [58,] -0.23  0.04
##  [59,] -0.25 -0.03
##  [60,] -0.23  0.02
##  [61,] -0.20 -0.07
##  [62,]  0.19  0.08
##  [63,]  0.65  0.01
##  [64,]  0.17  0.04
##  [65,] -0.22  0.00
##  [66,] -0.23  0.04
##  [67,] -0.23 -0.02
##  [68,] -0.19 -0.06
##  [69,]  0.19  0.02
##  [70,]  0.64  0.02
##  [71,]  0.16 -0.05
##  [72,] -0.23 -0.04
##  [73,] -0.24  0.01
##  [74,] -0.22  0.08
##  [75,] -0.19  0.01
##  [76,]  0.19 -0.03
##  [77,]  0.62  0.01
##  [78,]  0.16  0.04
##  [79,] -0.21  0.06
##  [80,] -0.20  0.06
##  [81,] -0.21 -0.02
##  [82,] -0.17  0.02
##  [83,]  0.19 -0.01
##  [84,]  0.60 -0.07
##  [85,]  0.14  0.02
##  [86,] -0.22 -0.01
##  [87,] -0.23 -0.10
##  [88,] -0.22  0.02
##  [89,] -0.18  0.00
##  [90,]  0.18  0.03
##  [91,]  0.58 -0.03
##  [92,]  0.15  0.04
##  [93,] -0.21 -0.01
##  [94,] -0.23 -0.01
##  [95,] -0.22 -0.04
##  [96,] -0.19 -0.01
##  [97,]  0.16 -0.04
##  [98,]  0.54 -0.08
##  [99,]  0.13  0.03
## [100,] -0.21  0.06
## [101,] -0.22 -0.01
## [102,] -0.21  0.01
## [103,] -0.17 -0.03
## [104,]  0.15  0.00
## [105,]  0.55  0.07
## [106,]  0.13 -0.06
## [107,] -0.20  0.00
## [108,] -0.22  0.00
## [109,] -0.20  0.01
## [110,] -0.17 -0.04
## [111,]  0.15  0.02
## [112,]  0.52 -0.04
## [113,]  0.12  0.02
## [114,] -0.20 -0.02
## [115,] -0.22  0.02
## [116,] -0.20  0.01
## [117,] -0.17  0.00
## [118,]  0.14 -0.02
## [119,]  0.51 -0.02
## [120,]  0.13  0.06
acf2(deseasonal_cnt)

##          ACF  PACF
##   [1,]  0.31  0.31
##   [2,] -0.17 -0.30
##   [3,] -0.20 -0.04
##   [4,] -0.22 -0.21
##   [5,] -0.21 -0.15
##   [6,]  0.25  0.35
##   [7,]  0.82  0.74
##   [8,]  0.24 -0.30
##   [9,] -0.22 -0.15
##  [10,] -0.21  0.02
##  [11,] -0.23  0.04
##  [12,] -0.20  0.05
##  [13,]  0.25  0.07
##  [14,]  0.79  0.29
##  [15,]  0.22 -0.11
##  [16,] -0.23 -0.10
##  [17,] -0.22 -0.05
##  [18,] -0.23  0.07
##  [19,] -0.22 -0.02
##  [20,]  0.23  0.00
##  [21,]  0.76  0.19
##  [22,]  0.22 -0.02
##  [23,] -0.23 -0.02
##  [24,] -0.23 -0.11
##  [25,] -0.25 -0.02
##  [26,] -0.21  0.12
##  [27,]  0.24  0.04
##  [28,]  0.76  0.12
##  [29,]  0.23 -0.02
##  [30,] -0.23 -0.05
##  [31,] -0.23  0.01
##  [32,] -0.24  0.02
##  [33,] -0.20  0.01
##  [34,]  0.21 -0.08
##  [35,]  0.73  0.08
##  [36,]  0.19 -0.11
##  [37,] -0.23  0.11
##  [38,] -0.24 -0.06
##  [39,] -0.22  0.14
##  [40,] -0.18  0.00
##  [41,]  0.23  0.03
##  [42,]  0.73  0.04
##  [43,]  0.20 -0.02
##  [44,] -0.23  0.01
##  [45,] -0.24 -0.07
##  [46,] -0.24  0.00
##  [47,] -0.20 -0.04
##  [48,]  0.21  0.01
##  [49,]  0.69 -0.06
##  [50,]  0.18  0.01
##  [51,] -0.24  0.01
##  [52,] -0.25 -0.01
##  [53,] -0.24 -0.01
##  [54,] -0.20 -0.05
##  [55,]  0.19 -0.02
##  [56,]  0.67  0.03
##  [57,]  0.17 -0.02
##  [58,] -0.24  0.04
##  [59,] -0.25 -0.05
##  [60,] -0.25 -0.03
##  [61,] -0.21 -0.03
##  [62,]  0.20  0.10
##  [63,]  0.66  0.05
##  [64,]  0.18  0.02
##  [65,] -0.22  0.01
##  [66,] -0.23  0.04
##  [67,] -0.23  0.03
##  [68,] -0.20 -0.09
##  [69,]  0.19  0.02
##  [70,]  0.66  0.02
##  [71,]  0.17 -0.02
##  [72,] -0.23 -0.03
##  [73,] -0.24  0.03
##  [74,] -0.22  0.09
##  [75,] -0.19 -0.03
##  [76,]  0.19 -0.03
##  [77,]  0.64 -0.01
##  [78,]  0.16  0.06
##  [79,] -0.21  0.03
##  [80,] -0.20  0.06
##  [81,] -0.21 -0.06
##  [82,] -0.18  0.05
##  [83,]  0.19 -0.03
##  [84,]  0.61 -0.08
##  [85,]  0.15 -0.02
##  [86,] -0.22  0.02
##  [87,] -0.23 -0.08
##  [88,] -0.23  0.01
##  [89,] -0.20 -0.03
##  [90,]  0.17 -0.02
##  [91,]  0.59  0.00
##  [92,]  0.15  0.04
##  [93,] -0.20  0.03
##  [94,] -0.22 -0.03
##  [95,] -0.22 -0.01
##  [96,] -0.20 -0.02
##  [97,]  0.16  0.00
##  [98,]  0.55 -0.12
##  [99,]  0.13  0.05
## [100,] -0.20  0.07
## [101,] -0.21  0.03
## [102,] -0.21  0.00
## [103,] -0.17 -0.02
## [104,]  0.16  0.01
## [105,]  0.56  0.05
## [106,]  0.13 -0.07
## [107,] -0.20 -0.02
## [108,] -0.21  0.03
## [109,] -0.20 -0.01
## [110,] -0.18 -0.03
## [111,]  0.15 -0.02
## [112,]  0.53 -0.02
## [113,]  0.12 -0.01
## [114,] -0.21 -0.05
## [115,] -0.22 -0.02
## [116,] -0.21  0.04
## [117,] -0.18  0.01
## [118,]  0.14 -0.02
## [119,]  0.51 -0.06
## [120,]  0.12  0.03

Seasonility Not in Consideration

# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=FALSE)
## Series: deseasonal_cnt 
## ARIMA(4,1,3) 
## 
## Coefficients:
##           ar1     ar2      ar3      ar4      ma1      ma2    ma3
##       -0.0266  0.0671  -0.3931  -0.2772  -0.6684  -0.6399  0.523
## s.e.   0.0825  0.0747   0.0635   0.0659   0.0717   0.1018  0.065
## 
## sigma^2 estimated as 5.099:  log likelihood=-811.12
## AIC=1638.23   AICc=1638.64   BIC=1669.41
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=FALSE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality not considered]')

# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)

Seasonility in Consideration

# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=TRUE)
## Series: deseasonal_cnt 
## ARIMA(1,1,3)(2,0,0)[30] with drift 
## 
## Coefficients:
##           ar1      ma1      ma2     ma3     sar1    sar2    drift
##       -0.6345  -0.2486  -0.8471  0.1469  -0.4118  -0.493  -0.0065
## s.e.   0.1022   0.1328   0.0490  0.0978   0.0556   0.058   0.0022
## 
## sigma^2 estimated as 4.63:  log likelihood=-803.25
## AIC=1622.49   AICc=1622.9   BIC=1653.67
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=TRUE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality considered]')

# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)