# Step 1: Load R Packages
### options(repos='http://cran.rstudio.com/')
#install.packages("astsa")
#install.packages('ggplot2')
#install.packages('forecast')
#install.packages('tseries')
#install.packages("data.table")
library(astsa)
library(forecast)
library(tseries)
library(zoo)
library(tseries)
library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")
load("NC_Rural_Principle_Arterial_Interstate_1_TMC_TT_SI_reduce_withCrash.rData")
mytype = 'RI'
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/",mytype))
df_RI <- b02a
dim(df_RI)
## [1] 2261664 30
### Calculating Speed
df_RI$spd_av = 3600*df_RI$TMC_length/df_RI$Travel_TIME_ALL_VEHICLES/5280
df_RI$spd_pv = 3600*df_RI$TMC_length/df_RI$Travel_TIME_PASSENGER_VEHICLES/5280
df_RI$spd_ft = 3600*df_RI$TMC_length/df_RI$Travel_TIME_FREIGHT_TRUCKS/5280
### Month, Day
df_RI$date <- as.character(df_RI$DATE)
df_RI$date <- str_pad(df_RI$DATE, 8, pad = "0")
df_RI$Month <- substr(df_RI$date, start = 1, stop = 2)
df_RI$Day <- substr(df_RI$date, start = 3, stop = 4)
df_RI$Year <- substr(df_RI$date, start = 5, stop = 8)
df_RI$MonthDay <- paste0(df_RI$Month,"_", df_RI$Day)
head(df_RI)
## TimeStamp TMC DATE EPOCH15 Travel_TIME_ALL_VEHICLES
## 1: 125N04870_0101_0 125N04870 1012015 0 NA
## 2: 125N04870_0101_1 125N04870 1012015 1 72
## 3: 125N04870_0101_10 125N04870 1012015 10 NA
## 4: 125N04870_0101_11 125N04870 1012015 11 89
## 5: 125N04870_0101_12 125N04870 1012015 12 84
## 6: 125N04870_0101_13 125N04870 1012015 13 78
## Travel_TIME_PASSENGER_VEHICLES Travel_TIME_FREIGHT_TRUCKS TMC_length
## 1: NA NA 7851.174
## 2: 72 NA 7851.174
## 3: NA NA 7851.174
## 4: NA 89 7851.174
## 5: NA 84 7851.174
## 6: 73 83 7851.174
## ave_aadt ave_wtdsgspd ave_medwid ave_peaklane ave_row ave_sur_wid
## 1: 115852.4 NA 10.5365 NA 160 36
## 2: 115852.4 NA 10.5365 NA 160 36
## 3: 115852.4 NA 10.5365 NA 160 36
## 4: 115852.4 NA 10.5365 NA 160 36
## 5: 115852.4 NA 10.5365 NA 160 36
## 6: 115852.4 NA 10.5365 NA 160 36
## ave_no_lanes ave_spd_limt ave_rodwycls ave_rshldwid FC TER ACC MED
## 1: 2 45 8 5.61569 1 2 F St
## 2: 2 45 8 5.61569 1 2 F St
## 3: 2 45 8 5.61569 1 2 F St
## 4: 2 45 8 5.61569 1 2 F St
## 5: 2 45 8 5.61569 1 2 F St
## 6: 2 45 8 5.61569 1 2 F St
## Total K A B C O DAYMTH Crash spd_av spd_pv spd_ft date Month
## 1: 34 0 2 0 8 24 0101 0 NA NA NA 01012015 01
## 2: 34 0 2 0 8 24 0101 0 74.34824 74.34824 NA 01012015 01
## 3: 34 0 2 0 8 24 0101 0 NA NA NA 01012015 01
## 4: 34 0 2 0 8 24 0101 0 60.14689 NA 60.14689 01012015 01
## 5: 34 0 2 0 8 24 0101 0 63.72706 NA 63.72706 01012015 01
## 6: 34 0 2 0 8 24 0101 0 68.62914 73.32977 64.49486 01012015 01
## Day Year MonthDay
## 1: 01 2015 01_01
## 2: 01 2015 01_01
## 3: 01 2015 01_01
## 4: 01 2015 01_01
## 5: 01 2015 01_01
## 6: 01 2015 01_01
day1<- df_RI[,-c(1)] %>% group_by(MonthDay) %>% summarize(Speed_All_Mean=mean(spd_av, na.rm=TRUE))
day1
## # A tibble: 365 x 2
## MonthDay Speed_All_Mean
## <chr> <dbl>
## 1 01_01 49.8
## 2 01_02 43.5
## 3 01_03 47.4
## 4 01_04 50.9
## 5 01_05 43.1
## 6 01_06 42.8
## 7 01_07 42.7
## 8 01_08 43.2
## 9 01_09 42.9
## 10 01_10 46.9
## # ... with 355 more rows
# Step 2: Examine Data
speed_clean <- tsclean(ts(day1$Speed_All_Mean))
plot.ts(speed_clean)
# ggplot() + geom_line(data = Q1, aes(x = TimeStamp, y = speed_clean)) + ylab('Cleaned Speed Records')
day1$cnt_ma = ma(speed_clean, order=7) # using the clean count with no outliers
day1$cnt_ma30 = ma(speed_clean, order=30)
# Step 3: Decompose Your Data
count_ma = ts(na.omit(speed_clean), frequency=30)
decomp = stl(count_ma, s.window="periodic")
deseasonal_cnt <- seasadj(decomp)
plot(decomp)
# Step 4: Stationarity
# statinary test
adf.test(count_ma, alternative = "stationary")
## Warning in adf.test(count_ma, alternative = "stationary"): p-value smaller
## than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: count_ma
## Dickey-Fuller = -4.6151, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
adf.test(deseasonal_cnt, alternative = "stationary")
## Warning in adf.test(deseasonal_cnt, alternative = "stationary"): p-value
## smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: deseasonal_cnt
## Dickey-Fuller = -4.6218, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
d1 = diff(deseasonal_cnt)
adf.test(d1, alternative = "stationary")
## Warning in adf.test(d1, alternative = "stationary"): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: d1
## Dickey-Fuller = -9.5432, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# Step 5: Autocorrelations and Choosing Model Order
# check ACF and PACF
acf2(count_ma)
## ACF PACF
## [1,] 0.31 0.31
## [2,] -0.17 -0.29
## [3,] -0.20 -0.05
## [4,] -0.22 -0.21
## [5,] -0.20 -0.15
## [6,] 0.25 0.35
## [7,] 0.81 0.72
## [8,] 0.23 -0.29
## [9,] -0.21 -0.15
## [10,] -0.21 0.02
## [11,] -0.23 0.01
## [12,] -0.21 0.02
## [13,] 0.24 0.06
## [14,] 0.77 0.28
## [15,] 0.22 -0.11
## [16,] -0.23 -0.13
## [17,] -0.23 -0.04
## [18,] -0.23 0.08
## [19,] -0.21 0.01
## [20,] 0.23 0.00
## [21,] 0.75 0.21
## [22,] 0.22 -0.04
## [23,] -0.22 -0.01
## [24,] -0.23 -0.09
## [25,] -0.25 0.00
## [26,] -0.20 0.11
## [27,] 0.23 0.02
## [28,] 0.75 0.14
## [29,] 0.23 0.02
## [30,] -0.21 -0.01
## [31,] -0.22 -0.04
## [32,] -0.24 0.01
## [33,] -0.20 0.00
## [34,] 0.21 -0.05
## [35,] 0.72 0.07
## [36,] 0.20 -0.12
## [37,] -0.22 0.06
## [38,] -0.24 -0.04
## [39,] -0.21 0.14
## [40,] -0.18 0.00
## [41,] 0.22 0.00
## [42,] 0.71 0.03
## [43,] 0.19 -0.03
## [44,] -0.23 0.00
## [45,] -0.24 -0.03
## [46,] -0.24 -0.03
## [47,] -0.19 -0.03
## [48,] 0.20 -0.01
## [49,] 0.67 -0.01
## [50,] 0.18 -0.01
## [51,] -0.23 0.03
## [52,] -0.24 -0.04
## [53,] -0.24 0.02
## [54,] -0.20 -0.04
## [55,] 0.19 0.02
## [56,] 0.66 0.03
## [57,] 0.17 -0.06
## [58,] -0.23 0.04
## [59,] -0.25 -0.03
## [60,] -0.23 0.02
## [61,] -0.20 -0.07
## [62,] 0.19 0.08
## [63,] 0.65 0.01
## [64,] 0.17 0.04
## [65,] -0.22 0.00
## [66,] -0.23 0.04
## [67,] -0.23 -0.02
## [68,] -0.19 -0.06
## [69,] 0.19 0.02
## [70,] 0.64 0.02
## [71,] 0.16 -0.05
## [72,] -0.23 -0.04
## [73,] -0.24 0.01
## [74,] -0.22 0.08
## [75,] -0.19 0.01
## [76,] 0.19 -0.03
## [77,] 0.62 0.01
## [78,] 0.16 0.04
## [79,] -0.21 0.06
## [80,] -0.20 0.06
## [81,] -0.21 -0.02
## [82,] -0.17 0.02
## [83,] 0.19 -0.01
## [84,] 0.60 -0.07
## [85,] 0.14 0.02
## [86,] -0.22 -0.01
## [87,] -0.23 -0.10
## [88,] -0.22 0.02
## [89,] -0.18 0.00
## [90,] 0.18 0.03
## [91,] 0.58 -0.03
## [92,] 0.15 0.04
## [93,] -0.21 -0.01
## [94,] -0.23 -0.01
## [95,] -0.22 -0.04
## [96,] -0.19 -0.01
## [97,] 0.16 -0.04
## [98,] 0.54 -0.08
## [99,] 0.13 0.03
## [100,] -0.21 0.06
## [101,] -0.22 -0.01
## [102,] -0.21 0.01
## [103,] -0.17 -0.03
## [104,] 0.15 0.00
## [105,] 0.55 0.07
## [106,] 0.13 -0.06
## [107,] -0.20 0.00
## [108,] -0.22 0.00
## [109,] -0.20 0.01
## [110,] -0.17 -0.04
## [111,] 0.15 0.02
## [112,] 0.52 -0.04
## [113,] 0.12 0.02
## [114,] -0.20 -0.02
## [115,] -0.22 0.02
## [116,] -0.20 0.01
## [117,] -0.17 0.00
## [118,] 0.14 -0.02
## [119,] 0.51 -0.02
## [120,] 0.13 0.06
## ACF PACF
## [1,] 0.31 0.31
## [2,] -0.17 -0.30
## [3,] -0.20 -0.04
## [4,] -0.22 -0.21
## [5,] -0.21 -0.15
## [6,] 0.25 0.35
## [7,] 0.82 0.74
## [8,] 0.24 -0.30
## [9,] -0.22 -0.15
## [10,] -0.21 0.02
## [11,] -0.23 0.04
## [12,] -0.20 0.05
## [13,] 0.25 0.07
## [14,] 0.79 0.29
## [15,] 0.22 -0.11
## [16,] -0.23 -0.10
## [17,] -0.22 -0.05
## [18,] -0.23 0.07
## [19,] -0.22 -0.02
## [20,] 0.23 0.00
## [21,] 0.76 0.19
## [22,] 0.22 -0.02
## [23,] -0.23 -0.02
## [24,] -0.23 -0.11
## [25,] -0.25 -0.02
## [26,] -0.21 0.12
## [27,] 0.24 0.04
## [28,] 0.76 0.12
## [29,] 0.23 -0.02
## [30,] -0.23 -0.05
## [31,] -0.23 0.01
## [32,] -0.24 0.02
## [33,] -0.20 0.01
## [34,] 0.21 -0.08
## [35,] 0.73 0.08
## [36,] 0.19 -0.11
## [37,] -0.23 0.11
## [38,] -0.24 -0.06
## [39,] -0.22 0.14
## [40,] -0.18 0.00
## [41,] 0.23 0.03
## [42,] 0.73 0.04
## [43,] 0.20 -0.02
## [44,] -0.23 0.01
## [45,] -0.24 -0.07
## [46,] -0.24 0.00
## [47,] -0.20 -0.04
## [48,] 0.21 0.01
## [49,] 0.69 -0.06
## [50,] 0.18 0.01
## [51,] -0.24 0.01
## [52,] -0.25 -0.01
## [53,] -0.24 -0.01
## [54,] -0.20 -0.05
## [55,] 0.19 -0.02
## [56,] 0.67 0.03
## [57,] 0.17 -0.02
## [58,] -0.24 0.04
## [59,] -0.25 -0.05
## [60,] -0.25 -0.03
## [61,] -0.21 -0.03
## [62,] 0.20 0.10
## [63,] 0.66 0.05
## [64,] 0.18 0.02
## [65,] -0.22 0.01
## [66,] -0.23 0.04
## [67,] -0.23 0.03
## [68,] -0.20 -0.09
## [69,] 0.19 0.02
## [70,] 0.66 0.02
## [71,] 0.17 -0.02
## [72,] -0.23 -0.03
## [73,] -0.24 0.03
## [74,] -0.22 0.09
## [75,] -0.19 -0.03
## [76,] 0.19 -0.03
## [77,] 0.64 -0.01
## [78,] 0.16 0.06
## [79,] -0.21 0.03
## [80,] -0.20 0.06
## [81,] -0.21 -0.06
## [82,] -0.18 0.05
## [83,] 0.19 -0.03
## [84,] 0.61 -0.08
## [85,] 0.15 -0.02
## [86,] -0.22 0.02
## [87,] -0.23 -0.08
## [88,] -0.23 0.01
## [89,] -0.20 -0.03
## [90,] 0.17 -0.02
## [91,] 0.59 0.00
## [92,] 0.15 0.04
## [93,] -0.20 0.03
## [94,] -0.22 -0.03
## [95,] -0.22 -0.01
## [96,] -0.20 -0.02
## [97,] 0.16 0.00
## [98,] 0.55 -0.12
## [99,] 0.13 0.05
## [100,] -0.20 0.07
## [101,] -0.21 0.03
## [102,] -0.21 0.00
## [103,] -0.17 -0.02
## [104,] 0.16 0.01
## [105,] 0.56 0.05
## [106,] 0.13 -0.07
## [107,] -0.20 -0.02
## [108,] -0.21 0.03
## [109,] -0.20 -0.01
## [110,] -0.18 -0.03
## [111,] 0.15 -0.02
## [112,] 0.53 -0.02
## [113,] 0.12 -0.01
## [114,] -0.21 -0.05
## [115,] -0.22 -0.02
## [116,] -0.21 0.04
## [117,] -0.18 0.01
## [118,] 0.14 -0.02
## [119,] 0.51 -0.06
## [120,] 0.12 0.03
Seasonility Not in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=FALSE)
## Series: deseasonal_cnt
## ARIMA(4,1,3)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3
## -0.0266 0.0671 -0.3931 -0.2772 -0.6684 -0.6399 0.523
## s.e. 0.0825 0.0747 0.0635 0.0659 0.0717 0.1018 0.065
##
## sigma^2 estimated as 5.099: log likelihood=-811.12
## AIC=1638.23 AICc=1638.64 BIC=1669.41
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=FALSE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality not considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)
Seasonility in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=TRUE)
## Series: deseasonal_cnt
## ARIMA(1,1,3)(2,0,0)[30] with drift
##
## Coefficients:
## ar1 ma1 ma2 ma3 sar1 sar2 drift
## -0.6345 -0.2486 -0.8471 0.1469 -0.4118 -0.493 -0.0065
## s.e. 0.1022 0.1328 0.0490 0.0978 0.0556 0.058 0.0022
##
## sigma^2 estimated as 4.63: log likelihood=-803.25
## AIC=1622.49 AICc=1622.9 BIC=1653.67
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=TRUE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)