# Step 1: Load R Packages
### options(repos='http://cran.rstudio.com/')
#install.packages("astsa")
#install.packages('ggplot2')
#install.packages('forecast')
#install.packages('tseries')
#install.packages("data.table")
library(astsa)
library(forecast)
library(tseries)
library(zoo)
library(tseries)
library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")
load("./multi-lane_divided_NC_reduce_withCrash_no_intersection.rData")
mytype = 'RMD'
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/",mytype))
df_RMD <- N_mun_med
dim(df_RMD)
## [1] 6157824 30
### Calculating Speed
df_RMD$spd_av = 3600*df_RMD$TMC_length/df_RMD$Travel_TIME_ALL_VEHICLES/5280
df_RMD$spd_pv = 3600*df_RMD$TMC_length/df_RMD$Travel_TIME_PASSENGER_VEHICLES/5280
df_RMD$spd_ft = 3600*df_RMD$TMC_length/df_RMD$Travel_TIME_FREIGHT_TRUCKS/5280
### Month, Day
df_RMD$date <- as.character(df_RMD$DATE)
df_RMD$date <- str_pad(df_RMD$DATE, 8, pad = "0")
df_RMD$Month <- substr(df_RMD$date, start = 1, stop = 2)
df_RMD$Day <- substr(df_RMD$date, start = 3, stop = 4)
df_RMD$Year <- substr(df_RMD$date, start = 5, stop = 8)
df_RMD$MonthDay <- paste0(df_RMD$Month,"_", df_RMD$Day)
head(df_RMD)
## TimeStamp TMC DATE EPOCH15 Travel_TIME_ALL_VEHICLES
## 1: 110P16085_0101_0 110P16085 1012015 0 NA
## 2: 110P16085_0101_1 110P16085 1012015 1 NA
## 3: 110P16085_0101_10 110P16085 1012015 10 204
## 4: 110P16085_0101_11 110P16085 1012015 11 NA
## 5: 110P16085_0101_12 110P16085 1012015 12 NA
## 6: 110P16085_0101_13 110P16085 1012015 13 NA
## Travel_TIME_PASSENGER_VEHICLES Travel_TIME_FREIGHT_TRUCKS TMC_length
## 1: NA NA 15234.63
## 2: NA NA 15234.63
## 3: 204 NA 15234.63
## 4: NA NA 15234.63
## 5: NA NA 15234.63
## 6: NA NA 15234.63
## ave_aadt ave_wtdsgspd ave_medwid ave_peaklane ave_row ave_sur_wid
## 1: 18000 70 15 NA 178.605 29.42966
## 2: 18000 70 15 NA 178.605 29.42966
## 3: 18000 70 15 NA 178.605 29.42966
## 4: 18000 70 15 NA 178.605 29.42966
## 5: 18000 70 15 NA 178.605 29.42966
## 6: 18000 70 15 NA 178.605 29.42966
## ave_no_lanes ave_spd_limt ave_rodwycls ave_rshldwid FC TER ACC MED
## 1: 3.856655 61.20598 4.750853 7.71331 6 3 F Gr
## 2: 3.856655 61.20598 4.750853 7.71331 6 3 F Gr
## 3: 3.856655 61.20598 4.750853 7.71331 6 3 F Gr
## 4: 3.856655 61.20598 4.750853 7.71331 6 3 F Gr
## 5: 3.856655 61.20598 4.750853 7.71331 6 3 F Gr
## 6: 3.856655 61.20598 4.750853 7.71331 6 3 F Gr
## Total K A B C O DAYMTH Crash spd_av spd_pv spd_ft date Month
## 1: 0 0 0 0 0 0 0101 0 NA NA NA 01012015 01
## 2: 0 0 0 0 0 0 0101 0 NA NA NA 01012015 01
## 3: 0 0 0 0 0 0 0101 0 50.91787 50.91787 NA 01012015 01
## 4: 0 0 0 0 0 0 0101 0 NA NA NA 01012015 01
## 5: 0 0 0 0 0 0 0101 0 NA NA NA 01012015 01
## 6: 0 0 0 0 0 0 0101 0 NA NA NA 01012015 01
## Day Year MonthDay
## 1: 01 2015 01_01
## 2: 01 2015 01_01
## 3: 01 2015 01_01
## 4: 01 2015 01_01
## 5: 01 2015 01_01
## 6: 01 2015 01_01
day1<- df_RMD[,-c(1)] %>% group_by(MonthDay) %>% summarize(Speed_All_Mean=mean(spd_av, na.rm=TRUE))
day1
## # A tibble: 365 x 2
## MonthDay Speed_All_Mean
## <chr> <dbl>
## 1 01_01 53.1
## 2 01_02 50.3
## 3 01_03 51.5
## 4 01_04 52.9
## 5 01_05 49.6
## 6 01_06 49.7
## 7 01_07 49.4
## 8 01_08 49.5
## 9 01_09 49.5
## 10 01_10 51.9
## # ... with 355 more rows
# Step 2: Examine Data
speed_clean <- tsclean(ts(day1$Speed_All_Mean))
plot.ts(speed_clean)
# ggplot() + geom_line(data = Q1, aes(x = TimeStamp, y = speed_clean)) + ylab('Cleaned Speed Records')
day1$cnt_ma = ma(speed_clean, order=7) # using the clean count with no outliers
day1$cnt_ma30 = ma(speed_clean, order=30)
# Step 3: Decompose Your Data
count_ma = ts(na.omit(speed_clean), frequency=30)
decomp = stl(count_ma, s.window="periodic")
deseasonal_cnt <- seasadj(decomp)
plot(decomp)
# Step 4: Stationarity
# statinary test
adf.test(count_ma, alternative = "stationary")
## Warning in adf.test(count_ma, alternative = "stationary"): p-value smaller
## than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: count_ma
## Dickey-Fuller = -4.2316, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
adf.test(deseasonal_cnt, alternative = "stationary")
## Warning in adf.test(deseasonal_cnt, alternative = "stationary"): p-value
## smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: deseasonal_cnt
## Dickey-Fuller = -4.309, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
d1 = diff(deseasonal_cnt)
adf.test(d1, alternative = "stationary")
## Warning in adf.test(d1, alternative = "stationary"): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: d1
## Dickey-Fuller = -11.744, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# Step 5: Autocorrelations and Choosing Model Order
# check ACF and PACF
acf2(count_ma)
## ACF PACF
## [1,] 0.36 0.36
## [2,] -0.16 -0.33
## [3,] -0.18 0.03
## [4,] -0.18 -0.20
## [5,] -0.17 -0.09
## [6,] 0.30 0.46
## [7,] 0.84 0.73
## [8,] 0.30 -0.27
## [9,] -0.17 0.01
## [10,] -0.16 0.08
## [11,] -0.17 -0.05
## [12,] -0.17 0.05
## [13,] 0.29 0.10
## [14,] 0.80 0.27
## [15,] 0.28 -0.14
## [16,] -0.18 -0.01
## [17,] -0.18 -0.08
## [18,] -0.18 0.02
## [19,] -0.17 -0.03
## [20,] 0.28 0.04
## [21,] 0.77 0.16
## [22,] 0.27 -0.07
## [23,] -0.19 0.01
## [24,] -0.19 -0.08
## [25,] -0.20 -0.02
## [26,] -0.19 -0.03
## [27,] 0.27 0.07
## [28,] 0.77 0.20
## [29,] 0.28 -0.01
## [30,] -0.20 -0.10
## [31,] -0.19 0.06
## [32,] -0.19 0.03
## [33,] -0.18 0.01
## [34,] 0.25 -0.03
## [35,] 0.74 0.06
## [36,] 0.23 -0.18
## [37,] -0.21 0.06
## [38,] -0.20 -0.08
## [39,] -0.18 0.09
## [40,] -0.17 -0.01
## [41,] 0.25 0.00
## [42,] 0.72 0.01
## [43,] 0.23 0.02
## [44,] -0.21 0.05
## [45,] -0.20 -0.04
## [46,] -0.20 -0.05
## [47,] -0.18 -0.03
## [48,] 0.23 0.04
## [49,] 0.70 -0.02
## [50,] 0.23 0.05
## [51,] -0.19 0.05
## [52,] -0.19 -0.03
## [53,] -0.19 0.07
## [54,] -0.18 0.00
## [55,] 0.23 -0.01
## [56,] 0.68 0.02
## [57,] 0.23 -0.01
## [58,] -0.18 0.05
## [59,] -0.19 0.01
## [60,] -0.18 0.00
## [61,] -0.17 0.00
## [62,] 0.23 0.02
## [63,] 0.67 -0.01
## [64,] 0.23 0.04
## [65,] -0.17 0.09
## [66,] -0.17 0.04
## [67,] -0.17 0.00
## [68,] -0.16 -0.05
## [69,] 0.22 0.05
## [70,] 0.65 0.00
## [71,] 0.21 -0.09
## [72,] -0.18 -0.02
## [73,] -0.18 -0.02
## [74,] -0.16 0.05
## [75,] -0.16 0.01
## [76,] 0.22 -0.01
## [77,] 0.64 0.02
## [78,] 0.21 0.02
## [79,] -0.18 -0.02
## [80,] -0.16 0.06
## [81,] -0.16 -0.03
## [82,] -0.15 -0.02
## [83,] 0.20 -0.06
## [84,] 0.60 -0.04
## [85,] 0.19 0.00
## [86,] -0.18 -0.01
## [87,] -0.18 -0.11
## [88,] -0.18 -0.02
## [89,] -0.16 0.00
## [90,] 0.19 -0.01
## [91,] 0.58 -0.01
## [92,] 0.18 0.00
## [93,] -0.17 -0.02
## [94,] -0.19 -0.04
## [95,] -0.18 0.05
## [96,] -0.16 -0.01
## [97,] 0.18 -0.02
## [98,] 0.55 -0.08
## [99,] 0.16 0.03
## [100,] -0.18 0.01
## [101,] -0.19 -0.02
## [102,] -0.19 -0.02
## [103,] -0.17 -0.02
## [104,] 0.17 0.03
## [105,] 0.54 0.03
## [106,] 0.16 -0.03
## [107,] -0.18 0.01
## [108,] -0.18 0.04
## [109,] -0.17 -0.02
## [110,] -0.17 0.00
## [111,] 0.16 0.05
## [112,] 0.52 -0.01
## [113,] 0.15 0.01
## [114,] -0.19 -0.03
## [115,] -0.19 -0.03
## [116,] -0.18 0.01
## [117,] -0.18 -0.05
## [118,] 0.14 -0.03
## [119,] 0.50 -0.01
## [120,] 0.15 0.03
## ACF PACF
## [1,] 0.36 0.36
## [2,] -0.16 -0.33
## [3,] -0.17 0.03
## [4,] -0.17 -0.20
## [5,] -0.18 -0.09
## [6,] 0.31 0.46
## [7,] 0.85 0.74
## [8,] 0.30 -0.27
## [9,] -0.18 -0.01
## [10,] -0.16 0.08
## [11,] -0.16 -0.03
## [12,] -0.16 0.07
## [13,] 0.30 0.10
## [14,] 0.81 0.26
## [15,] 0.28 -0.14
## [16,] -0.18 0.01
## [17,] -0.17 -0.07
## [18,] -0.17 -0.01
## [19,] -0.18 -0.05
## [20,] 0.28 0.03
## [21,] 0.78 0.16
## [22,] 0.27 -0.07
## [23,] -0.19 -0.01
## [24,] -0.19 -0.10
## [25,] -0.20 -0.03
## [26,] -0.19 -0.01
## [27,] 0.27 0.08
## [28,] 0.78 0.18
## [29,] 0.28 -0.03
## [30,] -0.21 -0.12
## [31,] -0.19 0.08
## [32,] -0.19 0.05
## [33,] -0.18 0.02
## [34,] 0.25 -0.04
## [35,] 0.75 0.06
## [36,] 0.23 -0.17
## [37,] -0.22 0.10
## [38,] -0.20 -0.07
## [39,] -0.18 0.07
## [40,] -0.17 0.00
## [41,] 0.26 0.01
## [42,] 0.73 0.03
## [43,] 0.24 0.04
## [44,] -0.21 0.03
## [45,] -0.20 -0.07
## [46,] -0.20 -0.06
## [47,] -0.18 -0.03
## [48,] 0.24 0.04
## [49,] 0.70 -0.04
## [50,] 0.23 0.04
## [51,] -0.19 0.05
## [52,] -0.19 -0.02
## [53,] -0.19 0.07
## [54,] -0.18 -0.01
## [55,] 0.23 -0.04
## [56,] 0.68 0.03
## [57,] 0.23 0.02
## [58,] -0.19 0.07
## [59,] -0.19 0.00
## [60,] -0.19 -0.04
## [61,] -0.17 0.01
## [62,] 0.23 0.05
## [63,] 0.68 0.02
## [64,] 0.23 0.04
## [65,] -0.17 0.08
## [66,] -0.17 0.03
## [67,] -0.17 0.03
## [68,] -0.16 -0.05
## [69,] 0.23 0.03
## [70,] 0.66 0.00
## [71,] 0.22 -0.09
## [72,] -0.18 -0.01
## [73,] -0.17 0.01
## [74,] -0.16 0.04
## [75,] -0.16 -0.03
## [76,] 0.22 -0.03
## [77,] 0.64 0.01
## [78,] 0.21 0.01
## [79,] -0.18 -0.04
## [80,] -0.16 0.05
## [81,] -0.16 -0.04
## [82,] -0.16 -0.01
## [83,] 0.20 -0.06
## [84,] 0.61 -0.06
## [85,] 0.19 -0.02
## [86,] -0.18 -0.01
## [87,] -0.18 -0.09
## [88,] -0.18 0.00
## [89,] -0.17 -0.01
## [90,] 0.18 -0.05
## [91,] 0.58 0.00
## [92,] 0.19 0.02
## [93,] -0.17 0.01
## [94,] -0.18 -0.03
## [95,] -0.17 0.06
## [96,] -0.17 -0.01
## [97,] 0.17 0.01
## [98,] 0.55 -0.08
## [99,] 0.17 0.03
## [100,] -0.18 0.02
## [101,] -0.18 -0.02
## [102,] -0.18 -0.01
## [103,] -0.17 0.00
## [104,] 0.17 0.04
## [105,] 0.55 0.01
## [106,] 0.16 -0.05
## [107,] -0.18 -0.01
## [108,] -0.18 0.04
## [109,] -0.17 -0.04
## [110,] -0.17 0.00
## [111,] 0.15 0.03
## [112,] 0.52 -0.01
## [113,] 0.15 -0.01
## [114,] -0.19 -0.05
## [115,] -0.19 -0.05
## [116,] -0.19 0.02
## [117,] -0.18 -0.04
## [118,] 0.14 -0.01
## [119,] 0.50 -0.01
## [120,] 0.14 0.00
Seasonility Not in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=FALSE)
## Series: deseasonal_cnt
## ARIMA(4,1,4)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.8434 -1.2940 0.6022 -0.7646 -1.7171 1.6677 -0.9892 0.2558
## s.e. 0.0438 0.0542 0.0554 0.0402 0.0671 0.1038 0.0927 0.0538
##
## sigma^2 estimated as 0.9354: log likelihood=-503.6
## AIC=1025.2 AICc=1025.7 BIC=1060.27
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=FALSE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality not considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)
Seasonility in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=TRUE)
## Series: deseasonal_cnt
## ARIMA(1,1,2)(2,0,1)[30]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sar2 sma1
## -0.6686 -0.0499 -0.8377 -0.1391 -0.3383 -0.4303
## s.e. 0.0883 0.0395 0.0398 0.2482 0.1374 0.2724
##
## sigma^2 estimated as 1.499: log likelihood=-598.8
## AIC=1211.59 AICc=1211.91 BIC=1238.87
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=TRUE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)