# Step 1: Load R Packages
### options(repos='http://cran.rstudio.com/')
#install.packages("astsa")
#install.packages('ggplot2')
#install.packages('forecast')
#install.packages('tseries')
#install.packages("data.table")
library(astsa)
library(forecast)
library(tseries)
library(zoo)
library(tseries)
library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")
load("WA_Rural_Interstate_41_TMC_TT_SI_reduce_withCrash.rData")
mytype = 'RI'
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/",mytype))
df_RI <- b02a
dim(df_RI)
## [1] 9303840 64
df_RI$spd_av = 3600*df_RI$DISTANCE/df_RI$Travel_TIME_ALL_VEHICLES
df_RI$spd_pv = 3600*df_RI$DISTANCE/df_RI$Travel_TIME_PASSENGER_VEHICLES
df_RI$spd_ft = 3600*df_RI$DISTANCE/df_RI$Travel_TIME_FREIGHT_TRUCKS
### Month, Day
df_RI$date <- as.character(df_RI$DATE)
df_RI$date <- str_pad(df_RI$DATE, 8, pad = "0")
df_RI$Month <- substr(df_RI$date, start = 1, stop = 2)
df_RI$Day <- substr(df_RI$date, start = 3, stop = 4)
df_RI$Year <- substr(df_RI$date, start = 5, stop = 8)
df_RI$MonthDay <- paste0(df_RI$Month,"_", df_RI$Day)
head(df_RI)
## TimeStamp TMC V1 DATE EPOCH15 Travel_TIME_ALL_VEHICLES
## 1: 114N04098_0101_0 114N04098 1 1012015 0 NA
## 2: 114N04098_0101_1 114N04098 2 1012015 1 222
## 3: 114N04098_0101_10 114N04098 11 1012015 10 NA
## 4: 114N04098_0101_11 114N04098 12 1012015 11 184
## 5: 114N04098_0101_12 114N04098 13 1012015 12 227
## 6: 114N04098_0101_13 114N04098 14 1012015 13 192
## Travel_TIME_PASSENGER_VEHICLES Travel_TIME_FREIGHT_TRUCKS NP ADMIN_LEVE
## 1: NA NA N USA
## 2: NA 222 N USA
## 3: NA NA N USA
## 4: NA 184 N USA
## 5: NA 227 N USA
## 6: NA 192 N USA
## ADMIN_LE_1 ADMIN_LE_2 DISTANCE ROAD_NUMBE ROAD_NAME LATITUDE LONGITUDE
## 1: Washington King 2.79553 I-90 47.44331 -121.6681
## 2: Washington King 2.79553 I-90 47.44331 -121.6681
## 3: Washington King 2.79553 I-90 47.44331 -121.6681
## 4: Washington King 2.79553 I-90 47.44331 -121.6681
## 5: Washington King 2.79553 I-90 47.44331 -121.6681
## 6: Washington King 2.79553 I-90 47.44331 -121.6681
## ROAD_DIREC ORN_FID FID_1 ACCESS LSHL_TY2 LSHL_TYP MED_TYPE NHS_IND
## 1: Eastbound 17787.59 6232.662 F A A S Y
## 2: Eastbound 17787.59 6232.662 F A A S Y
## 3: Eastbound 17787.59 6232.662 F A A S Y
## 4: Eastbound 17787.59 6232.662 F A A S Y
## 5: Eastbound 17787.59 6232.662 F A A S Y
## 6: Eastbound 17787.59 6232.662 F A A S Y
## PRK_ZNE RSHL_TY2 RSHL_TYP SURF_TYP SURF_TY2 TERRAIN COMP_DIR COUNTY
## 1: A A P P M E 17
## 2: A A P P M E 17
## 3: A A P P M E 17
## 4: A A P P M E 17
## 5: A A P P M E 17
## 6: A A P P M E 17
## FUNC_CLS MEDBARTY ST_FUNC RTE_NBR HPMS ROAD_INV SPD_LIMT BEGMP
## 1: 41 DE R5 90 2.514882e-311 90 70 33.36
## 2: 41 DE R5 90 2.514882e-311 90 70 33.36
## 3: 41 DE R5 90 2.514882e-311 90 70 33.36
## 4: 41 DE R5 90 2.514882e-311 90 70 33.36
## 5: 41 DE R5 90 2.514882e-311 90 70 33.36
## 6: 41 DE R5 90 2.514882e-311 90 70 33.36
## ENDMP LSHLDWID MEDWID NO_LANE1 NO_LANE2 NO_LANES RSHLDWID RSHL_WD2
## 1: 36.16 7.779324 100.0268 4 3 7 9.655901 9.584419
## 2: 36.16 7.779324 100.0268 4 3 7 9.655901 9.584419
## 3: 36.16 7.779324 100.0268 4 3 7 9.655901 9.584419
## 4: 36.16 7.779324 100.0268 4 3 7 9.655901 9.584419
## 5: 36.16 7.779324 100.0268 4 3 7 9.655901 9.584419
## 6: 36.16 7.779324 100.0268 4 3 7 9.655901 9.584419
## SEG_LNG lanewid rdwy_wd1 rdwy_wd2 rdwy_wid AADT mvmt
## 1: 0.5697374 12.25326 48.6882 37.13331 85.82151 32974.55 6.874649
## 2: 0.5697374 12.25326 48.6882 37.13331 85.82151 32974.55 6.874649
## 3: 0.5697374 12.25326 48.6882 37.13331 85.82151 32974.55 6.874649
## 4: 0.5697374 12.25326 48.6882 37.13331 85.82151 32974.55 6.874649
## 5: 0.5697374 12.25326 48.6882 37.13331 85.82151 32974.55 6.874649
## 6: 0.5697374 12.25326 48.6882 37.13331 85.82151 32974.55 6.874649
## rodwycls ORN_FID_1 Total Fatal Injury PDO DAYMTH Crash spd_av spd_pv
## 1: 6 17787.59 17 0 6 11 0101 0 NA NA
## 2: 6 17787.59 17 0 6 11 0101 0 45.33292 NA
## 3: 6 17787.59 17 0 6 11 0101 0 NA NA
## 4: 6 17787.59 17 0 6 11 0101 0 54.69515 NA
## 5: 6 17787.59 17 0 6 11 0101 0 44.33440 NA
## 6: 6 17787.59 17 0 6 11 0101 0 52.41619 NA
## spd_ft date Month Day Year MonthDay
## 1: NA 01012015 01 01 2015 01_01
## 2: 45.33292 01012015 01 01 2015 01_01
## 3: NA 01012015 01 01 2015 01_01
## 4: 54.69515 01012015 01 01 2015 01_01
## 5: 44.33440 01012015 01 01 2015 01_01
## 6: 52.41619 01012015 01 01 2015 01_01
day1<- df_RI[,-c(1)] %>% group_by(MonthDay) %>% summarize(Speed_All_Mean=mean(spd_av, na.rm=TRUE))
day1
## # A tibble: 365 x 2
## MonthDay Speed_All_Mean
## <chr> <dbl>
## 1 01_01 60.4
## 2 01_02 59.9
## 3 01_03 60.3
## 4 01_04 55.3
## 5 01_05 59.0
## 6 01_06 59.4
## 7 01_07 59.2
## 8 01_08 59.2
## 9 01_09 60.0
## 10 01_10 59.9
## # ... with 355 more rows
# Step 2: Examine Data
speed_clean <- tsclean(ts(day1$Speed_All_Mean))
plot.ts(speed_clean)
# ggplot() + geom_line(data = Q1, aes(x = TimeStamp, y = speed_clean)) + ylab('Cleaned Speed Records')
day1$cnt_ma = ma(speed_clean, order=7) # using the clean count with no outliers
day1$cnt_ma30 = ma(speed_clean, order=30)
# Step 3: Decompose Your Data
count_ma = ts(na.omit(speed_clean), frequency=30)
decomp = stl(count_ma, s.window="periodic")
deseasonal_cnt <- seasadj(decomp)
plot(decomp)
# Step 4: Stationarity
# statinary test
adf.test(count_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: count_ma
## Dickey-Fuller = -2.0188, Lag order = 7, p-value = 0.569
## alternative hypothesis: stationary
adf.test(deseasonal_cnt, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: deseasonal_cnt
## Dickey-Fuller = -1.9854, Lag order = 7, p-value = 0.5831
## alternative hypothesis: stationary
d1 = diff(deseasonal_cnt)
adf.test(d1, alternative = "stationary")
## Warning in adf.test(d1, alternative = "stationary"): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: d1
## Dickey-Fuller = -9.1652, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# Step 5: Autocorrelations and Choosing Model Order
# check ACF and PACF
acf2(count_ma)
## ACF PACF
## [1,] 0.77 0.77
## [2,] 0.60 0.02
## [3,] 0.50 0.08
## [4,] 0.47 0.15
## [5,] 0.54 0.28
## [6,] 0.63 0.27
## [7,] 0.72 0.31
## [8,] 0.63 -0.15
## [9,] 0.51 -0.07
## [10,] 0.41 -0.07
## [11,] 0.41 0.10
## [12,] 0.48 0.06
## [13,] 0.59 0.17
## [14,] 0.64 0.02
## [15,] 0.60 0.08
## [16,] 0.50 -0.05
## [17,] 0.38 -0.10
## [18,] 0.40 0.11
## [19,] 0.46 0.03
## [20,] 0.55 0.05
## [21,] 0.58 -0.04
## [22,] 0.49 -0.19
## [23,] 0.37 -0.10
## [24,] 0.26 -0.08
## [25,] 0.28 0.04
## [26,] 0.34 -0.06
## [27,] 0.42 0.00
## [28,] 0.45 -0.03
## [29,] 0.39 -0.01
## [30,] 0.26 -0.11
## [31,] 0.18 -0.01
## [32,] 0.19 -0.03
## [33,] 0.28 0.13
## [34,] 0.40 0.09
## [35,] 0.42 0.04
## [36,] 0.35 -0.10
## [37,] 0.21 -0.03
## [38,] 0.13 0.00
## [39,] 0.12 -0.03
## [40,] 0.20 0.01
## [41,] 0.31 0.07
## [42,] 0.34 0.07
## [43,] 0.27 -0.03
## [44,] 0.14 -0.03
## [45,] 0.08 0.05
## [46,] 0.07 0.00
## [47,] 0.14 0.02
## [48,] 0.25 0.05
## [49,] 0.29 -0.01
## [50,] 0.22 -0.05
## [51,] 0.10 -0.05
## [52,] 0.03 -0.01
## [53,] 0.03 -0.01
## [54,] 0.10 -0.07
## [55,] 0.19 0.01
## [56,] 0.25 0.09
## [57,] 0.17 -0.03
## [58,] 0.05 -0.04
## [59,] -0.02 -0.01
## [60,] -0.02 -0.08
## [61,] 0.05 -0.01
## [62,] 0.13 -0.01
## [63,] 0.20 0.09
## [64,] 0.12 -0.09
## [65,] -0.02 -0.08
## [66,] -0.07 0.06
## [67,] -0.06 -0.02
## [68,] 0.02 0.03
## [69,] 0.10 -0.03
## [70,] 0.13 -0.02
## [71,] 0.06 -0.03
## [72,] -0.05 0.07
## [73,] -0.11 -0.01
## [74,] -0.10 -0.01
## [75,] -0.01 0.06
## [76,] 0.07 -0.01
## [77,] 0.10 0.03
## [78,] 0.03 -0.02
## [79,] -0.06 0.03
## [80,] -0.15 0.01
## [81,] -0.15 -0.09
## [82,] -0.08 -0.09
## [83,] 0.01 0.00
## [84,] 0.05 0.04
## [85,] -0.01 0.06
## [86,] -0.11 -0.04
## [87,] -0.18 -0.02
## [88,] -0.16 0.03
## [89,] -0.11 -0.06
## [90,] -0.04 -0.07
## [91,] 0.01 0.00
## [92,] -0.05 -0.02
## [93,] -0.13 0.08
## [94,] -0.18 0.03
## [95,] -0.16 0.03
## [96,] -0.12 -0.08
## [97,] -0.04 -0.01
## [98,] -0.01 0.02
## [99,] -0.08 -0.03
## [100,] -0.16 -0.02
## [101,] -0.23 -0.05
## [102,] -0.21 0.04
## [103,] -0.17 -0.08
## [104,] -0.08 0.01
## [105,] -0.04 0.04
## [106,] -0.10 0.04
## [107,] -0.19 0.01
## [108,] -0.24 0.00
## [109,] -0.21 -0.01
## [110,] -0.16 -0.01
## [111,] -0.08 -0.03
## [112,] -0.06 -0.04
## [113,] -0.11 -0.05
## [114,] -0.20 0.00
## [115,] -0.26 0.01
## [116,] -0.25 -0.02
## [117,] -0.19 0.01
## [118,] -0.12 -0.02
## [119,] -0.10 -0.01
## [120,] -0.14 -0.01
## ACF PACF
## [1,] 0.78 0.78
## [2,] 0.61 0.01
## [3,] 0.50 0.05
## [4,] 0.48 0.17
## [5,] 0.55 0.29
## [6,] 0.64 0.27
## [7,] 0.73 0.30
## [8,] 0.64 -0.16
## [9,] 0.52 -0.07
## [10,] 0.42 -0.07
## [11,] 0.42 0.10
## [12,] 0.48 0.07
## [13,] 0.60 0.18
## [14,] 0.65 0.01
## [15,] 0.60 0.05
## [16,] 0.50 -0.03
## [17,] 0.39 -0.09
## [18,] 0.40 0.11
## [19,] 0.47 0.03
## [20,] 0.56 0.04
## [21,] 0.59 -0.04
## [22,] 0.50 -0.20
## [23,] 0.37 -0.12
## [24,] 0.27 -0.06
## [25,] 0.29 0.05
## [26,] 0.34 -0.07
## [27,] 0.43 -0.01
## [28,] 0.46 -0.03
## [29,] 0.39 -0.01
## [30,] 0.26 -0.14
## [31,] 0.18 0.03
## [32,] 0.20 -0.01
## [33,] 0.28 0.11
## [34,] 0.40 0.10
## [35,] 0.43 0.03
## [36,] 0.35 -0.09
## [37,] 0.21 -0.02
## [38,] 0.13 0.00
## [39,] 0.13 -0.02
## [40,] 0.20 0.01
## [41,] 0.32 0.07
## [42,] 0.35 0.06
## [43,] 0.27 -0.02
## [44,] 0.15 -0.03
## [45,] 0.07 0.04
## [46,] 0.07 0.01
## [47,] 0.14 0.00
## [48,] 0.26 0.07
## [49,] 0.30 -0.01
## [50,] 0.22 -0.06
## [51,] 0.10 -0.04
## [52,] 0.03 -0.01
## [53,] 0.03 -0.03
## [54,] 0.10 -0.07
## [55,] 0.19 0.01
## [56,] 0.25 0.09
## [57,] 0.18 -0.03
## [58,] 0.06 -0.03
## [59,] -0.01 -0.01
## [60,] -0.03 -0.12
## [61,] 0.05 0.01
## [62,] 0.13 0.00
## [63,] 0.20 0.09
## [64,] 0.12 -0.07
## [65,] -0.02 -0.11
## [66,] -0.07 0.08
## [67,] -0.06 0.00
## [68,] 0.02 0.02
## [69,] 0.10 -0.03
## [70,] 0.13 -0.02
## [71,] 0.06 -0.03
## [72,] -0.05 0.08
## [73,] -0.11 0.00
## [74,] -0.10 -0.02
## [75,] -0.01 0.05
## [76,] 0.07 -0.01
## [77,] 0.10 0.02
## [78,] 0.03 0.01
## [79,] -0.07 0.03
## [80,] -0.15 -0.02
## [81,] -0.15 -0.08
## [82,] -0.08 -0.09
## [83,] 0.00 -0.01
## [84,] 0.05 0.04
## [85,] -0.01 0.06
## [86,] -0.12 -0.03
## [87,] -0.18 -0.03
## [88,] -0.16 0.04
## [89,] -0.11 -0.07
## [90,] -0.05 -0.11
## [91,] 0.00 0.03
## [92,] -0.05 -0.01
## [93,] -0.13 0.09
## [94,] -0.19 0.05
## [95,] -0.16 0.01
## [96,] -0.12 -0.06
## [97,] -0.05 0.00
## [98,] -0.01 0.02
## [99,] -0.08 -0.01
## [100,] -0.17 -0.03
## [101,] -0.24 -0.06
## [102,] -0.21 0.05
## [103,] -0.17 -0.07
## [104,] -0.08 0.00
## [105,] -0.05 0.03
## [106,] -0.10 0.04
## [107,] -0.19 -0.01
## [108,] -0.24 0.03
## [109,] -0.21 0.00
## [110,] -0.16 -0.03
## [111,] -0.08 -0.03
## [112,] -0.06 -0.05
## [113,] -0.11 -0.06
## [114,] -0.20 0.00
## [115,] -0.26 0.02
## [116,] -0.25 -0.02
## [117,] -0.20 0.00
## [118,] -0.12 0.00
## [119,] -0.10 -0.02
## [120,] -0.14 -0.05
Seasonility Not in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=FALSE)
## Series: deseasonal_cnt
## ARIMA(4,1,3)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3
## -0.1154 0.2794 -0.6509 -0.3840 -0.2486 -0.5248 0.5934
## s.e. 0.0702 0.0563 0.0585 0.0581 0.0621 0.0829 0.0728
##
## sigma^2 estimated as 0.201: log likelihood=-221.94
## AIC=459.87 AICc=460.28 BIC=491.05
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=FALSE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality not considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)
Seasonility in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=TRUE)
## Series: deseasonal_cnt
## ARIMA(4,1,3)(0,0,1)[30]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 sma1
## -0.1081 0.2681 -0.6208 -0.3735 -0.2623 -0.5221 0.5706 -0.1718
## s.e. 0.0756 0.0617 0.0668 0.0598 0.0673 0.0903 0.0831 0.0715
##
## sigma^2 estimated as 0.1978: log likelihood=-218.89
## AIC=455.78 AICc=456.29 BIC=490.85
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=TRUE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)