# Step 1: Load R Packages
### options(repos='http://cran.rstudio.com/')
#install.packages("astsa")
#install.packages('ggplot2')
#install.packages('forecast')
#install.packages('tseries')
#install.packages("data.table")
library(astsa)
library(forecast)
library(tseries)
library(zoo)
library(tseries)
library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")
load("./multi-lane_undivided_WA_reduce_withCrash.rData")
mytype = 'RMU'
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/",mytype))
df_RMU <- W_mun_nomed
dim(df_RMU)
## [1] 946944 64
df_RMU$spd_av = 3600*df_RMU$DISTANCE/df_RMU$Travel_TIME_ALL_VEHICLES
df_RMU$spd_pv = 3600*df_RMU$DISTANCE/df_RMU$Travel_TIME_PASSENGER_VEHICLES
df_RMU$spd_ft = 3600*df_RMU$DISTANCE/df_RMU$Travel_TIME_FREIGHT_TRUCKS
### Month, Day
df_RMU$date <- as.character(df_RMU$DATE)
df_RMU$date <- str_pad(df_RMU$DATE, 8, pad = "0")
df_RMU$Month <- substr(df_RMU$date, start = 1, stop = 2)
df_RMU$Day <- substr(df_RMU$date, start = 3, stop = 4)
df_RMU$Year <- substr(df_RMU$date, start = 5, stop = 8)
df_RMU$MonthDay <- paste0(df_RMU$Month,"_", df_RMU$Day)
head(df_RMU)
## TimeStamp TMC V1 DATE EPOCH15
## 1: 114N05556_0101_0 114N05556 479137 1012015 0
## 2: 114N05556_0101_1 114N05556 479138 1012015 1
## 3: 114N05556_0101_10 114N05556 479147 1012015 10
## 4: 114N05556_0101_11 114N05556 479148 1012015 11
## 5: 114N05556_0101_12 114N05556 479149 1012015 12
## 6: 114N05556_0101_13 114N05556 479150 1012015 13
## Travel_TIME_ALL_VEHICLES Travel_TIME_PASSENGER_VEHICLES
## 1: NA NA
## 2: NA NA
## 3: NA NA
## 4: 156 NA
## 5: NA NA
## 6: NA NA
## Travel_TIME_FREIGHT_TRUCKS NP ADMIN_LEVE ADMIN_LE_1 ADMIN_LE_2 DISTANCE
## 1: NA N USA Washington Whitman 0.67404
## 2: NA N USA Washington Whitman 0.67404
## 3: NA N USA Washington Whitman 0.67404
## 4: 156 N USA Washington Whitman 0.67404
## 5: NA N USA Washington Whitman 0.67404
## 6: NA N USA Washington Whitman 0.67404
## ROAD_NUMBE ROAD_NAME LATITUDE LONGITUDE ROAD_DIREC ORN_FID FID_1
## 1: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## 2: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## 3: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## 4: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## 5: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## 6: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## ACCESS LSHL_TY2 LSHL_TYP MED_TYPE NHS_IND PRK_ZNE RSHL_TY2 RSHL_TYP
## 1: 5 C Y X C
## 2: 5 C Y X C
## 3: 5 C Y X C
## 4: 5 C Y X C
## 5: 5 C Y X C
## 6: 5 C Y X C
## SURF_TYP SURF_TY2 TERRAIN COMP_DIR COUNTY FUNC_CLS MEDBARTY ST_FUNC
## 1: A R N 38 43 R1
## 2: A R N 38 43 R1
## 3: A R N 38 43 R1
## 4: A R N 38 43 R1
## 5: A R N 38 43 R1
## 6: A R N 38 43 R1
## RTE_NBR HPMS ROAD_INV SPD_LIMT BEGMP ENDMP LSHLDWID MEDWID
## 1: 195 6195380374900 195 60 35.95 36.64 0 0
## 2: 195 6195380374900 195 60 35.95 36.64 0 0
## 3: 195 6195380374900 195 60 35.95 36.64 0 0
## 4: 195 6195380374900 195 60 35.95 36.64 0 0
## 5: 195 6195380374900 195 60 35.95 36.64 0 0
## 6: 195 6195380374900 195 60 35.95 36.64 0 0
## NO_LANE1 NO_LANE2 NO_LANES RSHLDWID RSHL_WD2 SEG_LNG lanewid
## 1: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## 2: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## 3: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## 4: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## 5: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## 6: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## rdwy_wd1 rdwy_wd2 rdwy_wid AADT mvmt rodwycls ORN_FID_1 Total
## 1: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## 2: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## 3: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## 4: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## 5: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## 6: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## Fatal Injury PDO DAYMTH Crash spd_av spd_pv spd_ft date Month
## 1: 0 2 8 0101 0 NA NA NA 01012015 01
## 2: 0 2 8 0101 0 NA NA NA 01012015 01
## 3: 0 2 8 0101 0 NA NA NA 01012015 01
## 4: 0 2 8 0101 0 15.55477 NA 15.55477 01012015 01
## 5: 0 2 8 0101 0 NA NA NA 01012015 01
## 6: 0 2 8 0101 0 NA NA NA 01012015 01
## Day Year MonthDay
## 1: 01 2015 01_01
## 2: 01 2015 01_01
## 3: 01 2015 01_01
## 4: 01 2015 01_01
## 5: 01 2015 01_01
## 6: 01 2015 01_01
day1<- df_RMU[,-c(1)] %>% group_by(MonthDay) %>% summarize(Speed_All_Mean=mean(spd_av, na.rm=TRUE))
day1
## # A tibble: 365 x 2
## MonthDay Speed_All_Mean
## <chr> <dbl>
## 1 01_01 36.6
## 2 01_02 35.9
## 3 01_03 37.0
## 4 01_04 33.9
## 5 01_05 36.0
## 6 01_06 37.5
## 7 01_07 37.3
## 8 01_08 36.5
## 9 01_09 36.0
## 10 01_10 35.8
## # ... with 355 more rows
# Step 2: Examine Data
speed_clean <- tsclean(ts(day1$Speed_All_Mean))
plot.ts(speed_clean)
# ggplot() + geom_line(data = Q1, aes(x = TimeStamp, y = speed_clean)) + ylab('Cleaned Speed Records')
day1$cnt_ma = ma(speed_clean, order=7) # using the clean count with no outliers
day1$cnt_ma30 = ma(speed_clean, order=30)
# Step 3: Decompose Your Data
count_ma = ts(na.omit(speed_clean), frequency=30)
decomp = stl(count_ma, s.window="periodic")
deseasonal_cnt <- seasadj(decomp)
plot(decomp)
# Step 4: Stationarity
# statinary test
adf.test(count_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: count_ma
## Dickey-Fuller = -3.7714, Lag order = 7, p-value = 0.0207
## alternative hypothesis: stationary
adf.test(deseasonal_cnt, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: deseasonal_cnt
## Dickey-Fuller = -3.7924, Lag order = 7, p-value = 0.01965
## alternative hypothesis: stationary
d1 = diff(deseasonal_cnt)
adf.test(d1, alternative = "stationary")
## Warning in adf.test(d1, alternative = "stationary"): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: d1
## Dickey-Fuller = -10.073, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# Step 5: Autocorrelations and Choosing Model Order
# check ACF and PACF
acf2(count_ma)
## ACF PACF
## [1,] 0.49 0.49
## [2,] 0.56 0.43
## [3,] 0.51 0.24
## [4,] 0.49 0.14
## [5,] 0.53 0.19
## [6,] 0.43 0.00
## [7,] 0.63 0.35
## [8,] 0.40 -0.09
## [9,] 0.49 0.05
## [10,] 0.45 0.02
## [11,] 0.41 -0.04
## [12,] 0.46 0.05
## [13,] 0.40 0.05
## [14,] 0.56 0.19
## [15,] 0.35 -0.08
## [16,] 0.46 0.04
## [17,] 0.40 -0.03
## [18,] 0.39 0.01
## [19,] 0.46 0.09
## [20,] 0.38 0.02
## [21,] 0.52 0.09
## [22,] 0.35 -0.03
## [23,] 0.43 -0.03
## [24,] 0.37 -0.03
## [25,] 0.37 0.03
## [26,] 0.40 -0.05
## [27,] 0.31 -0.06
## [28,] 0.49 0.14
## [29,] 0.31 -0.04
## [30,] 0.38 -0.03
## [31,] 0.31 -0.05
## [32,] 0.35 0.05
## [33,] 0.37 0.00
## [34,] 0.29 -0.01
## [35,] 0.47 0.13
## [36,] 0.27 -0.06
## [37,] 0.37 0.01
## [38,] 0.32 0.02
## [39,] 0.32 -0.01
## [40,] 0.34 -0.01
## [41,] 0.25 -0.06
## [42,] 0.44 0.07
## [43,] 0.24 -0.01
## [44,] 0.32 -0.06
## [45,] 0.29 0.02
## [46,] 0.29 0.02
## [47,] 0.33 0.03
## [48,] 0.26 0.00
## [49,] 0.39 0.00
## [50,] 0.24 0.02
## [51,] 0.33 0.04
## [52,] 0.31 0.05
## [53,] 0.27 -0.03
## [54,] 0.33 0.01
## [55,] 0.26 0.00
## [56,] 0.36 -0.04
## [57,] 0.22 -0.02
## [58,] 0.27 -0.09
## [59,] 0.24 -0.04
## [60,] 0.25 0.03
## [61,] 0.26 -0.04
## [62,] 0.19 -0.04
## [63,] 0.30 0.00
## [64,] 0.20 0.06
## [65,] 0.25 0.01
## [66,] 0.23 0.02
## [67,] 0.22 -0.02
## [68,] 0.24 -0.01
## [69,] 0.16 -0.04
## [70,] 0.27 -0.05
## [71,] 0.17 0.01
## [72,] 0.21 -0.02
## [73,] 0.18 -0.03
## [74,] 0.21 0.05
## [75,] 0.22 0.02
## [76,] 0.11 -0.09
## [77,] 0.27 0.10
## [78,] 0.12 -0.04
## [79,] 0.20 0.03
## [80,] 0.16 -0.03
## [81,] 0.12 -0.10
## [82,] 0.14 -0.07
## [83,] 0.06 -0.04
## [84,] 0.21 0.02
## [85,] 0.06 -0.03
## [86,] 0.12 -0.01
## [87,] 0.10 -0.04
## [88,] 0.09 0.01
## [89,] 0.14 0.06
## [90,] 0.05 0.00
## [91,] 0.20 0.10
## [92,] 0.03 -0.06
## [93,] 0.14 0.04
## [94,] 0.08 -0.05
## [95,] 0.08 0.03
## [96,] 0.10 -0.02
## [97,] 0.00 -0.08
## [98,] 0.15 0.03
## [99,] 0.03 0.02
## [100,] 0.09 -0.01
## [101,] 0.04 -0.02
## [102,] 0.03 -0.05
## [103,] 0.06 -0.03
## [104,] -0.03 0.01
## [105,] 0.09 -0.02
## [106,] 0.00 0.05
## [107,] 0.06 0.00
## [108,] 0.03 0.04
## [109,] 0.00 -0.06
## [110,] 0.02 -0.02
## [111,] -0.05 0.00
## [112,] 0.12 0.08
## [113,] -0.04 -0.05
## [114,] 0.03 -0.03
## [115,] -0.01 -0.01
## [116,] -0.03 -0.03
## [117,] 0.01 0.01
## [118,] -0.05 0.08
## [119,] 0.06 -0.06
## [120,] -0.08 -0.04
## ACF PACF
## [1,] 0.50 0.50
## [2,] 0.57 0.43
## [3,] 0.53 0.24
## [4,] 0.51 0.14
## [5,] 0.53 0.18
## [6,] 0.44 -0.01
## [7,] 0.64 0.36
## [8,] 0.41 -0.11
## [9,] 0.51 0.06
## [10,] 0.46 0.01
## [11,] 0.42 -0.04
## [12,] 0.47 0.04
## [13,] 0.41 0.06
## [14,] 0.57 0.19
## [15,] 0.37 -0.08
## [16,] 0.47 0.02
## [17,] 0.41 -0.02
## [18,] 0.39 -0.01
## [19,] 0.48 0.12
## [20,] 0.39 0.02
## [21,] 0.53 0.09
## [22,] 0.36 -0.04
## [23,] 0.43 -0.04
## [24,] 0.38 -0.03
## [25,] 0.38 0.05
## [26,] 0.41 -0.04
## [27,] 0.32 -0.06
## [28,] 0.50 0.14
## [29,] 0.32 -0.03
## [30,] 0.36 -0.09
## [31,] 0.33 -0.01
## [32,] 0.36 0.07
## [33,] 0.38 -0.01
## [34,] 0.30 0.00
## [35,] 0.47 0.11
## [36,] 0.28 -0.08
## [37,] 0.37 0.04
## [38,] 0.33 0.00
## [39,] 0.33 -0.01
## [40,] 0.34 -0.02
## [41,] 0.27 -0.06
## [42,] 0.44 0.07
## [43,] 0.25 0.01
## [44,] 0.32 -0.06
## [45,] 0.31 0.02
## [46,] 0.29 0.02
## [47,] 0.34 0.03
## [48,] 0.26 -0.02
## [49,] 0.40 0.04
## [50,] 0.25 0.03
## [51,] 0.34 0.04
## [52,] 0.32 0.04
## [53,] 0.28 -0.04
## [54,] 0.33 0.00
## [55,] 0.26 0.03
## [56,] 0.37 -0.04
## [57,] 0.23 -0.02
## [58,] 0.27 -0.08
## [59,] 0.25 -0.04
## [60,] 0.24 -0.01
## [61,] 0.27 -0.02
## [62,] 0.20 -0.03
## [63,] 0.30 -0.01
## [64,] 0.21 0.07
## [65,] 0.25 0.00
## [66,] 0.23 0.01
## [67,] 0.23 0.02
## [68,] 0.25 -0.02
## [69,] 0.17 -0.04
## [70,] 0.26 -0.06
## [71,] 0.18 -0.01
## [72,] 0.21 0.00
## [73,] 0.19 -0.01
## [74,] 0.21 0.04
## [75,] 0.23 0.03
## [76,] 0.12 -0.11
## [77,] 0.28 0.10
## [78,] 0.12 -0.07
## [79,] 0.20 0.06
## [80,] 0.17 -0.03
## [81,] 0.13 -0.10
## [82,] 0.14 -0.09
## [83,] 0.06 -0.05
## [84,] 0.21 0.02
## [85,] 0.07 0.00
## [86,] 0.13 -0.01
## [87,] 0.11 -0.04
## [88,] 0.09 -0.01
## [89,] 0.14 0.08
## [90,] 0.04 -0.03
## [91,] 0.20 0.12
## [92,] 0.04 -0.06
## [93,] 0.14 0.03
## [94,] 0.09 -0.07
## [95,] 0.07 0.05
## [96,] 0.10 -0.05
## [97,] 0.00 -0.04
## [98,] 0.15 0.01
## [99,] 0.04 0.03
## [100,] 0.08 -0.06
## [101,] 0.05 0.00
## [102,] 0.03 -0.05
## [103,] 0.07 0.00
## [104,] -0.03 0.00
## [105,] 0.10 -0.01
## [106,] 0.00 0.00
## [107,] 0.05 0.03
## [108,] 0.03 -0.01
## [109,] 0.00 -0.02
## [110,] 0.02 -0.02
## [111,] -0.05 0.01
## [112,] 0.11 0.05
## [113,] -0.05 -0.05
## [114,] 0.02 -0.03
## [115,] 0.00 0.04
## [116,] -0.02 -0.03
## [117,] 0.01 0.02
## [118,] -0.05 0.06
## [119,] 0.06 -0.04
## [120,] -0.09 -0.08
Seasonility Not in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=FALSE)
## Series: deseasonal_cnt
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.8769
## s.e. 0.0275
##
## sigma^2 estimated as 0.5475: log likelihood=-407.08
## AIC=818.15 AICc=818.18 BIC=825.95
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=FALSE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality not considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)
Seasonility in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=TRUE)
## Series: deseasonal_cnt
## ARIMA(0,1,3)(2,0,0)[30]
##
## Coefficients:
## ma1 ma2 ma3 sar1 sar2
## -0.9411 0.1711 -0.1092 -0.0628 -0.0520
## s.e. 0.0531 0.0762 0.0544 0.0593 0.0598
##
## sigma^2 estimated as 0.5427: log likelihood=-403.6
## AIC=819.21 AICc=819.44 BIC=842.59
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=TRUE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)