# Step 1: Load R Packages
### options(repos='http://cran.rstudio.com/')
#install.packages("astsa")
#install.packages('ggplot2')
#install.packages('forecast')
#install.packages('tseries')
#install.packages("data.table")
library(astsa)
library(forecast)
library(tseries)
library(zoo)
library(tseries)
library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
mytype = 'RI'
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")
load("./OH_Principal_Arterial_Rural_Interstate_1_TMC_TT_SI_reduce_withCrash.rData")
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/",mytype))
df_RMA <- b02a
df_RMA$spd_av = 3600*df_RMA$DISTANCE/df_RMA$Travel_TIME_ALL_VEHICLES
df_RMA$spd_pv = 3600*df_RMA$DISTANCE/df_RMA$Travel_TIME_PASSENGER_VEHICLES
df_RMA$spd_ft = 3600*df_RMA$DISTANCE/df_RMA$Travel_TIME_FREIGHT_TRUCKS
### Month, Day
df_RMA$date <- as.character(df_RMA$DATE)
df_RMA$date <- str_pad(df_RMA$DATE, 8, pad = "0")
df_RMA$Month <- substr(df_RMA$date, start = 1, stop = 2)
df_RMA$Day <- substr(df_RMA$date, start = 3, stop = 4)
df_RMA$Year <- substr(df_RMA$date, start = 5, stop = 8)
df_RMA$MonthDay <- paste0(df_RMA$Month,"_", df_RMA$Day)
head(df_RMA)
## TimeStamp TMC DATE EPOCH1h Travel_TIME_ALL_VEHICLES
## 1: 108N05179_0101_0 108N05179 1012015 0 30.000000
## 2: 108N05179_0101_1 108N05179 1012015 1 5.000000
## 3: 108N05179_0101_10 108N05179 1012015 10 6.500000
## 4: 108N05179_0101_11 108N05179 1012015 11 4.833333
## 5: 108N05179_0101_12 108N05179 1012015 12 5.000000
## 6: 108N05179_0101_13 108N05179 1012015 13 5.200000
## Travel_TIME_PASSENGER_VEHICLES Travel_TIME_FREIGHT_TRUCKS ADMIN_LEVE
## 1: NA 30.0 USA
## 2: 5.000000 NA USA
## 3: NA 6.5 USA
## 4: 4.666667 6.0 USA
## 5: 5.000000 NA USA
## 6: 5.250000 5.5 USA
## ADMIN_LE_1 ADMIN_LE_2 DISTANCE ROAD_NUMBE ROAD_NAME LATITUDE LONGITUDE
## 1: Ohio Wood 0.08766 I-75 41.16766 -83.64961
## 2: Ohio Wood 0.08766 I-75 41.16766 -83.64961
## 3: Ohio Wood 0.08766 I-75 41.16766 -83.64961
## 4: Ohio Wood 0.08766 I-75 41.16766 -83.64961
## 5: Ohio Wood 0.08766 I-75 41.16766 -83.64961
## 6: Ohio Wood 0.08766 I-75 41.16766 -83.64961
## ROAD_DIREC ORN_FID COUNTY divided SURF_TYP NHS_CDE HPMS ACCESS AADT_YR
## 1: Southbound 33004.3 WOO D G N F 12.03248
## 2: Southbound 33004.3 WOO D G N F 12.03248
## 3: Southbound 33004.3 WOO D G N F 12.03248
## 4: Southbound 33004.3 WOO D G N F 12.03248
## 5: Southbound 33004.3 WOO D G N F 12.03248
## 6: Southbound 33004.3 WOO D G N F 12.03248
## FED_FACI PK_LANES MED_TYPE FED_MEDW BEGMP ENDMP SEG_LNG cnty_rte
## 1: 2 4 4.032475 65.93505 0 25.24 0.2367525 WOO0075R
## 2: 2 4 4.032475 65.93505 0 25.24 0.2367525 WOO0075R
## 3: 2 4 4.032475 65.93505 0 25.24 0.2367525 WOO0075R
## 4: 2 4 4.032475 65.93505 0 25.24 0.2367525 WOO0075R
## 5: 2 4 4.032475 65.93505 0 25.24 0.2367525 WOO0075R
## 6: 2 4 4.032475 65.93505 0 25.24 0.2367525 WOO0075R
## rte_nbr aadt aadt_bc aadt_pt surf_wid no_lanes func_cls rodwycls
## 1: 0075R 49134.52 13491.41 35643.11 48 4 1 6
## 2: 0075R 49134.52 13491.41 35643.11 48 4 1 6
## 3: 0075R 49134.52 13491.41 35643.11 48 4 1 6
## 4: 0075R 49134.52 13491.41 35643.11 48 4 1 6
## 5: 0075R 49134.52 13491.41 35643.11 48 4 1 6
## 6: 0075R 49134.52 13491.41 35643.11 48 4 1 6
## Total K A B C O DAYMTH Crash spd_av spd_pv spd_ft date Month
## 1: 0 0 0 0 0 0 0101 0 10.51920 NA 10.51920 01012015 01
## 2: 0 0 0 0 0 0 0101 0 63.11520 63.11520 NA 01012015 01
## 3: 0 0 0 0 0 0 0101 0 48.55015 NA 48.55015 01012015 01
## 4: 0 0 0 0 0 0 0101 0 65.29159 67.62343 52.59600 01012015 01
## 5: 0 0 0 0 0 0 0101 0 63.11520 63.11520 NA 01012015 01
## 6: 0 0 0 0 0 0 0101 0 60.68769 60.10971 57.37745 01012015 01
## Day Year MonthDay
## 1: 01 2015 01_01
## 2: 01 2015 01_01
## 3: 01 2015 01_01
## 4: 01 2015 01_01
## 5: 01 2015 01_01
## 6: 01 2015 01_01
day1<- df_RMA[,-c(1)] %>% group_by(MonthDay) %>% summarize(Speed_All_Mean=mean(spd_av, na.rm=TRUE))
day1
## # A tibble: 365 x 2
## MonthDay Speed_All_Mean
## <chr> <dbl>
## 1 01_01 63.0
## 2 01_02 63.8
## 3 01_03 61.9
## 4 01_04 62.5
## 5 01_05 61.8
## 6 01_06 57.4
## 7 01_07 60.0
## 8 01_08 60.7
## 9 01_09 58.4
## 10 01_10 62.5
## # ... with 355 more rows
# Step 2: Examine Data
speed_clean <- tsclean(ts(day1$Speed_All_Mean))
plot.ts(speed_clean)
# ggplot() + geom_line(data = Q1, aes(x = TimeStamp, y = speed_clean)) + ylab('Cleaned Speed Records')
day1$cnt_ma = ma(speed_clean, order=7) # using the clean count with no outliers
day1$cnt_ma30 = ma(speed_clean, order=30)
# Step 3: Decompose Your Data
count_ma = ts(na.omit(speed_clean), frequency=30)
decomp = stl(count_ma, s.window="periodic")
deseasonal_cnt <- seasadj(decomp)
plot(decomp)
# Step 4: Stationarity
# statinary test
adf.test(count_ma, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: count_ma
## Dickey-Fuller = -3.7802, Lag order = 7, p-value = 0.02026
## alternative hypothesis: stationary
adf.test(deseasonal_cnt, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: deseasonal_cnt
## Dickey-Fuller = -3.7277, Lag order = 7, p-value = 0.02288
## alternative hypothesis: stationary
d1 = diff(deseasonal_cnt)
adf.test(d1, alternative = "stationary")
## Warning in adf.test(d1, alternative = "stationary"): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: d1
## Dickey-Fuller = -9.8542, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# Step 5: Autocorrelations and Choosing Model Order
# check ACF and PACF
acf2(count_ma)
## ACF PACF
## [1,] 0.71 0.71
## [2,] 0.49 -0.02
## [3,] 0.41 0.13
## [4,] 0.37 0.08
## [5,] 0.41 0.20
## [6,] 0.53 0.31
## [7,] 0.56 0.10
## [8,] 0.47 -0.05
## [9,] 0.36 -0.06
## [10,] 0.24 -0.15
## [11,] 0.24 0.08
## [12,] 0.36 0.16
## [13,] 0.50 0.23
## [14,] 0.58 0.19
## [15,] 0.49 -0.08
## [16,] 0.35 -0.05
## [17,] 0.29 0.03
## [18,] 0.25 -0.10
## [19,] 0.31 0.00
## [20,] 0.45 0.10
## [21,] 0.52 0.11
## [22,] 0.43 -0.01
## [23,] 0.29 -0.10
## [24,] 0.19 -0.03
## [25,] 0.17 -0.05
## [26,] 0.26 0.01
## [27,] 0.37 0.03
## [28,] 0.44 0.06
## [29,] 0.37 -0.03
## [30,] 0.23 -0.10
## [31,] 0.11 -0.10
## [32,] 0.06 -0.10
## [33,] 0.15 0.03
## [34,] 0.29 0.04
## [35,] 0.37 0.09
## [36,] 0.30 -0.01
## [37,] 0.18 -0.01
## [38,] 0.06 -0.10
## [39,] 0.06 0.06
## [40,] 0.17 0.05
## [41,] 0.26 -0.07
## [42,] 0.32 0.01
## [43,] 0.24 -0.09
## [44,] 0.08 -0.09
## [45,] -0.02 0.01
## [46,] -0.01 0.06
## [47,] 0.04 -0.02
## [48,] 0.15 -0.01
## [49,] 0.21 -0.04
## [50,] 0.16 0.04
## [51,] 0.05 0.00
## [52,] -0.04 0.01
## [53,] -0.09 -0.12
## [54,] 0.00 0.03
## [55,] 0.12 -0.03
## [56,] 0.17 0.03
## [57,] 0.13 0.02
## [58,] 0.01 0.00
## [59,] -0.10 -0.02
## [60,] -0.10 0.05
## [61,] -0.02 0.04
## [62,] 0.08 -0.01
## [63,] 0.15 -0.02
## [64,] 0.12 0.02
## [65,] 0.01 0.06
## [66,] -0.09 0.01
## [67,] -0.10 0.05
## [68,] -0.02 0.00
## [69,] 0.09 0.02
## [70,] 0.15 0.01
## [71,] 0.09 -0.04
## [72,] -0.03 0.00
## [73,] -0.11 0.08
## [74,] -0.10 0.04
## [75,] -0.01 0.05
## [76,] 0.11 0.04
## [77,] 0.18 0.04
## [78,] 0.12 -0.04
## [79,] 0.03 0.05
## [80,] -0.07 -0.07
## [81,] -0.08 0.00
## [82,] 0.01 0.00
## [83,] 0.11 0.03
## [84,] 0.15 0.04
## [85,] 0.12 0.00
## [86,] 0.03 0.00
## [87,] -0.05 0.02
## [88,] -0.07 -0.03
## [89,] 0.00 -0.01
## [90,] 0.12 0.01
## [91,] 0.19 -0.02
## [92,] 0.14 -0.02
## [93,] 0.05 0.06
## [94,] -0.01 0.03
## [95,] 0.00 0.11
## [96,] 0.07 -0.05
## [97,] 0.18 0.01
## [98,] 0.23 0.00
## [99,] 0.19 0.00
## [100,] 0.09 -0.05
## [101,] 0.01 0.03
## [102,] 0.01 0.00
## [103,] 0.10 0.04
## [104,] 0.19 -0.05
## [105,] 0.24 0.04
## [106,] 0.21 0.01
## [107,] 0.12 0.00
## [108,] 0.04 -0.04
## [109,] 0.02 -0.02
## [110,] 0.07 -0.09
## [111,] 0.18 0.03
## [112,] 0.22 -0.07
## [113,] 0.19 0.05
## [114,] 0.11 0.01
## [115,] 0.04 -0.01
## [116,] 0.03 -0.03
## [117,] 0.09 -0.04
## [118,] 0.17 -0.05
## [119,] 0.22 -0.02
## [120,] 0.18 -0.02
## ACF PACF
## [1,] 0.71 0.71
## [2,] 0.50 -0.02
## [3,] 0.41 0.13
## [4,] 0.38 0.09
## [5,] 0.41 0.19
## [6,] 0.54 0.32
## [7,] 0.58 0.10
## [8,] 0.48 -0.06
## [9,] 0.37 -0.06
## [10,] 0.24 -0.15
## [11,] 0.24 0.09
## [12,] 0.36 0.17
## [13,] 0.51 0.20
## [14,] 0.58 0.20
## [15,] 0.50 -0.08
## [16,] 0.36 -0.06
## [17,] 0.29 0.05
## [18,] 0.26 -0.11
## [19,] 0.31 0.01
## [20,] 0.46 0.13
## [21,] 0.53 0.09
## [22,] 0.44 0.00
## [23,] 0.30 -0.11
## [24,] 0.20 -0.06
## [25,] 0.17 -0.03
## [26,] 0.26 0.00
## [27,] 0.37 0.02
## [28,] 0.44 0.08
## [29,] 0.38 -0.05
## [30,] 0.22 -0.12
## [31,] 0.10 -0.08
## [32,] 0.06 -0.12
## [33,] 0.15 0.05
## [34,] 0.29 0.04
## [35,] 0.37 0.07
## [36,] 0.30 0.02
## [37,] 0.18 -0.02
## [38,] 0.06 -0.09
## [39,] 0.06 0.07
## [40,] 0.17 0.03
## [41,] 0.26 -0.06
## [42,] 0.33 0.03
## [43,] 0.25 -0.12
## [44,] 0.08 -0.06
## [45,] -0.02 0.02
## [46,] -0.01 0.04
## [47,] 0.04 0.00
## [48,] 0.15 -0.03
## [49,] 0.21 -0.06
## [50,] 0.17 0.09
## [51,] 0.05 -0.02
## [52,] -0.04 0.02
## [53,] -0.09 -0.11
## [54,] 0.00 0.00
## [55,] 0.12 -0.02
## [56,] 0.18 0.04
## [57,] 0.14 0.00
## [58,] 0.01 0.02
## [59,] -0.10 -0.04
## [60,] -0.11 0.02
## [61,] -0.02 0.07
## [62,] 0.07 -0.04
## [63,] 0.15 0.00
## [64,] 0.12 0.02
## [65,] 0.00 0.04
## [66,] -0.10 0.03
## [67,] -0.10 0.06
## [68,] -0.02 -0.03
## [69,] 0.09 0.04
## [70,] 0.15 0.00
## [71,] 0.10 -0.03
## [72,] -0.03 0.01
## [73,] -0.11 0.06
## [74,] -0.10 0.04
## [75,] -0.01 0.07
## [76,] 0.11 0.01
## [77,] 0.18 0.06
## [78,] 0.12 -0.07
## [79,] 0.03 0.03
## [80,] -0.06 0.00
## [81,] -0.07 -0.01
## [82,] 0.00 0.01
## [83,] 0.11 0.02
## [84,] 0.15 0.02
## [85,] 0.13 0.01
## [86,] 0.03 0.02
## [87,] -0.05 -0.01
## [88,] -0.06 0.00
## [89,] 0.01 -0.03
## [90,] 0.11 -0.01
## [91,] 0.18 0.00
## [92,] 0.14 -0.06
## [93,] 0.06 0.07
## [94,] -0.01 0.04
## [95,] -0.01 0.10
## [96,] 0.06 -0.03
## [97,] 0.18 0.02
## [98,] 0.24 -0.02
## [99,] 0.19 0.01
## [100,] 0.09 -0.06
## [101,] 0.02 0.04
## [102,] 0.02 -0.01
## [103,] 0.10 0.04
## [104,] 0.19 -0.03
## [105,] 0.25 0.06
## [106,] 0.21 -0.01
## [107,] 0.13 0.01
## [108,] 0.04 -0.07
## [109,] 0.02 -0.03
## [110,] 0.08 -0.05
## [111,] 0.19 0.02
## [112,] 0.22 -0.06
## [113,] 0.19 0.04
## [114,] 0.11 0.00
## [115,] 0.04 -0.02
## [116,] 0.04 -0.02
## [117,] 0.09 -0.07
## [118,] 0.17 -0.02
## [119,] 0.22 -0.04
## [120,] 0.18 -0.04
Seasonility Not in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=FALSE)
## Series: deseasonal_cnt
## ARIMA(2,1,1) with drift
##
## Coefficients:
## ar1 ar2 ma1 drift
## 0.5404 -0.2185 -0.9114 0.0051
## s.e. 0.0546 0.0541 0.0239 0.0038
##
## sigma^2 estimated as 0.2936: log likelihood=-292.12
## AIC=594.25 AICc=594.42 BIC=613.73
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=FALSE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality not considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)
Seasonility in Consideration
# Step 6: Fitting an ARIMA model
auto.arima(deseasonal_cnt, seasonal=TRUE)
## Series: deseasonal_cnt
## ARIMA(5,1,2)(2,0,0)[30]
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 sar1
## 0.6552 -0.7782 -0.0748 -0.3181 -0.1462 -1.1488 0.8776 -0.0068
## s.e. NaN NaN NaN NaN NaN NaN NaN NaN
## sar2
## -0.1002
## s.e. NaN
##
## sigma^2 estimated as 0.2481: log likelihood=-259.57
## AIC=539.15 AICc=539.77 BIC=578.12
# Step 7: Evaluate and Iterate
# (try different model)
fit<-auto.arima(deseasonal_cnt, seasonal=TRUE)
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals [Seasonality considered]')
# step 8 forcast
fcast <- forecast(fit, h=30)
plot(fcast)