Ohio Conflated Data (Rural Multilane Undivided)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)


# Facility-type tag; it is pasted into the output folder path when the
# summary tables are written at the end of the script.
mytype <- "RMU"

# All relative paths below resolve against this project folder.
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")

# Restore the saved workspace objects.
# NOTE(review): OH_mun_nomed is presumably provided by this .rData file - confirm.
load("./multi-lane_undivided_OH_reduce_withCrash.rData")

# Working copy used by the rest of the analysis, followed by its dimensions.
df_RMU <- OH_mun_nomed
print(dim(df_RMU))
## [1] 552168     48
### Calculating Speed
# One speed column per vehicle class: 3600 * DISTANCE / travel time.
# NOTE(review): presumably DISTANCE is in miles and travel time in seconds,
# which would make these speeds mph - confirm against the data dictionary.
df_RMU$Spd_All <- with(df_RMU, 3600 * DISTANCE / Travel_TIME_ALL_VEHICLES)
df_RMU$Spd_Car <- with(df_RMU, 3600 * DISTANCE / Travel_TIME_PASSENGER_VEHICLES)
df_RMU$Spd_Truck <- with(df_RMU, 3600 * DISTANCE / Travel_TIME_FREIGHT_TRUCKS)


### Month, Day
# DATE is read as an MMDDYYYY value (the same layout is parsed with "%m%d%Y"
# further down). Convert it to text once and left-pad with "0" to eight
# characters so the fixed-position substrings below line up.
# NOTE(review): the padding presumably restores a leading zero lost when DATE
# was stored as a number - confirm.
df_RMU$date <- str_pad(as.character(df_RMU$DATE), 8, pad = "0")

# Fixed-width slices of the padded date string (kept as character).
df_RMU$Month <- substr(df_RMU$date, start = 1, stop = 2)
df_RMU$Day   <- substr(df_RMU$date, start = 3, stop = 4)
df_RMU$Year  <- substr(df_RMU$date, start = 5, stop = 8)

# Convert hourly epoch numbers into "HH:00:00" clock strings.
#
# x: vector of whole-hour epoch numbers (e.g. 0-23); values are coerced with
#    as.integer() before formatting.
# Returns a character vector the same length as x, e.g. 5 -> "05:00:00".
# NA input yields "NA:00:00"; zero-length input yields character(0).
ConvEpoc2HM <- function(x) {
  # Minutes and seconds are always zero because each epoch is a full hour.
  # The result is returned visibly (the last expression is not an assignment).
  sprintf("%02d:00:00", as.integer(x))
}


# Clock string ("HH:00:00") for each hourly epoch.
df_RMU$Hour1 <- ConvEpoc2HM(df_RMU[["EPOCH1h"]])

# Parse the MMDDYYYY text into a date, then glue the clock string onto its
# printed form to get one date-time string per row.
DATE4 <- paste(
  strptime(df_RMU[["date"]], format = "%m%d%Y", tz = ""),
  df_RMU[["Hour1"]],
  sep = " "
)

# Parse the combined string into POSIXct.
# NOTE(review): tz = "" means the session's local time zone; an hour skipped by
# a daylight-saving change may not parse cleanly - confirm no NA timestamps.
df_RMU$PCT_TIME <- as.POSIXct(DATE4, tz = "", format = "%Y-%m-%d %H:%M:%OS")

# Two-digit hour text and labelled day of week taken from the timestamp.
df_RMU$Hour <- strftime(df_RMU[["PCT_TIME"]], format = "%H")
# lubridate is attached after data.table, so the unqualified wday() already
# resolved to lubridate's version; the qualifier only makes that explicit.
df_RMU$DOW <- lubridate::wday(df_RMU[["PCT_TIME"]], label = TRUE)

Temporal Patterns

# Show every column now present in the working table.
print(names(df_RMU))
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH1h"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "ADMIN_LEVE"                    
##  [9] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [11] "DISTANCE"                       "ROAD_NUMBE"                    
## [13] "ROAD_NAME"                      "LATITUDE"                      
## [15] "LONGITUDE"                      "ROAD_DIREC"                    
## [17] "ORN_FID"                        "COUNTY"                        
## [19] "divided"                        "SURF_TYP"                      
## [21] "NHS_CDE"                        "HPMS"                          
## [23] "ACCESS"                         "AADT_YR"                       
## [25] "FED_FACI"                       "PK_LANES"                      
## [27] "MED_TYPE"                       "FED_MEDW"                      
## [29] "BEGMP"                          "ENDMP"                         
## [31] "SEG_LNG"                        "cnty_rte"                      
## [33] "rte_nbr"                        "aadt"                          
## [35] "aadt_bc"                        "aadt_pt"                       
## [37] "surf_wid"                       "no_lanes"                      
## [39] "func_cls"                       "rodwycls"                      
## [41] "Total"                          "K"                             
## [43] "A"                              "B"                             
## [45] "C"                              "O"                             
## [47] "DAYMTH"                         "Crash"                         
## [49] "Spd_All"                        "Spd_Car"                       
## [51] "Spd_Truck"                      "date"                          
## [53] "Month"                          "Day"                           
## [55] "Year"                           "Hour1"                         
## [57] "PCT_TIME"                       "Hour"                          
## [59] "DOW"
# Bin AADT into volume classes. Intervals are closed on the right (cut()'s
# default), so e.g. exactly 2000 lands in "0-2000" and a value of 0 becomes NA.
df_RMU$AADT1 <- cut(
  x = df_RMU[["aadt"]],
  breaks = c(0, 2000, 5000, 10000, 15000, 20000, 30000, Inf),
  labels = c(
    "0-2000", "2001-5000", "5001-10000", "10001-15000",
    "15001-20000", "20001-30000", "> 30000"
  )
)
# Row count per AADT class.
table(df_RMU[["AADT1"]])
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##           0       43800      183960      184248       52560       87600 
##     > 30000 
##           0
# Two-level crash indicator: (-1, 0] -> "No crash", (0, Inf] -> "Crash".
df_RMU$Crash1 <- cut(
  x = df_RMU[["Crash"]],
  breaks = c(-1, 0, Inf),
  labels = c("No crash", "Crash")
)
# Row count per crash class.
table(df_RMU[["Crash1"]])
## 
## No crash    Crash 
##   551906      262
# Day/night flag from the hourly epoch. The three right-closed bands
# (-1,6], (6,16], (16,23] are mapped by position to Night / Day / Night,
# i.e. hours 7-16 are "Day" and every other hour is "Night".
df_RMU$DayNight <- c("Night", "Day", "Night")[
  as.integer(cut(df_RMU[["EPOCH1h"]], breaks = c(-1, 6, 16, 23)))
]
# Row count per day/night class.
table(df_RMU[["DayNight"]])
## 
##    Day  Night 
## 230070 322098
# Peak-period flag from the hourly epoch. The five right-closed bands
# (-1,6], (6,8], (8,15], (15,19], (19,23] are mapped by position, so hours
# 7-8 are "Morning Peak", hours 16-19 are "Evening Peak", the rest "Off-Peak".
df_RMU$PeakOffPeak <- c(
  "Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak"
)[
  as.integer(cut(df_RMU[["EPOCH1h"]], breaks = c(-1, 6, 8, 15, 19, 23)))
]
# Row count per peak class.
table(df_RMU[["PeakOffPeak"]])
## 
## Evening Peak Morning Peak     Off-Peak 
##        92028        46014       414126
# Analysis subset: four roadway-geometry fields first, then the temporal keys,
# derived classes and the three speed columns.
df_RMU01 <- df_RMU[,c("divided", "MED_TYPE", "surf_wid", "no_lanes", "EPOCH1h",
                    "Hour","Day","DOW","Month","Year", "AADT1","Crash1",     
                    "DayNight","PeakOffPeak","Spd_All","Spd_Car","Spd_Truck")]
# Columns 5:17 skip the four geometry fields, leaving EPOCH1h ... Spd_Truck.
df_RMU02 <- df_RMU01[,c(5:17)]


# cols  : grouping variables, stored as factors below
# cols1 : numeric speed measures that get summarised
# cols2 : roadway-geometry fields
cols <- c("EPOCH1h", "Hour", "Day", "DOW", "Month", "AADT1" , "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("divided", "MED_TYPE", "surf_wid","no_lanes")

# Convert every grouping column to a factor. A single assignment with `%>%`
# replaces the former `x = x %<>% ...` form, which assigned twice and needed
# magrittr's `%<>%` although magrittr is never attached with library() here;
# across() replaces the deprecated funs() helper.
df_RMU02 <- df_RMU02 %>%
  mutate(across(all_of(cols), factor))

# Statistics requested for every speed summary below.
speed_stats <- c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS')

# Speed summaries (Spd_All / Spd_Car / Spd_Truck) per level of one grouping
# variable at a time.
hour1 <- ExpCustomStat(df_RMU02, Cvar = c("Hour"), Nvar = cols1, stat = speed_stats, gpby = FALSE)
day1 <- ExpCustomStat(df_RMU02, Cvar = c("Day"), Nvar = cols1, stat = speed_stats, gpby = FALSE)
DOW1 <- ExpCustomStat(df_RMU02, Cvar = c("DOW"), Nvar = cols1, stat = speed_stats, gpby = FALSE)
Month1 <- ExpCustomStat(df_RMU02, Cvar = c("Month"), Nvar = cols1, stat = speed_stats, gpby = FALSE)
AADT2 <- ExpCustomStat(df_RMU02, Cvar = c("AADT1"), Nvar = cols1, stat = speed_stats, gpby = FALSE)
# Crash status crossed with hour; gpby is left at its default for this one.
Crash2 <- ExpCustomStat(df_RMU02, Cvar = c("Crash1", "Hour"), Nvar = cols1, stat = speed_stats)
DayNight1 <- ExpCustomStat(df_RMU02, Cvar = c("DayNight"), Nvar = cols1, stat = speed_stats, gpby = FALSE)
PeakOffPeak1 <- ExpCustomStat(df_RMU02, Cvar = c("PeakOffPeak"), Nvar = cols1, stat = speed_stats, gpby = FALSE)

# Roadway-geometry summary. ExpCustomStat() rejects non-numeric Nvar columns
# (this call previously stopped with "divided variable/s not in numeric type"),
# so only the numeric geometry fields are passed on and any dropped field is
# reported. `[[` is used for the type check so it works for data.frame and
# data.table alike.
geo_is_numeric <- vapply(cols2, function(v) is.numeric(df_RMU01[[v]]), logical(1))
if (!all(geo_is_numeric)) {
  message("Skipping non-numeric geometry column(s): ",
          paste(cols2[!geo_is_numeric], collapse = ", "))
}
geo <- if (any(geo_is_numeric)) {
  ExpCustomStat(df_RMU01, Nvar = cols2[geo_is_numeric],
                stat = c('mean','median','p0.85','min', 'max','sd', 'var','PS'))
} else {
  NULL
}
# Draw one faceted line chart from an already column-subset summary table.
#
# stats_subset  : summary table holding the id columns plus mean, p0.85 and sd
# x_var         : name of the column drawn on the x axis
# facet_formula : facet_grid() formula for the panel layout
# plot_title    : title shown above the chart
# Returns the ggplot object; calling it at top level prints the chart.
plot_speed_stats <- function(stats_subset, x_var, facet_formula, plot_title) {
  # Stack the statistic columns (mean through sd) into condition/measurement.
  long_stats <- gather(stats_subset, condition, measurement, mean:sd,
                       factor_key = TRUE)
  ggline(long_stats, x = x_var, y = "measurement", color = "Attribute",
         palette = c("red", "blue", "black")) +
    theme(legend.title = element_blank()) +
    facet_grid(facet_formula) +
    labs(title = plot_title)
}

# Column positions 1, 2, 6, 8, 11 are Level, Attribute, mean, p0.85 and sd in
# the single-grouping summaries; Crash2 carries two id columns (Crash1, Hour)
# ahead of Attribute, hence the extra index there.
plot_speed_stats(hour1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Hour")

plot_speed_stats(DOW1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Day of Week")

plot_speed_stats(Month1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Month")

plot_speed_stats(AADT2[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By AADT")

plot_speed_stats(Crash2[, c(1, 2, 3, 6, 8, 11)], "Hour", condition + Crash1 ~ ., "By Crash")

plot_speed_stats(DayNight1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Day/Night")

plot_speed_stats(PeakOffPeak1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Peak/Off-Peak")

Temporal Statistics of Operational Speed

# 
# Point the session at the project folder again (same path as at the top of
# the script) so the relative output paths used below resolve.
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")

# Preview the first six rows of the by-hour speed summary.
head(hour1, n = 6L)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 23007 4.17 41.37513 44.83879 55.49488
## 2:    01   Spd_All     Hour 23007 4.17 42.19813 45.61055 55.88618
## 3:    10   Spd_All     Hour 23007 4.17 36.91117 38.57175 52.72680
## 4:    11   Spd_All     Hour 23007 4.17 36.25634 37.74153 52.69269
## 5:    12   Spd_All     Hour 23007 4.17 35.97581 37.20162 52.64505
## 6:    13   Spd_All     Hour 23007 4.17 36.55924 38.04464 52.82549
##          min      max       sd      var   PS
## 1: 0.6213567 80.18452 14.18129 201.1091 2.88
## 2: 0.6214958 89.22426 13.66702 186.7875 2.87
## 3: 0.6210972 71.64060 14.54508 211.5593 4.95
## 4: 0.6213567 80.18452 14.85769 220.7509 4.88
## 5: 0.6214898 75.44486 14.93606 223.0860 4.86
## 6: 0.6210972 80.18452 14.81420 219.4605 4.91
# Preview the first six rows of the by-day-of-month speed summary.
head(day1, n = 6L)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 18144 3.29 37.96883 39.72730 53.77577
## 2:    02   Spd_All      Day 18144 3.29 37.95171 39.93714 53.61830
## 3:    03   Spd_All      Day 18144 3.29 37.95099 40.31029 53.69269
## 4:    04   Spd_All      Day 18144 3.29 38.04695 40.06501 53.96895
## 5:    05   Spd_All      Day 18144 3.29 38.19243 40.60589 53.94491
## 6:    06   Spd_All      Day 18144 3.29 37.93506 39.99793 53.76221
##          min      max       sd      var   PS
## 1: 0.6213567 71.89200 14.60290 213.2446 3.10
## 2: 0.6213567 80.18452 14.58103 212.6065 3.31
## 3: 0.6214924 71.84291 14.63784 214.2665 3.29
## 4: 0.6213567 76.83200 14.68719 215.7137 3.23
## 5: 0.6213567 73.76976 14.74053 217.2832 3.22
## 6: 0.6210972 83.82927 14.79646 218.9353 3.30
# Preview the first six rows of the by-day-of-week speed summary.
head(DOW1, n = 6L)
##    Level Attribute Group_by Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 80232 14.53 37.76296 39.73186 53.62047
## 2:   Fri   Spd_All      DOW 78624 14.24 37.67581 39.81919 53.56629
## 3:   Sat   Spd_All      DOW 78624 14.24 38.99868 41.98197 54.46747
## 4:   Sun   Spd_All      DOW 78624 14.24 39.62386 42.81286 54.93234
## 5:   Mon   Spd_All      DOW 78624 14.24 37.93915 39.86185 53.67164
## 6:   Tue   Spd_All      DOW 78624 14.24 37.83178 39.81600 53.53455
##          min      max       sd      var    PS
## 1: 0.6212149 78.50029 14.63054 214.0526 15.03
## 2: 0.6210972 87.82114 14.66075 214.9377 14.82
## 3: 0.6210972 80.18452 14.98226 224.4681 13.40
## 4: 0.6210972 83.82927 14.96202 223.8620 11.66
## 5: 0.6210972 89.22426 14.48469 209.8061 14.78
## 6: 0.6210972 83.82927 14.49648 210.1480 15.17
# Preview the first six rows of the by-month speed summary.
head(Month1, n = 6L)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 43896 7.95 38.48722 40.77225 53.45635
## 2:    02   Spd_All    Month 39648 7.18 37.89431 39.90622 52.81399
## 3:    03   Spd_All    Month 43896 7.95 38.83591 41.57072 54.05158
## 4:    04   Spd_All    Month 42480 7.69 38.74260 41.28044 54.02431
## 5:    05   Spd_All    Month 43896 7.95 38.79982 41.35688 54.01680
## 6:    06   Spd_All    Month 42480 7.69 38.71500 41.62593 54.11075
##          min      max       sd      var   PS
## 1: 0.6210972 73.76976 14.14709 200.1401 7.46
## 2: 0.6210972 74.43993 14.07334 198.0590 6.92
## 3: 0.6210972 78.50029 14.53132 211.1593 7.95
## 4: 0.6210972 78.47847 14.45588 208.9723 7.81
## 5: 0.6210972 83.82927 14.40288 207.4429 8.14
## 6: 0.6210972 89.22426 14.66255 214.9904 8.05
# Preview the first six rows of the by-AADT-class speed summary.
head(AADT2, n = 6L)
##          Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:  5001-10000   Spd_All    AADT1 183960 33.32 40.84231 44.64357 55.88618
## 2: 10001-15000   Spd_All    AADT1 184248 33.37 33.95948 31.06307 52.05358
## 3: 15001-20000   Spd_All    AADT1  52560  9.52 39.51630 43.89838 51.40394
## 4: 20001-30000   Spd_All    AADT1  87600 15.86 37.64575 39.18639 51.44150
## 5:   2001-5000   Spd_All    AADT1  43800  7.93 45.70845 48.38245 55.54357
## 6:  5001-10000   Spd_Car    AADT1 183960 33.32 41.29276 45.45761 56.73748
##          min      max       sd      var    PS
## 1: 0.6214898 89.22426 15.11555 228.4799 33.54
## 2: 0.6212149 78.50029 14.80321 219.1352 28.91
## 3: 0.6214960 72.27742 13.41198 179.8811 10.87
## 4: 0.6214730 72.62329 13.15377 173.0216 19.28
## 5: 0.6210972 78.93979 11.73237 137.6485  7.40
## 6: 0.6214866 89.22426 15.82431 250.4089 32.79
# Preview the first six rows of the crash-status-by-hour speed summary.
head(Crash2, n = 6L)
##      Crash1 Hour Attribute Count Prop     mean   median    p0.85       min
## 1: No crash   00   Spd_All 22999 4.17 41.37957 44.83956 55.49640 0.6213567
## 2: No crash   01   Spd_All 23004 4.17 42.20496 45.61055 55.88618 0.6214958
## 3: No crash   10   Spd_All 22990 4.16 36.91259 38.58059 52.72680 0.6210972
## 4: No crash   11   Spd_All 22993 4.16 36.25890 37.74931 52.69269 0.6213567
## 5: No crash   12   Spd_All 22995 4.16 35.97809 37.21119 52.65263 0.6214898
## 6: No crash   13   Spd_All 22999 4.17 36.56089 38.05197 52.82849 0.6210972
##         max       sd      var   PS
## 1: 80.18452 14.17576 200.9522 2.88
## 2: 89.22426 13.66269 186.6690 2.87
## 3: 71.64060 14.54790 211.6413 4.94
## 4: 80.18452 14.85981 220.8139 4.88
## 5: 75.44486 14.93779 223.1376 4.86
## 6: 80.18452 14.81534 219.4943 4.90
# Preview the first six rows of the day/night speed summary.
head(DayNight1, n = 6L)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 322098 58.33 39.70244 42.58362 54.58923
## 2:   Day   Spd_All DayNight 230070 41.67 36.62269 38.10481 52.99853
## 3: Night   Spd_Car DayNight 322098 58.33 40.36259 43.78400 55.57740
## 4:   Day   Spd_Car DayNight 230070 41.67 37.03012 38.79303 53.71584
## 5: Night Spd_Truck DayNight 322098 58.33 39.38778 41.62593 54.34760
## 6:   Day Spd_Truck DayNight 230070 41.67 37.16622 38.87064 53.21052
##          min      max       sd      var    PS
## 1: 0.6210972 89.22426 14.40279 207.4404 51.41
## 2: 0.6210972 80.18452 14.79800 218.9807 48.59
## 3: 0.6210972 89.22426 15.04120 226.2377 46.19
## 4: 0.6210972 86.34796 15.31198 234.4568 53.81
## 5: 0.6210972 74.11097 13.97627 195.3362 48.95
## 6: 0.6210972 73.76976 14.51054 210.5559 51.05
# Preview the first six rows of the peak/off-peak speed summary.
head(PeakOffPeak1, n = 6L)
##           Level Attribute    Group_by  Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 414126 75.00 38.56150 40.99083
## 2: Evening Peak   Spd_All PeakOffPeak  92028 16.67 37.17529 39.00841
## 3: Morning Peak   Spd_All PeakOffPeak  46014  8.33 36.96484 38.25953
## 4:     Off-Peak   Spd_Car PeakOffPeak 414126 75.00 38.88149 41.69822
## 5: Evening Peak   Spd_Car PeakOffPeak  92028 16.67 37.83188 40.21943
## 6: Morning Peak   Spd_Car PeakOffPeak  46014  8.33 37.28607 38.86523
##       p0.85       min      max       sd      var    PS
## 1: 54.12661 0.6210972 89.22426 14.59982 213.1548 72.13
## 2: 53.37678 0.6210972 76.83200 14.92458 222.7429 18.44
## 3: 53.35351 0.6210972 78.47847 14.68614 215.6826  9.43
## 4: 54.74676 0.6210972 89.22426 15.20292 231.1287 69.76
## 5: 54.50152 0.6212149 85.41474 15.59732 243.2764 19.86
## 6: 53.92514 0.6210972 78.47847 15.09232 227.7781 10.38
# Write every summary table to ./<mytype>/des_output/ as CSV.
# The folder is created first (including parents) because write.csv() fails
# when the target directory does not exist; showWarnings = FALSE keeps the
# call quiet when the folder is already there.
des_dir <- paste0("./", mytype, "/des_output")
dir.create(des_dir, recursive = TRUE, showWarnings = FALSE)

# Map each file-name suffix to its table; the resulting paths are
# <des_dir>/OH_RMU_OS_DS_<suffix>.csv, unchanged from before.
des_tables <- list(
  hour        = hour1,
  day         = day1,
  dow         = DOW1,
  month       = Month1,
  aadt        = AADT2,
  crash       = Crash2,
  daynight    = DayNight1,
  peakoffpeak = PeakOffPeak1
)
for (suffix in names(des_tables)) {
  write.csv(des_tables[[suffix]],
            file.path(des_dir, paste0("OH_RMU_OS_DS_", suffix, ".csv")),
            row.names = FALSE)
}