Ohio Conflated Data (Rural Interstate)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)


# Load the saved workspace from the facility-based directory, then move into
# the sub-directory named after the facility type.
setwd(dir = "/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")
load(file = "OH_Principal_Arterial_Rural_Interstate_1_TMC_TT_SI_reduce_withCrash.rData")
mytype <- "RI"
setwd(dir = paste0("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/", mytype))

# `b02a` is not created in this script; presumably it is restored by the
# load() call above -- confirm against the .rData contents.
df_RI <- b02a
dim(df_RI)
## [1] 3039720      48

### Calculating Speed
# Speed = 3600 * DISTANCE / travel time, once per vehicle class.
# NOTE(review): presumably DISTANCE is in miles and travel time in seconds,
# which would make these mph -- units are not stated here, please confirm.
df_RI$Spd_All   <- with(df_RI, 3600 * DISTANCE / Travel_TIME_ALL_VEHICLES)
df_RI$Spd_Car   <- with(df_RI, 3600 * DISTANCE / Travel_TIME_PASSENGER_VEHICLES)
df_RI$Spd_Truck <- with(df_RI, 3600 * DISTANCE / Travel_TIME_FREIGHT_TRUCKS)


### Month, Day
# DATE is converted to text and left-padded with zeros to 8 characters, then
# sliced positionally as MMDDYYYY (the same layout the "%m%d%Y" parse relies
# on further down).
# Fix: the as.character() conversion is now the value that gets padded.
# Previously its result was discarded because the next statement padded the
# raw DATE column and overwrote it.
df_RI$date  <- str_pad(as.character(df_RI$DATE), width = 8, side = "left", pad = "0")
df_RI$Month <- substr(df_RI$date, start = 1, stop = 2)
df_RI$Day   <- substr(df_RI$date, start = 3, stop = 4)
df_RI$Year  <- substr(df_RI$date, start = 5, stop = 8)

#' Convert an hourly epoch index to an "HH:00:00" clock string.
#'
#' Minutes and seconds are always zero: the epoch is treated as a whole hour.
#'
#' @param x Vector of whole-number hour indices (e.g. 0-23). Values are
#'   coerced with `as.integer()`, so fractional input is truncated.
#' @return Character vector the same length as `x`. Hours are zero-padded to
#'   at least two digits. `NA` input yields `NA_character_` (not the literal
#'   "NA:00:00"), and zero-length input yields `character(0)` (not ":00:00").
ConvEpoc2HM <- function(x) {
  hr <- as.integer(x)
  out <- sprintf("%02d:00:00", hr)
  # sprintf() would render a missing hour as the text "NA"; keep it missing.
  out[is.na(hr)] <- NA_character_
  out
}


# Build one timestamp per record from the calendar date and the hourly epoch.
df_RI$Hour1 <- ConvEpoc2HM(df_RI$EPOCH1h)

# Parse the MMDDYYYY string, then glue "<date> <HH:MM:SS>" together
# (paste() separates its arguments with a single space by default).
DATE4 <- paste(
  strptime(df_RI$date, format = "%m%d%Y", tz = ""),
  df_RI$Hour1
)

# tz = "" means the session's local time zone.
# NOTE(review): if that zone observes DST, an hour that does not exist locally
# may fail to parse -- confirm the zone used on the cluster.
df_RI$PCT_TIME <- as.POSIXct(DATE4, tz = "", format = "%Y-%m-%d %H:%M:%OS")

# Hour of day as a two-digit string, and labelled day of week.
df_RI$Hour <- strftime(df_RI$PCT_TIME, format = "%H")
# lubridate is attached after data.table, so the bare `wday` already resolves
# to lubridate's version; the namespace is spelled out for clarity.
df_RI$DOW <- lubridate::wday(df_RI$PCT_TIME, label = TRUE)

Temporal Patterns

# List every column now present (original fields plus the derived ones).
colnames(df_RI)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH1h"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "ADMIN_LEVE"                    
##  [9] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [11] "DISTANCE"                       "ROAD_NUMBE"                    
## [13] "ROAD_NAME"                      "LATITUDE"                      
## [15] "LONGITUDE"                      "ROAD_DIREC"                    
## [17] "ORN_FID"                        "COUNTY"                        
## [19] "divided"                        "SURF_TYP"                      
## [21] "NHS_CDE"                        "HPMS"                          
## [23] "ACCESS"                         "AADT_YR"                       
## [25] "FED_FACI"                       "PK_LANES"                      
## [27] "MED_TYPE"                       "FED_MEDW"                      
## [29] "BEGMP"                          "ENDMP"                         
## [31] "SEG_LNG"                        "cnty_rte"                      
## [33] "rte_nbr"                        "aadt"                          
## [35] "aadt_bc"                        "aadt_pt"                       
## [37] "surf_wid"                       "no_lanes"                      
## [39] "func_cls"                       "rodwycls"                      
## [41] "Total"                          "K"                             
## [43] "A"                              "B"                             
## [45] "C"                              "O"                             
## [47] "DAYMTH"                         "Crash"                         
## [49] "Spd_All"                        "Spd_Car"                       
## [51] "Spd_Truck"                      "date"                          
## [53] "Month"                          "Day"                           
## [55] "Year"                           "Hour1"                         
## [57] "PCT_TIME"                       "Hour"                          
## [59] "DOW"
# Bin AADT into labelled volume classes and tabulate the bins.
# NOTE(review): cut() intervals are right-closed by default, so aadt == 0 is
# outside the first bin and becomes NA despite the "0-2000" label.
aadt_breaks <- c(0, 2000, 5000, 10000, 15000, 20000, 30000, Inf)
aadt_labels <- c("0-2000", "2001-5000", "5001-10000", "10001-15000",
                 "15001-20000", "20001-30000", "> 30000")
df_RI$AADT1 <- cut(df_RI$aadt, breaks = aadt_breaks, labels = aadt_labels)
table(df_RI$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##           0       17520           0       35040      131400      674520 
##     > 30000 
##     2181240
# Two-level crash indicator: counts in (-1, 0] are "No crash", anything
# above 0 is "Crash".
crash_breaks <- c(-1, 0, Inf)
crash_labels <- c("No crash", "Crash")
df_RI$Crash1 <- cut(df_RI$Crash, breaks = crash_breaks, labels = crash_labels)
table(df_RI$Crash1)
## 
## No crash    Crash 
##  3034316     5404
# Day/night flag from the hourly epoch. The right-closed bins (-1,6], (6,16],
# (16,23] map to Night / Day / Night; epochs outside (-1,23] become NA.
day_night_labels <- c("Night", "Day", "Night")
day_night_bin <- cut(df_RI$EPOCH1h, breaks = c(-1, 6, 16, 23), labels = FALSE)
df_RI$DayNight <- day_night_labels[day_night_bin]
table(df_RI$DayNight)
## 
##     Day   Night 
## 1266550 1773170
# Peak-period flag from the hourly epoch. The right-closed bins are
# (-1,6] Off-Peak, (6,8] Morning Peak, (8,15] Off-Peak, (15,19] Evening Peak
# and (19,23] Off-Peak; epochs outside (-1,23] become NA.
peak_labels <- c("Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak")
peak_bin <- cut(df_RI$EPOCH1h, breaks = c(-1, 6, 8, 15, 19, 23), labels = FALSE)
df_RI$PeakOffPeak <- peak_labels[peak_bin]
table(df_RI$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       506620       253310      2279790
# Analysis subsets: df_RI01 keeps four roadway-geometry columns followed by
# the temporal, traffic and speed columns; df_RI02 keeps only columns 5-17 of
# df_RI01, i.e. everything after the four geometry columns.
df_RI01 <- df_RI[, c("divided", "MED_TYPE", "surf_wid", "no_lanes", "EPOCH1h",
                     "Hour", "Day", "DOW", "Month", "Year", "AADT1", "Crash1",
                     "DayNight", "PeakOffPeak", "Spd_All", "Spd_Car", "Spd_Truck")]
df_RI02 <- df_RI01[, c(5:17)]


cols  <- c("EPOCH1h", "Hour", "Day", "DOW", "Month", "AADT1", "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("divided", "MED_TYPE", "surf_wid", "no_lanes")

# Convert every grouping column to a factor.
# Fix: a single plain assignment replaces `df_RI02= df_RI02 %<>% ...`, which
# assigned twice and needed magrittr's `%<>%` (no library() call in this
# script loads magrittr). The deprecated funs() wrapper is also gone.
df_RI02 <- mutate_at(df_RI02, cols, factor)

# Statistics requested for every speed summary (defined once, reused below).
speed_stats <- c('Count', 'Prop', 'mean', 'median', 'p0.85', 'min', 'max', 'sd', 'var', 'PS')

# One summary table per grouping variable, covering the three speed columns.
hour1        <- ExpCustomStat(df_RI02, Cvar = c("Hour"),        Nvar = cols1, stat = speed_stats, gpby = FALSE)
day1         <- ExpCustomStat(df_RI02, Cvar = c("Day"),         Nvar = cols1, stat = speed_stats, gpby = FALSE)
DOW1         <- ExpCustomStat(df_RI02, Cvar = c("DOW"),         Nvar = cols1, stat = speed_stats, gpby = FALSE)
Month1       <- ExpCustomStat(df_RI02, Cvar = c("Month"),       Nvar = cols1, stat = speed_stats, gpby = FALSE)
AADT2        <- ExpCustomStat(df_RI02, Cvar = c("AADT1"),       Nvar = cols1, stat = speed_stats, gpby = FALSE)
# Crash summary is computed jointly by crash status and hour (gpby left at
# its default, as in the original call).
Crash2       <- ExpCustomStat(df_RI02, Cvar = c("Crash1", "Hour"), Nvar = cols1, stat = speed_stats)
DayNight1    <- ExpCustomStat(df_RI02, Cvar = c("DayNight"),    Nvar = cols1, stat = speed_stats, gpby = FALSE)
PeakOffPeak1 <- ExpCustomStat(df_RI02, Cvar = c("PeakOffPeak"), Nvar = cols1, stat = speed_stats, gpby = FALSE)

# Geometry summary.
# Fix: only numeric geometry columns are passed as Nvar. The previous run
# passed all of cols2 and ExpCustomStat responded with
#   "divided variable/s not in numeric type
#    Either convert it into numeric or remove that from 'Nvar' list"
# so no usable `geo` table was produced.
geo_stats <- c('mean', 'median', 'p0.85', 'min', 'max', 'sd', 'var', 'PS')
geo_is_numeric <- vapply(cols2, function(v) is.numeric(df_RI01[[v]]), logical(1))
geo_vars <- cols2[geo_is_numeric]
if (length(geo_vars) < length(cols2)) {
  message("Skipping non-numeric geometry column(s): ",
          paste(cols2[!geo_is_numeric], collapse = ", "))
}
geo <- if (length(geo_vars) > 0) {
  ExpCustomStat(df_RI01, Nvar = geo_vars, stat = geo_stats)
} else {
  NULL
}
# Draw one line chart per summary table: the mean / p0.85 / sd columns are
# stacked into long form and shown as facet rows, with one coloured line per
# speed attribute.
#   stats_wide : summary table already reduced to the id columns plus the
#                contiguous mean..sd statistic columns
#   plot_title : title shown above the chart
#   x_var      : name of the column placed on the x axis
#   facet_spec : facet_grid() formula for the facet rows
plot_speed_stats <- function(stats_wide, plot_title, x_var = "Level",
                             facet_spec = condition ~ .) {
  stats_long <- gather(stats_wide, condition, measurement, mean:sd,
                       factor_key = TRUE)
  ggline(stats_long, x = x_var, y = "measurement", color = "Attribute",
         palette = c("red", "blue", "black")) +
    theme(legend.title = element_blank()) +
    facet_grid(facet_spec) +
    labs(title = plot_title)
}

# Column positions are kept literal at the call site:
# 1 = Level, 2 = Attribute, 6 = mean, 8 = p0.85, 11 = sd.
plot_speed_stats(hour1[, c(1, 2, 6, 8, 11)], "By Hour")

plot_speed_stats(DOW1[, c(1, 2, 6, 8, 11)], "By Day of Week")

plot_speed_stats(Month1[, c(1, 2, 6, 8, 11)], "By Month")

plot_speed_stats(AADT2[, c(1, 2, 6, 8, 11)], "By AADT")

# The crash table has Crash1 and Hour as its first two columns, so Hour goes
# on the x axis and crash status becomes an additional facet row.
plot_speed_stats(Crash2[, c(1, 2, 3, 6, 8, 11)], "By Crash",
                 x_var = "Hour", facet_spec = condition + Crash1 ~ .)

plot_speed_stats(DayNight1[, c(1, 2, 6, 8, 11)], "By Day/Night")

plot_speed_stats(PeakOffPeak1[, c(1, 2, 6, 8, 11)], "By Peak/Off-Peak")

Temporal Statistics of Operational Speed

# 
# Return to the facility-based parent directory; the CSV paths used for the
# exports are relative to it.
setwd(dir = "/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")

# Preview the first rows of the per-hour summary.
head(hour1, n = 6L)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 126655 4.17 62.08045 63.41304 65.72271
## 2:    01   Spd_All     Hour 126655 4.17 61.76534 63.14429 65.40507
## 3:    10   Spd_All     Hour 126655 4.17 63.70539 64.73865 67.58900
## 4:    11   Spd_All     Hour 126655 4.17 63.77150 64.81246 67.74031
## 5:    12   Spd_All     Hour 126655 4.17 63.81950 64.86950 67.75535
## 6:    13   Spd_All     Hour 126655 4.17 63.89716 64.97793 67.83096
##          min     max       sd      var   PS
## 1: 0.6200625 119.052 6.754386 45.62173 4.01
## 2: 0.6213924 119.052 6.696766 44.84667 3.96
## 3: 0.6231028 119.052 6.818577 46.49299 4.27
## 4: 1.2203282 119.052 6.915388 47.82258 4.28
## 5: 0.6220800 119.052 6.922227 47.91723 4.28
## 6: 0.6214833 119.052 6.988718 48.84218 4.29
# Preview the first rows of the per-day-of-month summary.
head(day1, n = 6L)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 99936 3.29 62.25610 63.93984 66.89201
## 2:    02   Spd_All      Day 99936 3.29 63.07531 64.21239 67.01637
## 3:    03   Spd_All      Day 99936 3.29 63.03708 64.06309 66.85047
## 4:    04   Spd_All      Day 99936 3.29 62.84903 64.07122 66.94070
## 5:    05   Spd_All      Day 99936 3.29 62.98948 64.04664 67.10754
## 6:    06   Spd_All      Day 99936 3.29 62.95106 64.19990 67.18468
##          min     max       sd      var   PS
## 1: 0.6214833 119.052 7.751181 60.08080 3.23
## 2: 0.6214833 119.052 6.976043 48.66518 3.29
## 3: 0.6214833 119.052 6.743066 45.46894 3.29
## 4: 0.6214833 119.052 6.993581 48.91018 3.27
## 5: 0.6214833 119.052 7.011121 49.15582 3.28
## 6: 0.6200625 119.052 7.151070 51.13781 3.29
# Preview the first rows of the per-day-of-week summary.
head(DOW1, n = 6L)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 441384 14.52 62.94057 64.11191 66.73193
## 2:   Fri   Spd_All      DOW 433056 14.25 63.41494 64.52085 67.57396
## 3:   Sat   Spd_All      DOW 433056 14.25 63.37675 64.28643 67.63918
## 4:   Sun   Spd_All      DOW 433056 14.25 63.26730 64.17695 67.88499
## 5:   Mon   Spd_All      DOW 433056 14.25 62.98443 64.22222 66.86169
## 6:   Tue   Spd_All      DOW 433056 14.25 62.76187 64.01453 66.38551
##          min     max       sd      var    PS
## 1: 0.6214833 119.052 6.810547 46.38355 14.53
## 2: 0.6214833 119.052 7.195011 51.76819 14.39
## 3: 0.6200625 119.052 7.089527 50.26140 14.30
## 4: 0.6214833 119.052 7.249130 52.54988 14.15
## 5: 0.6213924 119.052 6.800353 46.24481 14.21
## 6: 0.6183750 119.052 6.694334 44.81411 14.21
# Preview the first rows of the per-month summary.
head(Month1, n = 6L)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 258168 8.49 61.62627 63.11316 65.83829
## 2:    02   Spd_All    Month 233184 7.67 61.41789 63.10587 65.82929
## 3:    03   Spd_All    Month 258168 8.49 62.89348 64.01762 66.71787
## 4:    04   Spd_All    Month 249840 8.22 63.10209 64.10436 66.84841
## 5:    05   Spd_All    Month 258168 8.49 62.90195 64.15156 67.00320
## 6:    06   Spd_All    Month 249840 8.22 63.04297 64.29096 67.35239
##          min     max       sd      var   PS
## 1: 0.6183750 119.052 7.306058 53.37848 8.22
## 2: 0.6214833 119.052 7.434753 55.27556 7.42
## 3: 0.6214833 119.052 7.011219 49.15719 8.42
## 4: 0.6214546 119.052 6.795880 46.18399 8.21
## 5: 0.6214833 119.052 7.194595 51.76220 8.46
## 6: 0.6200625 119.052 7.295811 53.22886 8.24
# Preview the first rows of the per-AADT-class summary.
head(AADT2, n = 6L)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:     > 30000   Spd_All    AADT1 2181240 71.76 63.67944 64.45408 67.29622
## 2: 15001-20000   Spd_All    AADT1  131400  4.32 64.31621 65.05847 67.80255
## 3: 20001-30000   Spd_All    AADT1  674520 22.19 62.03986 63.24464 66.15436
## 4: 10001-15000   Spd_All    AADT1   35040  1.15 50.30079 53.21514 65.87833
## 5:   2001-5000   Spd_All    AADT1   17520  0.58 40.26475 44.02383 54.82047
## 6:     > 30000   Spd_Car    AADT1 2181240 71.76 66.24458 67.43866 70.80186
##          min       max        sd       var    PS
## 1: 0.6183750 119.05200  6.715104  45.09262 72.14
## 2: 1.8737112  84.48578  4.330404  18.75240  4.50
## 3: 1.0751321 100.02600  5.581455  31.15264 22.14
## 4: 1.0563671  79.15004 14.021794 196.61072  0.89
## 5: 0.6214833  76.69103 15.408152 237.41115  0.33
## 6: 0.6183750 119.05200  7.756739  60.16700 72.26
# Preview the first rows of the crash-status-by-hour summary.
head(Crash2, n = 6L)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 126486 4.16 62.08374 63.41384 65.72271
## 2: No crash   01   Spd_All 126524 4.16 61.76776 63.14491 65.40553
## 3: No crash   10   Spd_All 126462 4.16 63.71118 64.74092 67.59081
## 4: No crash   11   Spd_All 126405 4.16 63.77909 64.81430 67.74178
## 5: No crash   12   Spd_All 126420 4.16 63.82898 64.87032 67.75558
## 6: No crash   13   Spd_All 126384 4.16 63.90608 64.97901 67.83188
##          min     max       sd      var   PS
## 1: 0.6200625 119.052 6.750433 45.56835 4.01
## 2: 0.6213924 119.052 6.694610 44.81781 3.96
## 3: 0.6231028 119.052 6.812302 46.40746 4.27
## 4: 1.2203282 119.052 6.903947 47.66448 4.27
## 5: 0.6220800 119.052 6.902721 47.64755 4.28
## 6: 0.6214833 119.052 6.971375 48.60007 4.28
# Preview the first rows of the day/night summary.
head(DayNight1, n = 6L)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 1773170 58.33 62.53753 63.74987 66.43402
## 2:   Day   Spd_All DayNight 1266550 41.67 63.79442 64.88290 67.74826
## 3: Night   Spd_Car DayNight 1773170 58.33 64.98934 66.50523 69.95208
## 4:   Day   Spd_Car DayNight 1266550 41.67 66.30734 67.80579 71.00758
## 5: Night Spd_Truck DayNight 1773170 58.33 60.55429 62.04748 63.72719
## 6:   Day Spd_Truck DayNight 1266550 41.67 60.58346 61.97760 63.34738
##          min     max       sd      var    PS
## 1: 0.6183750 119.052 6.922067 47.91500 57.26
## 2: 0.6214546 119.052 6.963746 48.49375 42.74
## 3: 0.6183750 119.052 7.883281 62.14612 56.67
## 4: 0.6213699 119.052 8.025944 64.41577 43.33
## 5: 0.6214022 107.748 6.023472 36.28221 57.65
## 6: 0.6183750 107.748 5.838873 34.09243 42.35
# Preview the first rows of the peak/off-peak summary.
head(PeakOffPeak1, n = 6L)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 2279790 75.00 62.84928 63.99045
## 2: Evening Peak   Spd_All PeakOffPeak  506620 16.67 63.90562 64.99961
## 3: Morning Peak   Spd_All PeakOffPeak  253310  8.33 63.33734 64.50531
## 4:     Off-Peak   Spd_Car PeakOffPeak 2279790 75.00 65.31384 66.80164
## 5: Evening Peak   Spd_Car PeakOffPeak  506620 16.67 66.39017 67.89139
## 6: Morning Peak   Spd_Car PeakOffPeak  253310  8.33 65.96587 67.52430
##       p0.85       min     max       sd      var    PS
## 1: 66.76710 0.6200625 119.052 6.907185 47.70920 74.45
## 2: 67.97517 0.6183750 119.052 7.152707 51.16122 17.11
## 3: 67.25157 0.6215014 119.052 6.986576 48.81225  8.45
## 4: 70.24519 0.6200625 119.052 7.927568 62.84634 74.07
## 5: 71.15226 0.6183750 119.052 8.078377 65.26017 17.37
## 6: 70.65412 0.6214833 119.052 8.005886 64.09420  8.56
# Export every summary table as CSV under ./<mytype>/des_output/, relative to
# the current working directory.
# Changes from the eight copy-pasted write.csv() calls:
#   * the output directory is created if missing, so the first write cannot
#     fail on a fresh checkout;
#   * the file prefix is built from `mytype` instead of a hard-coded "RI", so
#     directory and file names stay in step (paths are unchanged for "RI").
out_dir <- file.path(".", mytype, "des_output")
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# File-name suffix -> summary table.
summaries <- list(
  hour        = hour1,
  day         = day1,
  dow         = DOW1,
  month       = Month1,
  aadt        = AADT2,
  crash       = Crash2,
  daynight    = DayNight1,
  peakoffpeak = PeakOffPeak1
)

for (suffix in names(summaries)) {
  out_file <- file.path(out_dir, paste0("OH_", mytype, "_OS_DS_", suffix, ".csv"))
  write.csv(summaries[[suffix]], out_file, row.names = FALSE)
}