Ohio Conflated Data (Rural Multilane Divided)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)


mytype = 'RMD'
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")
load("./multi-lane_divided_OH_reduce_withCrash.rData")


df_RMD <- OH_mun_med
dim(df_RMD)
## [1] 4005288      48
### Calculating Speed
df_RMD$Spd_All = 3600*df_RMD$DISTANCE/df_RMD$Travel_TIME_ALL_VEHICLES
df_RMD$Spd_Car = 3600*df_RMD$DISTANCE/df_RMD$Travel_TIME_PASSENGER_VEHICLES
df_RMD$Spd_Truck = 3600*df_RMD$DISTANCE/df_RMD$Travel_TIME_FREIGHT_TRUCKS

### Remove outliers
df_RMD$spd_av = ifelse(df_RMD$spd_av <120, df_RMD$spd_av, NA)
df_RMD$spd_pv = ifelse(df_RMD$spd_pv <120, df_RMD$spd_pv, NA)
df_RMD$spd_ft = ifelse(df_RMD$spd_ft <120, df_RMD$spd_ft, NA)

### Month, Day
df_RMD$date <- as.character(df_RMD$DATE)
df_RMD$date <- str_pad(df_RMD$DATE, 8, pad = "0")
df_RMD$Month <- substr(df_RMD$date, start = 1, stop = 2)
df_RMD$Day   <- substr(df_RMD$date, start = 3, stop = 4)
df_RMD$Year  <- substr(df_RMD$date, start = 5, stop = 8)

ConvEpoc2HM <- function(x) {
  # for a given epoc number, get its hour:min
  y.hr <- x
  y.min <- 0
  x <- paste(str_pad(y.hr, 2, side = 'left', pad='0'), 
             str_pad(y.min, 2, side = 'left', pad='0'), 
             '00', sep = ':')
}


df_RMD$Hour1 <- ConvEpoc2HM(df_RMD$EPOCH1h)
DATE4 <- paste(strptime(df_RMD$date, format = "%m%d%Y", tz =""), df_RMD$Hour1, sep = ' ')
df_RMD$PCT_TIME <- as.POSIXct(DATE4, tz ="", format = "%Y-%m-%d %H:%M:%OS")
df_RMD$Hour <- strftime(df_RMD$PCT_TIME, format="%H")
df_RMD$DOW <- wday(df_RMD$PCT_TIME, label = TRUE)

Temporal Patterns

names(df_RMD)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH1h"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "ADMIN_LEVE"                    
##  [9] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [11] "DISTANCE"                       "ROAD_NUMBE"                    
## [13] "ROAD_NAME"                      "LATITUDE"                      
## [15] "LONGITUDE"                      "ROAD_DIREC"                    
## [17] "ORN_FID"                        "COUNTY"                        
## [19] "divided"                        "SURF_TYP"                      
## [21] "NHS_CDE"                        "HPMS"                          
## [23] "ACCESS"                         "AADT_YR"                       
## [25] "FED_FACI"                       "PK_LANES"                      
## [27] "MED_TYPE"                       "FED_MEDW"                      
## [29] "BEGMP"                          "ENDMP"                         
## [31] "SEG_LNG"                        "cnty_rte"                      
## [33] "rte_nbr"                        "aadt"                          
## [35] "aadt_bc"                        "aadt_pt"                       
## [37] "surf_wid"                       "no_lanes"                      
## [39] "func_cls"                       "rodwycls"                      
## [41] "Total"                          "K"                             
## [43] "A"                              "B"                             
## [45] "C"                              "O"                             
## [47] "DAYMTH"                         "Crash"                         
## [49] "Spd_All"                        "Spd_Car"                       
## [51] "Spd_Truck"                      "spd_av"                        
## [53] "spd_pv"                         "spd_ft"                        
## [55] "date"                           "Month"                         
## [57] "Day"                            "Year"                          
## [59] "Hour1"                          "PCT_TIME"                      
## [61] "Hour"                           "DOW"
df_RMD$AADT1 <- cut(df_RMD$aadt , breaks=c(0,2000,5000,10000, 15000, 20000, 30000, Inf), 
                   labels=c("0-2000","2001-5000","5001-10000","10001-15000","15001-20000","20001-30000","> 30000"))
table(df_RMD$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##           0       87600     1184304     1511328      766536      332880 
##     > 30000 
##      122640
df_RMD$Crash1 <- cut(df_RMD$Crash , breaks=c(-1,0, Inf), 
                    labels=c("No crash","Crash"))
table(df_RMD$Crash1)
## 
## No crash    Crash 
##  4003297     1991
df_RMD$DayNight <- cut(df_RMD$EPOCH1h , breaks=c(-1,6,16,23))
df_RMD$DayNight <- as.numeric(df_RMD$DayNight)
df_RMD$DayNight <- c("Night","Day","Night")[df_RMD$DayNight]
table(df_RMD$DayNight)
## 
##     Day   Night 
## 1668870 2336418
df_RMD$PeakOffPeak <- cut(df_RMD$EPOCH1h , breaks=c(-1,6,8,15,19,23))
df_RMD$PeakOffPeak <- as.numeric(df_RMD$PeakOffPeak)
df_RMD$PeakOffPeak <- c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak")[df_RMD$PeakOffPeak]
table(df_RMD$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       667548       333774      3003966
df_RMD01 <- df_RMD[,c("divided", "MED_TYPE", "surf_wid", "no_lanes", "EPOCH1h",
                    "Hour","Day","DOW","Month","Year", "AADT1","Crash1",     
                    "DayNight","PeakOffPeak","Spd_All","Spd_Car","Spd_Truck")]
df_RMD02 <- df_RMD01[,c(5:17)]


cols <- c("EPOCH1h", "Hour", "Day", "DOW", "Month", "AADT1" , "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("divided", "MED_TYPE", "surf_wid","no_lanes")

df_RMD02= df_RMD02 %<>%
  mutate_at(cols, funs(factor(.)))

hour1 <- ExpCustomStat(df_RMD02,Cvar = c("Hour"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
day1 <- ExpCustomStat(df_RMD02,Cvar = c("Day"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
DOW1 <- ExpCustomStat(df_RMD02,Cvar = c("DOW"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
Month1 <- ExpCustomStat(df_RMD02,Cvar = c("Month"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
AADT2 <- ExpCustomStat(df_RMD02,Cvar = c("AADT1"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
Crash2 <- ExpCustomStat(df_RMD02,Cvar = c("Crash1", "Hour"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'))
DayNight1 <- ExpCustomStat(df_RMD02,Cvar = c("DayNight"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
PeakOffPeak1 <- ExpCustomStat(df_RMD02,Cvar = c("PeakOffPeak"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
geo <- ExpCustomStat(df_RMD01, Nvar=cols2, stat = c('mean','median','p0.85','min', 'max','sd', 'var','PS'))
## divided variable/s not in numeric type 
##  Either convert it into numeric or remove that from 'Nvar' list
ggline(gather(hour1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Hour")

ggline(gather(DOW1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Day of Week")

ggline(gather(Month1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Month")

ggline(gather(AADT2[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By AADT")

ggline(gather(Crash2[,c(1, 2, 3, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Hour", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition+Crash1~ .)+labs(title="By Crash")

ggline(gather(DayNight1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Day/Night")

ggline(gather(PeakOffPeak1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Peak/Off-Peak")

Temporal Statistics of Operational Speed

# 
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")

head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 166887 4.17 58.48455 60.83899 64.83183
## 2:    01   Spd_All     Hour 166887 4.17 58.37079 60.73584 64.62766
## 3:    10   Spd_All     Hour 166887 4.17 57.80295 61.07039 65.27120
## 4:    11   Spd_All     Hour 166887 4.17 57.78630 61.16990 65.35023
## 5:    12   Spd_All     Hour 166887 4.17 57.78438 61.20038 65.36500
## 6:    13   Spd_All     Hour 166887 4.17 57.84885 61.27797 65.40489
##          min      max        sd       var   PS
## 1: 0.6214840 272.1281 10.250777 105.07842 3.57
## 2: 0.6214648 250.6443  9.920772  98.42171 3.47
## 3: 0.6211331 264.5690 12.642896 159.84282 4.49
## 4: 0.6214840 257.4185 12.894679 166.27275 4.50
## 5: 0.6214840 257.4185 12.953636 167.79668 4.51
## 6: 0.6211331 272.1281 12.905091 166.54136 4.51
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 131664 3.29 57.61711 60.71793 65.23719
## 2:    02   Spd_All      Day 131664 3.29 57.96588 60.99035 65.22003
## 3:    03   Spd_All      Day 131664 3.29 57.92253 60.87106 65.12161
## 4:    04   Spd_All      Day 131664 3.29 57.91106 60.94299 65.27073
## 5:    05   Spd_All      Day 131664 3.29 57.76179 60.82585 65.24015
## 6:    06   Spd_All      Day 131664 3.29 57.86193 60.93333 65.30961
##          min      max       sd      var   PS
## 1: 0.6211331 257.4185 11.99304 143.8330 3.17
## 2: 0.6214840 257.4185 11.95445 142.9090 3.31
## 3: 0.6211331 257.4185 11.76154 138.3338 3.29
## 4: 0.6214178 264.5690 11.93759 142.5061 3.26
## 5: 0.6211331 250.6443 12.06850 145.6486 3.24
## 6: 0.6214965 272.1281 12.13001 147.1370 3.29
head(DOW1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 581712 14.52 57.92764 60.98844 65.16683
## 2:   Fri   Spd_All      DOW 570384 14.24 58.12179 61.18471 65.53318
## 3:   Sat   Spd_All      DOW 570384 14.24 58.42778 61.19992 65.71344
## 4:   Sun   Spd_All      DOW 570624 14.25 58.72413 61.21103 65.80595
## 5:   Mon   Spd_All      DOW 570624 14.25 57.91514 60.94010 65.19768
## 6:   Tue   Spd_All      DOW 570624 14.25 57.87751 60.95594 65.05907
##          min      max       sd      var    PS
## 1: 0.6025721 297.6401 11.91944 142.0730 14.82
## 2: 0.6025721 260.9448 12.19127 148.6270 14.58
## 3: 0.6211331 280.1319 11.99254 143.8211 13.84
## 4: 0.6025721 288.6207 11.49998 132.2496 13.09
## 5: 0.6211331 297.6401 11.87505 141.0169 14.43
## 6: 0.6211331 288.6207 11.95583 142.9419 14.61
head(Month1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 334056 8.34 57.11292 59.97152 64.15353
## 2:    02   Spd_All    Month 301728 7.53 56.77616 59.77624 64.19947
## 3:    03   Spd_All    Month 334056 8.34 57.93771 60.82323 64.88922
## 4:    04   Spd_All    Month 323280 8.07 58.44796 61.09889 65.17146
## 5:    05   Spd_All    Month 334056 8.34 58.48000 61.09633 65.28740
## 6:    06   Spd_All    Month 323280 8.07 58.63876 61.29491 65.51320
##          min      max       sd      var   PS
## 1: 0.6025721 297.6401 12.73606 162.2073 7.91
## 2: 0.6211331 260.9448 13.07597 170.9809 7.17
## 3: 0.6025721 288.6207 13.20530 174.3800 8.18
## 4: 0.6025721 297.6401 13.02073 169.5395 8.10
## 5: 0.6195108 284.3130 13.13131 172.4313 8.39
## 6: 0.6025721 288.6207 13.38780 179.2332 8.25
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: 15001-20000   Spd_All    AADT1  766536 19.14 59.59149 62.11846 66.05064
## 2: 20001-30000   Spd_All    AADT1  332880  8.31 60.98961 62.59266 67.29081
## 3:  5001-10000   Spd_All    AADT1 1184304 29.57 57.43510 59.80778 65.17224
## 4: 10001-15000   Spd_All    AADT1 1511328 37.73 58.20127 61.21323 64.73075
## 5:   2001-5000   Spd_All    AADT1   87600  2.19 49.94776 53.34819 58.66533
## 6:     > 30000   Spd_All    AADT1  122640  3.06 50.82713 56.50518 64.85297
##          min       max        sd      var    PS
## 1: 0.6025721 297.64013 14.845276 220.3822 20.16
## 2: 0.6214857  90.17709  8.086025  65.3838  9.05
## 3: 0.6211331 239.35200 11.355177 128.9400 28.83
## 4: 0.6195108  92.33262 10.411190 108.3929 37.63
## 5: 0.6214985  86.83900 10.887690 118.5418  1.46
## 6: 0.6214944  87.17053 16.174084 261.6010  2.86
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 166842 4.17 58.48443 60.83897 64.83210
## 2: No crash   01   Spd_All 166830 4.17 58.37015 60.73527 64.62766
## 3: No crash   10   Spd_All 166809 4.16 57.80652 61.07051 65.27189
## 4: No crash   11   Spd_All 166833 4.17 57.78834 61.17046 65.35023
## 5: No crash   12   Spd_All 166821 4.17 57.78714 61.20204 65.36505
## 6: No crash   13   Spd_All 166810 4.16 57.85174 61.27882 65.40489
##          min      max        sd      var   PS
## 1: 0.6214840 272.1281 10.251210 105.0873 3.57
## 2: 0.6214648 250.6443  9.920701  98.4203 3.47
## 3: 0.6211331 264.5690 12.640244 159.7758 4.49
## 4: 0.6214840 257.4185 12.893053 166.2308 4.50
## 5: 0.6214840 257.4185 12.951345 167.7373 4.51
## 6: 0.6211331 272.1281 12.903137 166.4909 4.51
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 2336418 58.33 58.27664 60.94218 65.15445
## 2:   Day   Spd_All DayNight 1668870 41.67 57.90625 61.23048 65.53099
## 3: Night   Spd_Car DayNight 2336418 58.33 60.44762 62.83938 68.75917
## 4:   Day   Spd_Car DayNight 1668870 41.67 59.61768 62.90131 68.61116
## 5: Night Spd_Truck DayNight 2336418 58.33 57.52127 60.28041 63.43308
## 6:   Day Spd_Truck DayNight 1668870 41.67 56.94809 60.20199 63.08375
##          min      max        sd       var    PS
## 1: 0.6025721 297.6401 11.138129 124.05793 55.15
## 2: 0.6195108 288.6207 12.803336 163.92540 44.85
## 3: 0.6025721 317.4828 11.844742 140.29791 51.68
## 4: 0.6025721 288.6207 13.610888 185.25628 48.32
## 5: 0.6025721 272.1281  9.436967  89.05635 54.13
## 6: 0.6025721 257.4185 10.677444 114.00780 45.87
head(PeakOffPeak1)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 3003966 75.00 58.10223 60.97507
## 2: Evening Peak   Spd_All PeakOffPeak  667548 16.67 58.26545 61.46440
## 3: Morning Peak   Spd_All PeakOffPeak  333774  8.33 57.86355 60.97360
## 4:     Off-Peak   Spd_Car PeakOffPeak 3003966 75.00 60.02842 62.74800
## 5: Evening Peak   Spd_Car PeakOffPeak  667548 16.67 60.38383 63.56265
## 6: Morning Peak   Spd_Car PeakOffPeak  333774  8.33 59.48389 62.50998
##       p0.85       min      max       sd      var    PS
## 1: 65.19198 0.6025721 297.6401 11.65888 135.9295 73.44
## 2: 65.84686 0.6025721 297.6401 12.66727 160.4597 17.73
## 3: 65.50543 0.6195108 284.3130 12.48016 155.7544  8.82
## 4: 68.54349 0.6025721 317.4828 12.46834 155.4596 71.72
## 5: 69.23460 0.6211331 297.6401 13.48261 181.7809 18.91
## 6: 68.44389 0.6195108 284.3130 13.24512 175.4332  9.37
write.csv(hour1, paste0("./",mytype,"/des_output/OH_RMD_OS_DS_hour.csv"),row.names = FALSE)
write.csv(day1, paste0("./",mytype,"/des_output/OH_RMD_OS_DS_day.csv"),row.names = FALSE)
write.csv(DOW1, paste0("./",mytype,"/des_output/OH_RMD_OS_DS_dow.csv"),row.names = FALSE)
write.csv(Month1,paste0("./",mytype,"/des_output/OH_RMD_OS_DS_month.csv"),row.names = FALSE)
write.csv(AADT2, paste0("./",mytype,"/des_output/OH_RMD_OS_DS_aadt.csv"),row.names = FALSE)
write.csv(Crash2, paste0("./",mytype,"/des_output/OH_RMD_OS_DS_crash.csv"),row.names = FALSE)
write.csv(DayNight1, paste0("./",mytype,"/des_output/OH_RMD_OS_DS_daynight.csv"),row.names = FALSE)
write.csv(PeakOffPeak1, paste0("./",mytype,"/des_output/OH_RMD_OS_DS_peakoffpeak.csv"),row.names = FALSE)