# Ohio Conflated Data (Rural Two Lane) ----

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)


# Facility-type tag; reused further down when the output paths are built.
mytype <- "R2"

# Relative paths in this script resolve against this project directory.
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")

# NOTE(review): OH_2un_nomed is presumably restored by this load() call -- confirm.
load(file = "./two-lane_undivided_OH_reduce_withCrash.rData")

# Working table for the rest of the analysis, followed by a quick size check.
df_R2 <- OH_2un_nomed
dim(df_R2)
## [1] 5735712      48
### Speed per vehicle class: 3600 * distance / travel time
# NOTE(review): presumably DISTANCE is in miles and the travel times are in
# seconds, which would make these values mph -- confirm the units.
df_R2$Spd_All <- with(df_R2, 3600 * DISTANCE / Travel_TIME_ALL_VEHICLES)
df_R2$Spd_Car <- with(df_R2, 3600 * DISTANCE / Travel_TIME_PASSENGER_VEHICLES)
df_R2$Spd_Truck <- with(df_R2, 3600 * DISTANCE / Travel_TIME_FREIGHT_TRUCKS)


### Month, Day, Year
# Build an 8-character date string and slice it by position. The substr()
# offsets below read it as MMDDYYYY.
# NOTE(review): presumably DATE is stored as a number, so January-September
# dates lose their leading zero and need left-padding -- confirm.
# The character conversion and the padding are done in one step; previously the
# converted value was assigned and then immediately overwritten by padding the
# raw DATE column.
df_R2$date <- str_pad(as.character(df_R2$DATE), width = 8, side = "left", pad = "0")
df_R2$Month <- substr(df_R2$date, start = 1, stop = 2)
df_R2$Day   <- substr(df_R2$date, start = 3, stop = 4)
df_R2$Year  <- substr(df_R2$date, start = 5, stop = 8)

# Convert an hourly epoch number into an "HH:00:00" clock label.
#
# x:       numeric vector of hour-of-day epochs (e.g. 0, 7, 23).
# returns: character vector, same length as x, with the hour left-padded with
#          zeros to two characters and minutes/seconds fixed at "00".
#
# The result is returned visibly (the previous version ended in an assignment,
# so the value was returned invisibly and nothing printed at the console).
ConvEpoc2HM <- function(x) {
  # Zero-pad the hour to width 2; the epoch is hourly, so minutes and seconds
  # are always zero.
  hour_label <- formatC(x, width = 2, flag = "0")
  paste(hour_label, "00", "00", sep = ":")
}


# "HH:00:00" label for every record, derived from the hourly epoch.
df_R2$Hour1 <- ConvEpoc2HM(df_R2$EPOCH1h)

# Parse the MMDDYYYY date string and glue the hour label onto it
# (paste() separates the two parts with a single space by default).
DATE4 <- paste(
  strptime(df_R2$date, format = "%m%d%Y", tz = ""),
  df_R2$Hour1
)

# Full timestamp in the session's local time zone (tz = "").
df_R2$PCT_TIME <- as.POSIXct(DATE4, format = "%Y-%m-%d %H:%M:%OS", tz = "")

# Two-digit hour of day and labelled day of week taken from the timestamp.
df_R2$Hour <- format(df_R2$PCT_TIME, "%H")
df_R2$DOW <- lubridate::wday(df_R2$PCT_TIME, label = TRUE)

# Temporal Patterns ----

# List the columns now present in df_R2, including the speed and date/time
# columns derived above (the captured listing follows).
names(df_R2)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH1h"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "ADMIN_LEVE"                    
##  [9] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [11] "DISTANCE"                       "ROAD_NUMBE"                    
## [13] "ROAD_NAME"                      "LATITUDE"                      
## [15] "LONGITUDE"                      "ROAD_DIREC"                    
## [17] "ORN_FID"                        "COUNTY"                        
## [19] "divided"                        "SURF_TYP"                      
## [21] "NHS_CDE"                        "HPMS"                          
## [23] "ACCESS"                         "AADT_YR"                       
## [25] "FED_FACI"                       "PK_LANES"                      
## [27] "MED_TYPE"                       "FED_MEDW"                      
## [29] "BEGMP"                          "ENDMP"                         
## [31] "SEG_LNG"                        "cnty_rte"                      
## [33] "rte_nbr"                        "aadt"                          
## [35] "aadt_bc"                        "aadt_pt"                       
## [37] "surf_wid"                       "no_lanes"                      
## [39] "func_cls"                       "rodwycls"                      
## [41] "Total"                          "K"                             
## [43] "A"                              "B"                             
## [45] "C"                              "O"                             
## [47] "DAYMTH"                         "Crash"                         
## [49] "Spd_All"                        "Spd_Car"                       
## [51] "Spd_Truck"                      "date"                          
## [53] "Month"                          "Day"                           
## [55] "Year"                           "Hour1"                         
## [57] "PCT_TIME"                       "Hour"                          
## [59] "DOW"
# Bin AADT into labelled volume classes. Intervals are right-closed, e.g.
# (2000, 5000]. include.lowest = TRUE closes the first bin on the left so that
# aadt == 0 falls in "0-2000", as the label implies, instead of becoming NA.
df_R2$AADT1 <- cut(
  df_R2$aadt,
  breaks = c(0, 2000, 5000, 10000, 15000, 20000, 30000, Inf),
  labels = c("0-2000", "2001-5000", "5001-10000", "10001-15000",
             "15001-20000", "20001-30000", "> 30000"),
  include.lowest = TRUE
)
table(df_R2$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##      183960     2388744     2821368      297840       43800           0 
##     > 30000 
##           0
# Two-level crash indicator: values in (-1, 0] are "No crash",
# values above 0 are "Crash".
df_R2$Crash1 <- cut(
  x = df_R2$Crash,
  breaks = c(-1, 0, Inf),
  labels = c("No crash", "Crash"),
  right = TRUE
)
table(df_R2$Crash1)
## 
## No crash    Crash 
##  5733547     2165
# Day/night flag from the hourly epoch: (-1, 6] and (16, 23] are "Night",
# (6, 16] is "Day". cut(labels = FALSE) yields the interval index (1-3), which
# is used directly to look up the label.
df_R2$DayNight <- c("Night", "Day", "Night")[
  cut(df_R2$EPOCH1h, breaks = c(-1, 6, 16, 23), labels = FALSE)
]
table(df_R2$DayNight)
## 
##     Day   Night 
## 2389880 3345832
# Peak-period flag from the hourly epoch: (6, 8] is "Morning Peak",
# (15, 19] is "Evening Peak", every other interval is "Off-Peak".
# cut(labels = FALSE) yields the interval index (1-5) used for the lookup.
df_R2$PeakOffPeak <- c(
  "Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak"
)[
  cut(df_R2$EPOCH1h, breaks = c(-1, 6, 8, 15, 19, 23), labels = FALSE)
]
table(df_R2$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       955952       477976      4301784
# df_R201: geometry columns plus the temporal/grouping and speed columns.
df_R201 <- df_R2[, c(
  "divided", "MED_TYPE", "surf_wid", "no_lanes",
  "EPOCH1h", "Hour", "Day", "DOW", "Month", "Year",
  "AADT1", "Crash1", "DayNight", "PeakOffPeak",
  "Spd_All", "Spd_Car", "Spd_Truck"
)]

# df_R202: the same table without the four leading geometry columns
# (columns 5 to 17 of df_R201, selected here by name).
df_R202 <- df_R201[, c(
  "EPOCH1h", "Hour", "Day", "DOW", "Month", "Year",
  "AADT1", "Crash1", "DayNight", "PeakOffPeak",
  "Spd_All", "Spd_Car", "Spd_Truck"
)]


# Column groups used by the summaries below:
#   cols  - grouping variables, converted to factors
#   cols1 - speed measures that get summarised
#   cols2 - roadway geometry attributes
cols <- c("EPOCH1h", "Hour", "Day", "DOW", "Month", "AADT1", "Crash1",
          "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("divided", "MED_TYPE", "surf_wid", "no_lanes")

# Convert every grouping column to a factor.
# Uses the plain pipe with a single assignment: `%<>%` is magrittr's compound
# assignment operator, which none of the libraries loaded in this script
# export, and combining it with `=` assigned the result twice. The function is
# passed directly because funs() is deprecated in dplyr.
df_R202 <- df_R202 %>%
  mutate_at(cols, factor)

# Statistics requested for every grouped speed summary (defined once instead of
# being repeated in each call).
speed_stats <- c("Count", "Prop", "mean", "median", "p0.85", "min", "max",
                 "sd", "var", "PS")

# Speed summaries (cols1) by a single grouping variable.
hour1 <- ExpCustomStat(df_R202, Cvar = "Hour", Nvar = cols1,
                       stat = speed_stats, gpby = FALSE)
day1 <- ExpCustomStat(df_R202, Cvar = "Day", Nvar = cols1,
                      stat = speed_stats, gpby = FALSE)
DOW1 <- ExpCustomStat(df_R202, Cvar = "DOW", Nvar = cols1,
                      stat = speed_stats, gpby = FALSE)
Month1 <- ExpCustomStat(df_R202, Cvar = "Month", Nvar = cols1,
                        stat = speed_stats, gpby = FALSE)
AADT2 <- ExpCustomStat(df_R202, Cvar = "AADT1", Nvar = cols1,
                       stat = speed_stats, gpby = FALSE)

# Crash status crossed with hour; gpby is left at its default here, unlike the
# single-variable summaries.
Crash2 <- ExpCustomStat(df_R202, Cvar = c("Crash1", "Hour"), Nvar = cols1,
                        stat = speed_stats)

DayNight1 <- ExpCustomStat(df_R202, Cvar = "DayNight", Nvar = cols1,
                           stat = speed_stats, gpby = FALSE)
PeakOffPeak1 <- ExpCustomStat(df_R202, Cvar = "PeakOffPeak", Nvar = cols1,
                              stat = speed_stats, gpby = FALSE)

# Geometry summary. ExpCustomStat only accepts numeric columns in Nvar; passing
# all of cols2 was rejected in the recorded run with
# "divided variable/s not in numeric type". Keep only the numeric geometry
# columns and report the ones that are skipped.
geo_stats <- c("mean", "median", "p0.85", "min", "max", "sd", "var", "PS")
geo_is_numeric <- vapply(
  cols2,
  function(nm) is.numeric(df_R201[[nm]]),
  logical(1)
)
if (!all(geo_is_numeric)) {
  message("Skipping non-numeric geometry column(s): ",
          paste(cols2[!geo_is_numeric], collapse = ", "))
}
geo <- if (any(geo_is_numeric)) {
  ExpCustomStat(df_R201, Nvar = cols2[geo_is_numeric], stat = geo_stats)
} else {
  NULL
}
# Draw one faceted line chart for a speed summary table.
#
# stats_subset: summary columns already reduced to the id column(s) followed by
#               mean, p0.85 and sd (those three are stacked into long form).
# plot_title:   chart title.
# x_var:        name of the column shown on the x axis.
# facets:       facet_grid() formula; `condition` is the stacked statistic.
#
# Returns the plot object, so a top-level call prints it as before.
plot_speed_summary <- function(stats_subset, plot_title, x_var = "Level",
                               facets = condition ~ .) {
  long_stats <- gather(stats_subset, condition, measurement, mean:sd,
                       factor_key = TRUE)
  ggline(long_stats, x = x_var, y = "measurement", color = "Attribute",
         palette = c("red", "blue", "black")) +
    theme(legend.title = element_blank()) +
    facet_grid(facets) +
    labs(title = plot_title)
}

# Columns 1, 2, 6, 8, 11 of the single-variable summaries are
# Level, Attribute, mean, p0.85 and sd.
plot_speed_summary(hour1[, c(1, 2, 6, 8, 11)], "By Hour")

plot_speed_summary(DOW1[, c(1, 2, 6, 8, 11)], "By Day of Week")

plot_speed_summary(Month1[, c(1, 2, 6, 8, 11)], "By Month")

plot_speed_summary(AADT2[, c(1, 2, 6, 8, 11)], "By AADT")

# Crash2 has two id columns (Crash1, Hour), so Attribute is column 3 and the
# chart is additionally faceted by crash status.
plot_speed_summary(Crash2[, c(1, 2, 3, 6, 8, 11)], "By Crash",
                   x_var = "Hour", facets = condition + Crash1 ~ .)

plot_speed_summary(DayNight1[, c(1, 2, 6, 8, 11)], "By Day/Night")

plot_speed_summary(PeakOffPeak1[, c(1, 2, 6, 8, 11)], "By Peak/Off-Peak")

# Temporal Statistics of Operational Speed ----

# Set the project directory again (same path as at the top of the script); the
# relative CSV output paths used further down resolve against it.
setwd("/scratch/user/cma16/Task4_Deliverable2/OHprocess4/AllCrash/FacilityBased/")

# Preview the first rows of the by-hour speed summary.
head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 238988 4.17 46.54176 50.30881 57.29199
## 2:    01   Spd_All     Hour 238988 4.17 46.64795 50.44740 57.62392
## 3:    10   Spd_All     Hour 238988 4.17 44.56996 48.44912 56.10018
## 4:    11   Spd_All     Hour 238988 4.17 44.43626 48.41103 56.09725
## 5:    12   Spd_All     Hour 238988 4.17 44.41430 48.42635 56.12063
## 6:    13   Spd_All     Hour 238988 4.17 44.51282 48.49174 56.13871
##          min      max       sd      var   PS
## 1: 0.6212757 91.37022 12.47010 155.5034 2.67
## 2: 0.6212308 84.49200 12.66086 160.2974 2.58
## 3: 0.6212308 84.49200 12.93021 167.1902 5.08
## 4: 0.6212308 84.63938 13.08443 171.2024 5.11
## 5: 0.6212308 84.49200 13.14118 172.6906 5.11
## 6: 0.6166143 84.49200 13.10005 171.6113 5.13
# Preview the first rows of the speed summary grouped by day of month.
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 188544 3.29 45.04269 48.87549 56.52934
## 2:    02   Spd_All      Day 188544 3.29 45.15224 48.99593 56.46963
## 3:    03   Spd_All      Day 188544 3.29 45.08333 48.83672 56.37550
## 4:    04   Spd_All      Day 188544 3.29 45.03154 48.86677 56.46675
## 5:    05   Spd_All      Day 188544 3.29 45.06273 48.88477 56.43811
## 6:    06   Spd_All      Day 188544 3.29 45.03663 48.88556 56.38563
##          min      max       sd      var   PS
## 1: 0.6212308 84.49200 12.95075 167.7220 3.07
## 2: 0.6212308 88.58962 12.80607 163.9953 3.37
## 3: 0.6212308 88.62274 12.75700 162.7410 3.30
## 4: 0.6212757 84.49200 12.88951 166.1395 3.20
## 5: 0.6214662 84.49200 12.87144 165.6739 3.21
## 6: 0.6166143 85.53802 12.85837 165.3378 3.31
# Preview the first rows of the speed summary grouped by day of week.
head(DOW1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 833016 14.52 45.06986 48.90450 56.37111
## 2:   Fri   Spd_All      DOW 817008 14.24 45.02206 48.95335 56.39028
## 3:   Sat   Spd_All      DOW 817008 14.24 45.37804 49.40502 56.79718
## 4:   Sun   Spd_All      DOW 817032 14.24 45.84801 49.80977 57.09305
## 5:   Mon   Spd_All      DOW 817032 14.24 45.20765 49.03092 56.48045
## 6:   Tue   Spd_All      DOW 817032 14.24 45.13370 48.96765 56.39400
##          min      max       sd      var    PS
## 1: 0.6166143 91.25528 12.78357 163.4196 15.57
## 2: 0.6166545 91.37022 12.89946 166.3962 15.13
## 3: 0.6166143 91.37022 13.20426 174.3524 12.42
## 4: 0.6212308 90.66918 13.09268 171.4183 10.60
## 5: 0.6166143 84.98071 12.73764 162.2474 15.08
## 6: 0.6166143 88.16620 12.72482 161.9209 15.61
# Preview the first rows of the speed summary grouped by month.
head(Month1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 478392 8.34 44.74701 48.20888 55.98756
## 2:    02   Spd_All    Month 432096 7.53 44.23628 47.50839 55.73179
## 3:    03   Spd_All    Month 478392 8.34 45.31483 49.08273 56.46668
## 4:    04   Spd_All    Month 462960 8.07 45.53027 49.48751 56.55341
## 5:    05   Spd_All    Month 478392 8.34 45.14393 49.16684 56.48147
## 6:    06   Spd_All    Month 462960 8.07 45.08760 49.08273 56.53964
##          min      max       sd      var   PS
## 1: 0.6212757 84.49200 12.51032 156.5080 7.52
## 2: 0.6212308 86.15404 12.51640 156.6602 6.91
## 3: 0.6212308 86.44356 12.63734 159.7023 8.07
## 4: 0.6166143 88.62274 12.71952 161.7862 8.09
## 5: 0.6166143 84.49200 12.97893 168.4526 8.20
## 6: 0.6166143 86.42871 13.04838 170.2602 8.13
# Preview the first rows of the speed summary grouped by AADT class.
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:   2001-5000   Spd_All    AADT1 2388744 41.65 45.92105 49.46992 56.53463
## 2:  5001-10000   Spd_All    AADT1 2821368 49.19 45.22713 49.40502 56.59426
## 3:      0-2000   Spd_All    AADT1  183960  3.21 48.84657 52.12393 57.63865
## 4: 10001-15000   Spd_All    AADT1  297840  5.19 40.69878 44.42596 54.43729
## 5: 15001-20000   Spd_All    AADT1   43800  0.76 34.55586 38.36620 50.44800
## 6:   2001-5000   Spd_Car    AADT1 2388744 41.65 46.49415 50.23438 57.20400
##          min      max       sd      var    PS
## 1: 0.6212757 92.10765 12.27450 150.6633 42.02
## 2: 0.6185854 91.37022 12.99365 168.8350 50.26
## 3: 0.6166143 88.00224 11.61727 134.9609  1.73
## 4: 0.6214663 82.59513 14.02563 196.7183  5.34
## 5: 0.6214469 83.43323 15.09530 227.8682  0.65
## 6: 0.6212757 92.60805 12.68285 160.8548 39.80
# Preview the first rows of the speed summary grouped by crash status and hour.
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 238934 4.17 46.54295 50.30893 57.29208
## 2: No crash   01   Spd_All 238943 4.17 46.64850 50.44779 57.62392
## 3: No crash   10   Spd_All 238911 4.17 44.57174 48.45075 56.10026
## 4: No crash   11   Spd_All 238904 4.17 44.43814 48.41245 56.09762
## 5: No crash   12   Spd_All 238899 4.17 44.41719 48.42814 56.12063
## 6: No crash   13   Spd_All 238893 4.17 44.51532 48.49470 56.14093
##          min      max       sd      var   PS
## 1: 0.6212757 91.37022 12.46909 155.4781 2.67
## 2: 0.6212308 84.49200 12.66069 160.2931 2.58
## 3: 0.6212308 84.49200 12.92939 167.1690 5.08
## 4: 0.6212308 84.63938 13.08313 171.1684 5.11
## 5: 0.6212308 84.49200 13.13984 172.6555 5.11
## 6: 0.6166143 84.49200 13.09902 171.5844 5.13
# Preview the first rows of the speed summary grouped by day/night.
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 3345832 58.33 45.88383 49.72364 56.77309
## 2:   Day   Spd_All DayNight 2389880 41.67 44.57277 48.49692 56.19578
## 3: Night   Spd_Car DayNight 3345832 58.33 46.39786 50.31493 57.47246
## 4:   Day   Spd_Car DayNight 2389880 41.67 44.91206 49.05937 56.81156
## 5: Night Spd_Truck DayNight 3345832 58.33 46.24961 50.26786 56.75937
## 6:   Day Spd_Truck DayNight 2389880 41.67 45.43792 49.54425 56.35677
##          min      max       sd      var    PS
## 1: 0.6166143 92.10765 12.64813 159.9752 49.60
## 2: 0.6166143 89.74647 13.03710 169.9660 50.40
## 3: 0.6166143 92.10765 12.99214 168.7956 43.71
## 4: 0.6166143 92.60805 13.54046 183.3441 56.29
## 5: 0.6212308 84.49200 12.34351 152.3622 47.47
## 6: 0.6212308 84.49200 12.56232 157.8118 52.53
# Preview the first rows of the speed summary grouped by peak/off-peak period.
head(PeakOffPeak1)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 4301784 75.00 45.31394 49.18408
## 2: Evening Peak   Spd_All PeakOffPeak  955952 16.67 45.01170 49.04128
## 3: Morning Peak   Spd_All PeakOffPeak  477976  8.33 44.87432 48.63672
## 4:     Off-Peak   Spd_Car PeakOffPeak 4301784 75.00 45.63240 49.65797
## 5: Evening Peak   Spd_Car PeakOffPeak  955952 16.67 45.39757 49.68760
## 6: Morning Peak   Spd_Car PeakOffPeak  477976  8.33 45.30042 49.19991
##       p0.85       min      max       sd      var    PS
## 1: 56.54297 0.6166143 92.10765 12.82877 164.5774 71.37
## 2: 56.43141 0.6166143 89.74647 13.02153 169.5601 18.94
## 3: 56.37966 0.6212308 84.49200 12.81332 164.1811  9.69
## 4: 57.08630 0.6166143 92.60805 13.26465 175.9510 69.36
## 5: 57.22584 0.6166143 89.74647 13.65794 186.5394 19.84
## 6: 56.88060 0.6212308 90.53618 13.11375 171.9705 10.80
# Export every speed summary to ./<mytype>/des_output/OH_R2_OS_DS_<key>.csv.
# File names are unchanged from the previous one-call-per-table version.
out_dir <- paste0("./", mytype, "/des_output")

# write.csv() does not create missing directories, so make sure the target
# exists (no-op when it is already there).
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# Summary tables keyed by the suffix used in their file name.
summary_tables <- list(
  hour = hour1,
  day = day1,
  dow = DOW1,
  month = Month1,
  aadt = AADT2,
  crash = Crash2,
  daynight = DayNight1,
  peakoffpeak = PeakOffPeak1
)

for (key in names(summary_tables)) {
  write.csv(
    summary_tables[[key]],
    paste0(out_dir, "/OH_R2_OS_DS_", key, ".csv"),
    row.names = FALSE
  )
}