# North Carolina Conflated Data (Rural Multi-lane Undivided)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)



# Root directory of the facility-based data; the facility sub-directory is
# derived from it below.
facility_root <- "/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/"

# Load the saved workspace image from the root directory.
setwd(facility_root)
load("./multi-lane_undivided_NC_reduce_withCrash_no_intersection.rData")

# Facility-type code; also the name of the sub-directory used from here on.
mytype <- "RMU"
setwd(paste0(facility_root, mytype))

# Working copy of the loaded table (N_mun_nomed comes from the .rData file).
df_RMU <- N_mun_nomed
dim(df_RMU)
## [1] 3995136      30
### Calculating Speed
# speed = 3600 * TMC_length / travel_time / 5280, evaluated left to right.
# NOTE(review): the constants suggest feet and seconds converted to mph --
# confirm the units of TMC_length and the Travel_TIME_* columns.
len_x3600 <- 3600 * df_RMU$TMC_length
df_RMU$Spd_All   <- len_x3600 / df_RMU$Travel_TIME_ALL_VEHICLES / 5280
df_RMU$Spd_Car   <- len_x3600 / df_RMU$Travel_TIME_PASSENGER_VEHICLES / 5280
df_RMU$Spd_Truck <- len_x3600 / df_RMU$Travel_TIME_FREIGHT_TRUCKS / 5280


### Month, Day, Year
# DATE is parsed below as mmddyyyy. Left-pad it to 8 characters so that a
# value that lost its leading zero (e.g. 1052017) still splits at the right
# positions. The conversion to character and the padding are done in a single
# step; previously the as.character() result was overwritten immediately.
df_RMU$date  <- str_pad(as.character(df_RMU$DATE), width = 8, side = "left", pad = "0")
df_RMU$Month <- substr(df_RMU$date, start = 1, stop = 2)
df_RMU$Day   <- substr(df_RMU$date, start = 3, stop = 4)
df_RMU$Year  <- substr(df_RMU$date, start = 5, stop = 8)

ConvEpoc2HM <- function(x) {
  # Convert 15-minute epoch numbers to "HH:MM:00" clock strings.
  #
  # Args:
  #   x: numeric vector of whole-number epoch indices; epoch k starts
  #      k * 15 minutes after midnight (0 -> "00:00:00", 95 -> "23:45:00").
  #
  # Returns:
  #   A character vector the same length as `x` (character(0) for empty
  #   input). Hours and minutes are zero-padded to two digits; seconds are
  #   always "00". NA epochs yield "NA:NA:00".
  minutes_since_midnight <- x * 15
  # The value is returned visibly (the last expression is not an assignment),
  # and sprintf() maps zero-length input to zero-length output.
  sprintf("%02d:%02d:00",
          minutes_since_midnight %/% 60,
          minutes_since_midnight %% 60)
}


# Clock time ("HH:MM:00") at which each record's 15-minute epoch starts.
df_RMU$Hour1 <- ConvEpoc2HM(df_RMU$EPOCH15)

# Combine the parsed calendar date with the clock time and convert the result
# to POSIXct.
# NOTE(review): tz = "" means the session's current time zone, so the parsed
# instants depend on where the script runs -- confirm this is intended.
DATE4 <- paste(strptime(df_RMU$date, "%m%d%Y", tz = ""), df_RMU$Hour1)
df_RMU$PCT_TIME <- as.POSIXct(DATE4, format = "%Y-%m-%d %H:%M:%OS", tz = "")

# Hour of day as a two-character string, and day of week as a labelled factor.
# wday() is namespaced because data.table also exports a wday().
df_RMU$Hour <- format(df_RMU$PCT_TIME, "%H")
df_RMU$DOW  <- lubridate::wday(df_RMU$PCT_TIME, label = TRUE)

# Temporal Patterns

# List the columns now present: the loaded fields plus the speed, date and
# time columns derived above.
names(df_RMU)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH15"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "TMC_length"                    
##  [9] "ave_aadt"                       "ave_wtdsgspd"                  
## [11] "ave_medwid"                     "ave_peaklane"                  
## [13] "ave_row"                        "ave_sur_wid"                   
## [15] "ave_no_lanes"                   "ave_spd_limt"                  
## [17] "ave_rodwycls"                   "ave_rshldwid"                  
## [19] "FC"                             "TER"                           
## [21] "ACC"                            "MED"                           
## [23] "Total"                          "K"                             
## [25] "A"                              "B"                             
## [27] "C"                              "O"                             
## [29] "DAYMTH"                         "Crash"                         
## [31] "Spd_All"                        "Spd_Car"                       
## [33] "Spd_Truck"                      "date"                          
## [35] "Month"                          "Day"                           
## [37] "Year"                           "Hour1"                         
## [39] "PCT_TIME"                       "Hour"                          
## [41] "DOW"
# Bin average AADT into seven volume classes. Intervals are right-closed
# ((2000, 5000], ...); include.lowest = TRUE closes the first one at 0 so that
# an AADT of exactly 0 falls in "0-2000", as the label says, instead of NA.
df_RMU$AADT1 <- cut(df_RMU$ave_aadt,
                    breaks = c(0, 2000, 5000, 10000, 15000, 20000, 30000, Inf),
                    labels = c("0-2000", "2001-5000", "5001-10000", "10001-15000",
                               "15001-20000", "20001-30000", "> 30000"),
                    include.lowest = TRUE)
table(df_RMU$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##      257568      836736     1034400      650112      666912      444288 
##     > 30000 
##      105120
# Two-level crash indicator: counts in (-1, 0] are "No crash", anything above
# 0 is "Crash".
df_RMU$Crash1 <- cut(
  df_RMU$Crash,
  breaks = c(-1, 0, Inf),
  labels = c("No crash", "Crash")
)
table(df_RMU$Crash1)
## 
## No crash    Crash 
##  3995012      124
# ############################################################
# df_RMU$DayNight <- cut(df_RMU$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_RMU$DayNight)
# df_RMU$PeakOffPeak <- cut(df_RMU$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_RMU$PeakOffPeak)
# ###########################################################

# Day/night label per epoch. cut() cannot take the repeated label "Night", so
# the integer bin code (1, 2, 3) is used to index the label vector instead.
# Bins: (-1, 26] Night, (26, 67] Day, (67, 95] Night.
df_RMU$DayNight <- c("Night", "Day", "Night")[
  cut(df_RMU$EPOCH15, breaks = c(-1, 26, 67, 95), labels = FALSE)
]
table(df_RMU$DayNight)
## 
##     Day   Night 
## 1706256 2288880
# Peak/off-peak label per epoch, using the same bin-code lookup because
# "Off-Peak" appears three times.
# Bins: (-1, 26] Off-Peak, (26, 35] Morning Peak, (35, 62] Off-Peak,
#       (62, 75] Evening Peak, (75, 96] Off-Peak.
df_RMU$PeakOffPeak <- c("Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak")[
  cut(df_RMU$EPOCH15, breaks = c(-1, 26, 35, 62, 75, 96), labels = FALSE)
]
table(df_RMU$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       541008       374544      3079584
# # ###########################################################
# df_RMU01 <- df_RMU[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_RMU02 <- df_RMU01[,c(8:19)]
# # ###########################################################
# df_RMU01: geometry columns plus the grouping and speed columns.
df_RMU01 <- df_RMU[, c("ave_spd_limt", "ave_medwid", "ave_no_lanes", "ave_rshldwid", "TMC_length",
                       "ave_sur_wid", "AADT1", "Crash1", "EPOCH15", "Hour", "Day", "DOW", "Month",
                       "DayNight", "PeakOffPeak", "Spd_All", "Spd_Car", "Spd_Truck")]
# df_RMU02: grouping and speed columns only.
df_RMU02 <- df_RMU01[, c("AADT1", "Crash1", "EPOCH15", "Hour", "Day", "DOW", "Month",
                         "DayNight", "PeakOffPeak", "Spd_All", "Spd_Car", "Spd_Truck")]


# cols  : grouping variables (converted to factors below)
# cols1 : speed measures summarised per group
# cols2 : roadway geometry variables summarised over all rows
cols  <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1", "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("ave_spd_limt", "ave_medwid", "ave_no_lanes", "ave_rshldwid", "TMC_length", "ave_sur_wid")

# Convert every grouping column to a factor. A plain assignment with %>%
# replaces `df = df %<>% ...`: %<>% is a magrittr operator that none of the
# library() calls in this script is guaranteed to attach. factor is passed
# directly rather than through the deprecated funs() wrapper.
df_RMU02 <- df_RMU02 %>%
  mutate_at(cols, factor)


# Statistics requested from ExpCustomStat(): the distribution statistics are
# used for every table; the grouped speed tables also get Count and Prop.
dist_stats  <- c('mean', 'median', 'p0.85', 'min', 'max', 'sd', 'var', 'PS')
speed_stats <- c('Count', 'Prop', dist_stats)

# Summarise the three speed measures (cols1) by one grouping column of df_RMU02.
summarise_speed_by <- function(group_var) {
  ExpCustomStat(df_RMU02, Cvar = group_var, Nvar = cols1, stat = speed_stats, gpby = FALSE)
}

hour1        <- summarise_speed_by(c("Hour"))
day1         <- summarise_speed_by(c("Day"))
DOW1         <- summarise_speed_by(c("DOW"))
Month1       <- summarise_speed_by(c("Month"))
AADT2        <- summarise_speed_by(c("AADT1"))
# Crash status by hour: two grouping columns, with gpby left at its default.
Crash2       <- ExpCustomStat(df_RMU02, Cvar = c("Crash1", "Hour"), Nvar = cols1, stat = speed_stats)
DayNight1    <- summarise_speed_by(c("DayNight"))
PeakOffPeak1 <- summarise_speed_by(c("PeakOffPeak"))
# Geometry variables (cols2) over all rows of df_RMU01, with no grouping.
geo          <- ExpCustomStat(df_RMU01, Nvar = cols2, stat = dist_stats)


# Draw mean, 85th-percentile and sd of speed as line charts, one facet row per
# statistic and one coloured line per speed measure ("Attribute").
#
# Args:
#   stats   : summary table returned by ExpCustomStat().
#   title   : plot title.
#   id_cols : identifier columns to keep alongside the statistics.
#   x       : name of the column mapped to the x axis.
#   facets  : facet_grid() formula; `condition` is the statistic name.
#
# Returns the ggplot object. Called at top level it is printed, as the
# original inline ggline() calls were.
plot_speed_stats <- function(stats, title,
                             id_cols = c("Level", "Attribute"),
                             x = "Level",
                             facets = condition ~ .) {
  # Columns are picked by name rather than by position (previously 1, 2, 6, 8,
  # 11). as.data.frame() makes the character-vector subset behave the same
  # whether `stats` is a data.frame or a data.table.
  keep <- c(id_cols, "mean", "p0.85", "sd")
  long <- gather(as.data.frame(stats)[, keep, drop = FALSE],
                 condition, measurement, mean:sd, factor_key = TRUE)
  ggline(long, x = x, y = "measurement", color = "Attribute",
         palette = c("red", "blue", "black")) +
    theme(legend.title = element_blank()) +
    facet_grid(facets) +
    labs(title = title)
}

plot_speed_stats(hour1, "By Hour")

plot_speed_stats(DOW1, "By Day of Week")

plot_speed_stats(Month1, "By Month")

plot_speed_stats(AADT2, "By AADT")

# Crash2 is grouped by Crash1 and Hour, so it has those two columns instead of
# "Level"; plot against Hour with a separate facet row per crash status.
plot_speed_stats(Crash2, "By Crash",
                 id_cols = c("Crash1", "Hour", "Attribute"),
                 x = "Hour",
                 facets = condition + Crash1 ~ .)

plot_speed_stats(DayNight1, "By Day/Night")

plot_speed_stats(PeakOffPeak1, "By Peak/Off-Peak")

# Temporal Statistics of Operational Speed

# Switch back to the facility-level root directory; the CSV paths written at
# the end of the script are relative to it.
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")

# First rows of the speed summary grouped by hour of day.
head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 166464 4.17 34.88840 36.71138 54.01021
## 2:    02   Spd_All     Hour 166464 4.17 35.02084 36.95241 54.45292
## 3:    03   Spd_All     Hour 166464 4.17 35.74168 37.92478 55.32939
## 4:    04   Spd_All     Hour 166464 4.17 36.82886 39.51848 56.22285
## 5:    05   Spd_All     Hour 166464 4.17 37.72722 40.20848 56.40734
## 6:    06   Spd_All     Hour 166464 4.17 37.39770 39.75235 55.32051
##          min      max       sd      var   PS
## 1: 0.2028411 81.23672 16.51279 272.6723 1.17
## 2: 0.4383796 85.74184 16.61708 276.1272 1.11
## 3: 0.1704430 77.81245 16.70801 279.1576 1.24
## 4: 0.4315013 76.29874 16.95660 287.5262 1.62
## 5: 0.1704430 90.68976 16.73749 280.1437 2.83
## 6: 0.1280568 88.38403 16.11696 259.7563 4.56
# First rows of the speed summary grouped by day of month.
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 131136 3.28 33.79339 34.78349 52.80284
## 2:    02   Spd_All      Day 131136 3.28 33.79551 34.86386 52.46364
## 3:    03   Spd_All      Day 131136 3.28 33.59132 34.66359 52.49301
## 4:    04   Spd_All      Day 131136 3.28 33.67571 34.65910 52.68262
## 5:    05   Spd_All      Day 131136 3.28 33.66646 34.68389 52.75855
## 6:    06   Spd_All      Day 131136 3.28 33.68679 34.70245 52.81050
##          min      max       sd      var   PS
## 1: 0.1280568 79.35354 16.22467 263.2398 3.05
## 2: 0.2028411 84.52042 16.08435 258.7064 3.38
## 3: 0.1704430 77.81245 16.16386 261.2703 3.30
## 4: 0.1704430 91.84438 16.28294 265.1341 3.16
## 5: 0.2028411 76.29874 16.29860 265.6445 3.10
## 6: 0.1704430 78.82835 16.33793 266.9278 3.26
# First rows of the speed summary grouped by day of week.
head(DOW1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Wed   Spd_All      DOW 573024 14.34 33.76697 34.78687 52.63384
## 2:   Thu   Spd_All      DOW 581568 14.56 33.77252 34.74139 52.68793
## 3:   Fri   Spd_All      DOW 566784 14.19 33.54073 34.50061 52.75834
## 4:   Sat   Spd_All      DOW 566784 14.19 33.30019 34.28094 52.93339
## 5:   Sun   Spd_All      DOW 568992 14.24 33.96463 35.07853 53.64747
## 6:   Mon   Spd_All      DOW 568992 14.24 33.83020 34.82655 52.61819
##          min      max       sd      var    PS
## 1: 0.1280568 81.78344 16.10328 259.3155 16.97
## 2: 0.1704430 85.74184 16.12811 260.1161 16.81
## 3: 0.1280568 88.76314 16.33621 266.8718 16.11
## 4: 0.1280568 91.84438 16.83760 283.5047  9.82
## 5: 0.1280568 86.61198 16.84141 283.6331  7.16
## 6: 0.1280568 91.75307 16.08074 258.5902 16.21
# First rows of the speed summary grouped by month.
head(Month1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:    07   Spd_All    Month 389856  9.76 33.98403 35.12432 52.90236
## 2:    08   Spd_All    Month 389856  9.76 33.75334 34.82721 52.64627
## 3:    09   Spd_All    Month 377280  9.44 33.70719 34.80102 52.47885
## 4:    10   Spd_All    Month 389856  9.76 33.74643 34.86386 52.39222
## 5:    11   Spd_All    Month 443520 11.10 33.22323 34.18572 52.09202
## 6:    12   Spd_All    Month 458304 11.47 32.77630 33.49156 51.75821
##          min      max       sd      var    PS
## 1: 0.1280568 86.61198 16.33139 266.7144 10.48
## 2: 0.1280568 85.07449 16.21837 263.0354 10.15
## 3: 0.1280568 85.74184 16.12169 259.9090  9.68
## 4: 0.1704430 81.44904 16.10441 259.3520 10.15
## 5: 0.1280568 90.68976 16.13593 260.3683  9.53
## 6: 0.1280568 89.51329 16.21704 262.9923 10.30
# First rows of the speed summary grouped by AADT class.
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:  5001-10000   Spd_All    AADT1 1034400 25.89 37.59030 41.20839 54.14727
## 2:   2001-5000   Spd_All    AADT1  836736 20.94 45.21940 51.41315 58.45158
## 3: 20001-30000   Spd_All    AADT1  444288 11.12 23.08929 20.38246 37.77721
## 4: 15001-20000   Spd_All    AADT1  666912 16.69 32.85233 34.69960 46.96068
## 5:      0-2000   Spd_All    AADT1  257568  6.45 41.41035 45.79368 57.73949
## 6: 10001-15000   Spd_All    AADT1  650112 16.27 26.84927 28.04026 39.86132
##          min      max       sd      var    PS
## 1: 0.1280568 91.84438 16.31021 266.0228 32.02
## 2: 0.3750959 88.38403 15.72056 247.1360 19.35
## 3: 0.1704430 86.08784 11.56138 133.6656 12.85
## 4: 0.6200500 76.12151 13.80345 190.5351 17.78
## 5: 0.5196614 89.51329 16.52610 273.1121  3.14
## 6: 0.6077899 91.75307 12.21389 149.1791 10.42
# First rows of the speed summary grouped by crash status and hour.
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 166457 4.17 34.88921 36.71138 54.00907
## 2: No crash   02   Spd_All 166443 4.17 35.01594 36.91892 54.42861
## 3: No crash   03   Spd_All 166432 4.17 35.73998 37.92478 55.32939
## 4: No crash   04   Spd_All 166427 4.17 36.83195 39.52011 56.22285
## 5: No crash   05   Spd_All 166448 4.17 37.72673 40.20848 56.40711
## 6: No crash   06   Spd_All 166464 4.17 37.39770 39.75235 55.32051
##          min      max       sd      var   PS
## 1: 0.2028411 81.23672 16.51163 272.6339 1.17
## 2: 0.4383796 85.74184 16.61363 276.0128 1.11
## 3: 0.1704430 77.81245 16.70731 279.1341 1.24
## 4: 0.4315013 76.29874 16.95539 287.4853 1.62
## 5: 0.1704430 90.68976 16.73699 280.1267 2.83
## 6: 0.1280568 88.38403 16.11696 259.7563 4.56
# First rows of the speed summary grouped by day/night.
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 2288880 57.29 34.44608 35.94247 53.79533
## 2:   Day   Spd_All DayNight 1706256 42.71 33.34838 34.11727 52.16832
## 3: Night   Spd_Car DayNight 2288880 57.29 34.52268 36.10171 54.41386
## 4:   Day   Spd_Car DayNight 1706256 42.71 33.29266 34.11727 52.40124
## 5: Night Spd_Truck DayNight 2288880 57.29 33.66710 34.84667 52.33559
## 6:   Day Spd_Truck DayNight 1706256 42.71 33.67527 34.86386 52.06267
##          min      max       sd      var    PS
## 1: 0.1280568 91.84438 16.65366 277.3443 34.35
## 2: 0.1280568 91.75307 16.04102 257.3143 65.65
## 3: 0.1280568 91.84438 17.06441 291.1941 31.05
## 4: 0.1280568 91.75307 16.30507 265.8554 68.95
## 5: 0.1280568 74.88328 15.96289 254.8138 35.01
## 6: 0.1280568 76.47654 15.71805 247.0571 64.99
# First rows of the speed summary grouped by peak/off-peak period.
head(PeakOffPeak1)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 3079584 77.08 33.70931 34.74701
## 2: Morning Peak   Spd_All PeakOffPeak  374544  9.38 34.83800 36.13345
## 3: Evening Peak   Spd_All PeakOffPeak  541008 13.54 32.96927 33.55108
## 4:     Off-Peak   Spd_Car PeakOffPeak 3079584 77.08 33.63384 34.73036
## 5: Morning Peak   Spd_Car PeakOffPeak  374544  9.38 35.02303 36.51102
## 6: Evening Peak   Spd_Car PeakOffPeak  541008 13.54 32.83376 33.38100
##       p0.85       min      max       sd      var    PS
## 1: 52.77364 0.1280568 91.84438 16.25762 264.3101 68.61
## 2: 52.99730 0.1280568 91.75307 15.87940 252.1552 13.29
## 3: 52.47677 0.1280568 88.76314 16.47299 271.3595 18.10
## 4: 52.93337 0.1280568 91.84438 16.55564 274.0894 67.16
## 5: 53.37584 0.1704430 91.75307 16.10868 259.4894 13.95
## 6: 52.80284 0.1280568 88.76314 16.76210 280.9682 18.89
# Export every grouped speed summary to
# ./<mytype>/des_output/NC_RMU_OS_DS_<suffix>.csv.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  out_dir <- file.path(".", mytype, "des_output")
  # write.csv() does not create directories, so make sure the target exists.
  # This is a no-op when the directory is already there.
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # File-name suffix -> summary table.
  summaries <- list(
    hour        = hour1,
    day         = day1,
    dow         = DOW1,
    month       = Month1,
    aadt        = AADT2,
    crash       = Crash2,
    daynight    = DayNight1,
    peakoffpeak = PeakOffPeak1
  )

  for (suffix in names(summaries)) {
    write.csv(summaries[[suffix]],
              file.path(out_dir, paste0("NC_RMU_OS_DS_", suffix, ".csv")),
              row.names = FALSE)
  }
  invisible(NULL)
})