North Carolina Conflated Data (Rural Multi-lane Divided)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)



# Move to the facility-based working directory and restore the saved workspace
# for the multi-lane divided facility type.
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")
# load() restores every object stored in the file into the current environment;
# N_mun_med (used below) is presumably one of them -- TODO confirm.
load("./multi-lane_divided_NC_reduce_withCrash_no_intersection.rData")
# Facility-type tag; it is also the name of the sub-directory used for this run.
mytype = 'RMD'
# Switch into the facility-specific sub-directory.
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/",mytype))

# Bind the loaded table to a facility-specific name and report rows x columns.
df_RMD <- N_mun_med
dim(df_RMD)
## [1] 6157824      30
### Calculating Speed
# speed = 3600 * segment length / travel time / 5280
# (presumably feet and seconds in, miles per hour out -- TODO confirm units).
calc_speed <- function(seg_length, travel_time) {
  3600 * seg_length / travel_time / 5280
}

df_RMD$Spd_All <- calc_speed(df_RMD$TMC_length, df_RMD$Travel_TIME_ALL_VEHICLES)
df_RMD$Spd_Car <- calc_speed(df_RMD$TMC_length, df_RMD$Travel_TIME_PASSENGER_VEHICLES)
df_RMD$Spd_Truck <- calc_speed(df_RMD$TMC_length, df_RMD$Travel_TIME_FREIGHT_TRUCKS)


### Month, Day
# DATE is left-padded with zeros to 8 characters and then sliced as MMDDYYYY.
df_RMD$date <- str_pad(df_RMD$DATE, 8, pad = "0")
df_RMD$Month <- substr(df_RMD$date, start = 1, stop = 2)
df_RMD$Day <- substr(df_RMD$date, start = 3, stop = 4)
df_RMD$Year <- substr(df_RMD$date, start = 5, stop = 8)

# Convert 15-minute epoch indices into "HH:MM:00" clock strings.
#
# Epoch k starts 15 * k minutes after midnight, so 0 -> "00:00:00",
# 4 -> "01:00:00" and 95 -> "23:45:00".
#
# Args:
#   x: numeric vector of whole-number epoch indices.
#
# Returns:
#   A character vector the same length as `x`; empty input yields
#   character(0). The value is returned visibly.
ConvEpoc2HM <- function(x) {
  minutes_since_midnight <- x * 15
  # sprintf() is vectorised, zero-pads both fields and, unlike paste(), does
  # not recycle zero-length input into a spurious "::00" element.
  sprintf(
    "%02d:%02d:00",
    minutes_since_midnight %/% 60,
    minutes_since_midnight %% 60
  )
}


# Build a timestamp for every record from its calendar date (MMDDYYYY string)
# and its 15-minute epoch, then derive hour-of-day and day-of-week from it.
df_RMD$Hour1 <- ConvEpoc2HM(df_RMD$EPOCH15)
# Parse and format in a fixed, DST-free zone. With tz = "" the result depends on
# the machine's time zone, and wall-clock times inside a spring-forward gap
# (e.g. 02:00-02:45) do not exist there, which can leave NA in PCT_TIME / Hour.
DATE4 <- paste(strptime(df_RMD$date, format = "%m%d%Y", tz = "UTC"), df_RMD$Hour1, sep = " ")
df_RMD$PCT_TIME <- as.POSIXct(DATE4, tz = "UTC", format = "%Y-%m-%d %H:%M:%OS")
# strftime() converts to the local zone unless tz is given explicitly.
df_RMD$Hour <- strftime(df_RMD$PCT_TIME, format = "%H", tz = "UTC")
df_RMD$DOW <- wday(df_RMD$PCT_TIME, label = TRUE)

Temporal Patterns

# List the columns available after the speed and date/time fields were added.
names(df_RMD)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH15"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "TMC_length"                    
##  [9] "ave_aadt"                       "ave_wtdsgspd"                  
## [11] "ave_medwid"                     "ave_peaklane"                  
## [13] "ave_row"                        "ave_sur_wid"                   
## [15] "ave_no_lanes"                   "ave_spd_limt"                  
## [17] "ave_rodwycls"                   "ave_rshldwid"                  
## [19] "FC"                             "TER"                           
## [21] "ACC"                            "MED"                           
## [23] "Total"                          "K"                             
## [25] "A"                              "B"                             
## [27] "C"                              "O"                             
## [29] "DAYMTH"                         "Crash"                         
## [31] "Spd_All"                        "Spd_Car"                       
## [33] "Spd_Truck"                      "date"                          
## [35] "Month"                          "Day"                           
## [37] "Year"                           "Hour1"                         
## [39] "PCT_TIME"                       "Hour"                          
## [41] "DOW"
# Bin average AADT into seven right-closed volume classes and tabulate them.
# The intervals are (lower, upper], so a value of exactly 0 falls in no bin.
aadt_breaks <- c(0, 2000, 5000, 10000, 15000, 20000, 30000, Inf)
aadt_labels <- c(
  "0-2000", "2001-5000", "5001-10000", "10001-15000",
  "15001-20000", "20001-30000", "> 30000"
)
df_RMD$AADT1 <- cut(df_RMD$ave_aadt, breaks = aadt_breaks, labels = aadt_labels)
table(df_RMD$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##       40896      105408      946368      911328      876576     1069440 
##     > 30000 
##     2207808
# Collapse the crash count into a two-level flag: (-1, 0] -> "No crash",
# (0, Inf] -> "Crash", then tabulate.
crash_breaks <- c(-1, 0, Inf)
crash_labels <- c("No crash", "Crash")
df_RMD$Crash1 <- cut(df_RMD$Crash, breaks = crash_breaks, labels = crash_labels)
table(df_RMD$Crash1)
## 
## No crash    Crash 
##  6156327     1497
# ############################################################
# df_RMD$DayNight <- cut(df_RMD$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_RMD$DayNight)
# df_RMD$PeakOffPeak <- cut(df_RMD$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_RMD$PeakOffPeak)
# ###########################################################

# Day/Night flag from the 15-minute epoch. Two bins share the label "Night",
# so the bin index is computed first and then mapped to its label.
day_night_labels <- c("Night", "Day", "Night")
day_night_bin <- cut(df_RMD$EPOCH15, breaks = c(-1, 26, 67, 95), labels = FALSE)
df_RMD$DayNight <- day_night_labels[day_night_bin]
table(df_RMD$DayNight)
## 
##     Day   Night 
## 2629904 3527920
# Peak/off-peak flag from the 15-minute epoch. "Off-Peak" labels three of the
# five bins, so the bin index is computed first and then mapped to its label.
peak_labels <- c("Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak")
peak_bin <- cut(df_RMD$EPOCH15, breaks = c(-1, 26, 35, 62, 75, 96), labels = FALSE)
df_RMD$PeakOffPeak <- peak_labels[peak_bin]
table(df_RMD$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       833872       577296      4746656
# # ###########################################################
# df_RMD01 <- df_RMD[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_RMD02 <- df_RMD01[,c(8:19)]
# # ###########################################################
# df_RMD01: geometry columns plus the temporal, traffic and speed columns.
# The column vector is written inline inside `[` on purpose: a literal c("...")
# selects columns for both data.frame and data.table inputs.
df_RMD01 <- df_RMD[,c("ave_spd_limt","ave_medwid",  "ave_no_lanes","ave_rshldwid","TMC_length", 
                    "ave_sur_wid", "AADT1",   "Crash1",  "EPOCH15", "Hour","Day", "DOW", "Month",
                      "DayNight","PeakOffPeak","Spd_All", "Spd_Car", "Spd_Truck")]
# df_RMD02: the grouping variables and the three speed measures only.
df_RMD02 <- df_RMD01[,c( "AADT1","Crash1","EPOCH15","Hour", "Day", "DOW", "Month",
                         "DayNight","PeakOffPeak","Spd_All","Spd_Car","Spd_Truck")]


# Column groups used by the summaries below:
#   cols  - grouping variables, converted to factors here
#   cols1 - speed measures
#   cols2 - roadway geometry measures
cols <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1", "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("ave_spd_limt", "ave_medwid", "ave_no_lanes", "ave_rshldwid", "TMC_length", "ave_sur_wid")
# Plain assignment with %>% replaces `%<>%`, which lives in magrittr (never
# attached by this script) and made the extra `=` assignment redundant.
# Passing `factor` directly avoids the deprecated funs() wrapper.
df_RMD02 <- df_RMD02 %>%
  mutate_at(cols, factor)


# Descriptive statistics of the three speed measures for each grouping
# variable. Every grouped summary requests the same set of statistics.
spd_stats <- c("Count", "Prop", "mean", "median", "p0.85", "min", "max", "sd", "var", "PS")

hour1 <- ExpCustomStat(df_RMD02, Cvar = "Hour", Nvar = cols1, stat = spd_stats, gpby = FALSE)
day1 <- ExpCustomStat(df_RMD02, Cvar = "Day", Nvar = cols1, stat = spd_stats, gpby = FALSE)
DOW1 <- ExpCustomStat(df_RMD02, Cvar = "DOW", Nvar = cols1, stat = spd_stats, gpby = FALSE)
Month1 <- ExpCustomStat(df_RMD02, Cvar = "Month", Nvar = cols1, stat = spd_stats, gpby = FALSE)
AADT2 <- ExpCustomStat(df_RMD02, Cvar = "AADT1", Nvar = cols1, stat = spd_stats, gpby = FALSE)
# Crash status crossed with hour; this call keeps the default `gpby`.
Crash2 <- ExpCustomStat(df_RMD02, Cvar = c("Crash1", "Hour"), Nvar = cols1, stat = spd_stats)
DayNight1 <- ExpCustomStat(df_RMD02, Cvar = "DayNight", Nvar = cols1, stat = spd_stats, gpby = FALSE)
PeakOffPeak1 <- ExpCustomStat(df_RMD02, Cvar = "PeakOffPeak", Nvar = cols1, stat = spd_stats, gpby = FALSE)
# Ungrouped summary of the geometry columns (no Count / Prop).
geo_stats <- c("mean", "median", "p0.85", "min", "max", "sd", "var", "PS")
geo <- ExpCustomStat(df_RMD01, Nvar = cols2, stat = geo_stats)


# Line plot of mean, 85th percentile and sd of speed by level of one grouping
# variable: one facet row per statistic, one line colour per speed measure.
# Columns 1, 2, 6, 8, 11 of the summary table are Level, Attribute, mean,
# p0.85 and sd (see the head() previews further down).
plot_speed_stats <- function(stats_tbl, plot_title) {
  long_tbl <- gather(stats_tbl[, c(1, 2, 6, 8, 11)], condition, measurement,
                     mean:sd, factor_key = TRUE)
  ggline(long_tbl, x = "Level", y = "measurement", color = "Attribute",
         palette = c("red", "blue", "black")) +
    theme(legend.title = element_blank()) +
    facet_grid(condition ~ .) +
    labs(title = plot_title)
}

plot_speed_stats(hour1, "By Hour")

plot_speed_stats(DOW1, "By Day of Week")

plot_speed_stats(Month1, "By Month")

plot_speed_stats(AADT2, "By AADT")

# The crash summary is keyed by Crash1 and Hour instead of Level, so it is
# plotted against Hour and faceted by statistic and crash status.
crash_long <- gather(Crash2[, c(1, 2, 3, 6, 8, 11)], condition, measurement,
                     mean:sd, factor_key = TRUE)
ggline(crash_long, x = "Hour", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black")) +
  theme(legend.title = element_blank()) +
  facet_grid(condition + Crash1 ~ .) +
  labs(title = "By Crash")

plot_speed_stats(DayNight1, "By Day/Night")

plot_speed_stats(PeakOffPeak1, "By Peak/Off-Peak")

Temporal Statistics of Operational Speed

# Return to the facility-based root; the CSV exports at the end of the script
# use paths relative to this directory.
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")

# Preview the first rows of the by-hour speed summary.
head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 256576 4.17 53.83458 59.99537 65.29274
## 2:    02   Spd_All     Hour 256576 4.17 54.26592 60.04870 65.05662
## 3:    03   Spd_All     Hour 256576 4.17 54.24507 60.08704 65.08699
## 4:    04   Spd_All     Hour 256576 4.17 53.69937 59.71979 65.10943
## 5:    05   Spd_All     Hour 256576 4.17 51.91318 58.14007 65.22657
## 6:    06   Spd_All     Hour 256576 4.17 49.92079 55.65186 65.14221
##          min      max       sd      var   PS
## 1: 0.2048118 87.11222 15.50244 240.3255 2.80
## 2: 0.2048118 88.45300 14.90309 222.1020 2.70
## 3: 0.2048118 91.86355 14.94006 223.2054 2.82
## 4: 0.1814656 92.39558 15.24706 232.4727 3.12
## 5: 0.2048118 88.64821 16.23434 263.5536 3.71
## 6: 0.3170686 91.86579 16.91175 286.0073 4.45
# Preview the first rows of the by-day-of-month speed summary.
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 202368 3.29 49.34222 56.13585 65.31973
## 2:    02   Spd_All      Day 202368 3.29 48.74752 55.30325 65.07381
## 3:    03   Spd_All      Day 202368 3.29 48.98923 55.75686 65.14298
## 4:    04   Spd_All      Day 202368 3.29 49.28554 56.16087 65.31878
## 5:    05   Spd_All      Day 202368 3.29 49.23603 56.16592 65.31723
## 6:    06   Spd_All      Day 202368 3.29 49.23405 56.01860 65.41614
##          min      max       sd      var   PS
## 1: 0.1184758 88.97020 17.88319 319.8084 3.16
## 2: 0.2048118 89.05935 17.92629 321.3518 3.33
## 3: 0.2048118 87.51167 17.97041 322.9358 3.30
## 4: 0.2048118 92.39558 17.94800 322.1305 3.22
## 5: 0.2048118 85.82514 18.01766 324.6360 3.22
## 6: 0.2048118 88.64821 17.98847 323.5850 3.30
# Preview the first rows of the by-day-of-week speed summary.
head(DOW1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 894624 14.53 48.44492 54.82553 64.94104
## 2:   Fri   Spd_All      DOW 876864 14.24 48.20993 54.61342 65.26334
## 3:   Sat   Spd_All      DOW 876864 14.24 50.54588 57.80838 66.27278
## 4:   Sun   Spd_All      DOW 876960 14.24 52.15133 59.19125 66.84729
## 5:   Mon   Spd_All      DOW 876960 14.24 48.60886 54.93928 65.09906
## 6:   Tue   Spd_All      DOW 876960 14.24 48.28196 54.64980 64.74147
##           min      max       sd      var    PS
## 1: 0.11847581 87.92185 17.90228 320.4917 15.36
## 2: 0.12271252 92.39558 18.33391 336.1322 14.99
## 3: 0.05923791 92.39558 18.11963 328.3209 12.93
## 4: 0.20481183 91.86579 17.57637 308.9289 11.34
## 5: 0.05923791 89.62647 17.92509 321.3088 14.93
## 6: 0.05923791 90.37851 17.83092 317.9416 15.19
# Preview the first rows of the by-month speed summary.
head(Month1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 496992 8.07 50.20179 57.33692 65.07641
## 2:    02   Spd_All    Month 448896 7.29 49.38057 56.23774 64.96172
## 3:    03   Spd_All    Month 496992 8.07 49.63032 56.59855 65.38897
## 4:    04   Spd_All    Month 480960 7.81 49.60577 56.42343 65.45032
## 5:    05   Spd_All    Month 496992 8.07 49.56501 56.51583 65.65598
## 6:    06   Spd_All    Month 480960 7.81 49.24684 55.88022 65.57771
##           min      max       sd      var   PS
## 1: 0.11847581 89.03472 17.43293 303.9069 7.62
## 2: 0.20481183 88.21117 17.63290 310.9191 6.93
## 3: 0.11073005 91.96609 17.87002 319.3377 8.13
## 4: 0.05923791 90.37851 17.85542 318.8160 8.07
## 5: 0.05923791 89.62647 18.10490 327.7876 8.35
## 6: 0.18631775 90.45863 18.10903 327.9369 8.38
# Preview the first rows of the by-AADT-class speed summary.
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: 15001-20000   Spd_All    AADT1  876576 14.24 42.81686 49.21194 62.18298
## 2:     > 30000   Spd_All    AADT1 2207808 35.85 57.66772 62.68179 67.03154
## 3: 20001-30000   Spd_All    AADT1 1069440 17.37 35.83630 36.89741 53.51851
## 4:  5001-10000   Spd_All    AADT1  946368 15.37 44.98221 49.61881 59.76978
## 5: 10001-15000   Spd_All    AADT1  911328 14.80 42.15105 47.06954 59.89836
## 6:      0-2000   Spd_All    AADT1   40896  0.66 54.66992 56.79645 60.91419
##           min      max        sd       var    PS
## 1: 0.05923791 92.39558 19.367493 375.09979 10.35
## 2: 0.39416733 91.96609 13.889640 192.92210 56.19
## 3: 0.20481183 87.75896 16.323296 266.44998 11.71
## 4: 0.18146557 87.51965 15.586911 242.95179  9.89
## 5: 0.06211101 91.86579 18.187499 330.78513 10.50
## 6: 0.62144290 87.33217  9.424732  88.82557  0.74
# Preview the first rows of the crash-status-by-hour speed summary.
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 256516 4.17 53.83317 59.99537 65.29274
## 2: No crash   02   Spd_All 256322 4.16 54.25407 60.04188 65.05662
## 3: No crash   03   Spd_All 256183 4.16 54.23151 60.08017 65.08699
## 4: No crash   04   Spd_All 256097 4.16 53.67712 59.71100 65.10814
## 5: No crash   05   Spd_All 256440 4.16 51.90966 58.13945 65.22657
## 6: No crash   06   Spd_All 256576 4.17 49.92079 55.65186 65.14221
##          min      max       sd      var   PS
## 1: 0.2048118 87.11222 15.50407 240.3761 2.80
## 2: 0.2048118 88.45300 14.91101 222.3383 2.69
## 3: 0.2048118 91.86355 14.95302 223.5929 2.81
## 4: 0.1814656 92.39558 15.26297 232.9584 3.11
## 5: 0.2048118 88.64821 16.23666 263.6290 3.70
## 6: 0.3170686 91.86579 16.91175 286.0073 4.45
# Preview the first rows of the day/night speed summary.
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 3527920 57.29 51.12783 58.02952 65.31878
## 2:   Day   Spd_All DayNight 2629904 42.71 47.22215 52.73157 65.26334
## 3: Night   Spd_Car DayNight 3527920 57.29 52.35213 58.79327 68.40550
## 4:   Day   Spd_Car DayNight 2629904 42.71 48.08682 53.10498 67.56492
## 5: Night Spd_Truck DayNight 3527920 57.29 53.60656 59.77857 63.87705
## 6:   Day Spd_Truck DayNight 2629904 42.71 50.33139 58.20766 63.27902
##           min      max       sd      var    PS
## 1: 0.05923791 92.39558 17.07953 291.7103 48.69
## 2: 0.05923791 91.10698 18.56801 344.7709 51.31
## 3: 0.05923791 93.62743 18.11264 328.0676 44.33
## 4: 0.05923791 92.98141 19.41016 376.7543 55.67
## 5: 0.18146557 84.61116 14.64981 214.6169 50.01
## 6: 0.18631775 82.69797 16.46503 271.0973 49.99
# Preview the first rows of the peak/off-peak speed summary.
head(PeakOffPeak1)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 4746656 77.08 49.62381 56.53339
## 2: Morning Peak   Spd_All PeakOffPeak  577296  9.38 47.99508 53.48295
## 3: Evening Peak   Spd_All PeakOffPeak  833872 13.54 47.19858 52.95628
## 4:     Off-Peak   Spd_Car PeakOffPeak 4746656 77.08 50.47720 56.74649
## 5: Morning Peak   Spd_Car PeakOffPeak  577296  9.38 48.81497 53.74890
## 6: Evening Peak   Spd_Car PeakOffPeak  833872 13.54 48.26363 53.60851
##       p0.85        min      max       sd      var    PS
## 1: 65.27237 0.05923791 92.39558 17.75968 315.4062 73.38
## 2: 65.20447 0.05923791 89.93297 18.01515 324.5456 10.93
## 3: 65.39350 0.17781163 92.39558 18.83815 354.8759 15.69
## 4: 67.98123 0.05923791 93.62743 18.80370 353.5790 71.23
## 5: 67.37831 0.05923791 91.41416 18.75257 351.6589 11.80
## 6: 68.04613 0.17781163 92.39558 19.75529 390.2716 16.96
# Export every operating-speed summary table to
#   ./<mytype>/des_output/NC_<mytype>_OS_DS_<suffix>.csv
# relative to the current working directory. Both the directory and the file
# prefix are derived from `mytype`, so they cannot drift apart; for
# mytype = 'RMD' the paths match the previously hard-coded ones.
os_out_dir <- file.path(".", mytype, "des_output")
# Create the output directory up front so a missing folder does not abort the
# export after all summaries have been computed; a no-op when it exists.
dir.create(os_out_dir, recursive = TRUE, showWarnings = FALSE)

# File-name suffix -> summary table.
os_tables <- list(
  hour        = hour1,
  day         = day1,
  dow         = DOW1,
  month       = Month1,
  aadt        = AADT2,
  crash       = Crash2,
  daynight    = DayNight1,
  peakoffpeak = PeakOffPeak1
)

for (os_suffix in names(os_tables)) {
  os_file <- file.path(os_out_dir, paste0("NC_", mytype, "_OS_DS_", os_suffix, ".csv"))
  write.csv(os_tables[[os_suffix]], os_file, row.names = FALSE)
}