North Carolina Conflated Data (Rural Interstate)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)



# Load the saved workspace from the facility-based directory, then move into
# the sub-directory named after the facility-type code.
# NOTE(review): load() is presumably what brings `b02a` (used just below) into
# the session -- confirm against the contents of the .rData file.
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")
load("NC_Rural_Principle_Arterial_Interstate_1_TMC_TT_SI_reduce_withCrash.rData")

# Facility-type code; also used later when building output file paths.
mytype <- "RI"
setwd(
  paste0(
    "/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/",
    mytype
  )
)

# Give the loaded table a facility-specific name and report its size.
df_RI <- b02a
dim(df_RI)
## [1] 2261664      30

### Calculating Speed
# speed = 3600 * length / travel time / 5280, for each vehicle class.
# NOTE(review): the 3600 and 5280 factors suggest TMC_length is in feet and
# travel times are in seconds (giving mph) -- confirm the source units.
# NOTE(review): a travel time of 0 would produce Inf here; verify none exist.
df_RI$Spd_All <- 3600 * df_RI$TMC_length / df_RI$Travel_TIME_ALL_VEHICLES / 5280
df_RI$Spd_Car <- 3600 * df_RI$TMC_length / df_RI$Travel_TIME_PASSENGER_VEHICLES / 5280
df_RI$Spd_Truck <- 3600 * df_RI$TMC_length / df_RI$Travel_TIME_FREIGHT_TRUCKS / 5280


### Month, Day
# DATE is read as MMDDYYYY (see the substr() positions below). Convert it to
# text and left-pad with "0" to 8 characters so a dropped leading zero in the
# month does not shift the substrings. This is done in a single step: the
# earlier two-statement version discarded its as.character() result because
# the second statement read DATE again instead of date.
df_RI$date <- str_pad(as.character(df_RI$DATE), 8, pad = "0")
df_RI$Month <- substr(df_RI$date, start = 1, stop = 2)
df_RI$Day <- substr(df_RI$date, start = 3, stop = 4)
df_RI$Year <- substr(df_RI$date, start = 5, stop = 8)

#' Convert 15-minute epoch indices to "HH:MM:SS" clock strings.
#'
#' Epoch `k` is treated as starting `15 * k` minutes after midnight, so
#' epoch 0 gives "00:00:00" and epoch 95 gives "23:45:00". The seconds
#' field is always "00".
#'
#' @param x Vector of whole-number epoch indices (integer, or double holding
#'   integer values). `NA` entries yield "NA:NA:00". Non-whole values are
#'   rejected by `sprintf()` with an error.
#' @return Character vector with one "HH:MM:SS" string per element of `x`;
#'   `character(0)` when `x` is empty.
ConvEpoc2HM <- function(x) {
  minutes_since_midnight <- x * 15
  hours <- minutes_since_midnight %/% 60
  minutes <- minutes_since_midnight %% 60
  # sprintf() zero-pads both fields and keeps zero-length input zero-length
  # (paste() would recycle it to "::00"). Being the last expression, the
  # result is also returned visibly.
  sprintf("%02d:%02d:00", hours, minutes)
}


# Clock time ("HH:MM:SS") at which each 15-minute epoch starts.
df_RI$Hour1 <- ConvEpoc2HM(df_RI$EPOCH15)

# Build "YYYY-MM-DD HH:MM:SS" text from the MMDDYYYY date and the epoch clock
# time, then parse it into a POSIXct timestamp in the session time zone.
# NOTE(review): with tz = "" the result depends on the machine's time zone, and
# clock times skipped by a daylight-saving change may parse to NA -- confirm.
DATE4 <- paste(
  strptime(df_RI$date, format = "%m%d%Y", tz = ""),
  df_RI$Hour1
)
df_RI$PCT_TIME <- as.POSIXct(DATE4, format = "%Y-%m-%d %H:%M:%OS", tz = "")

# Two-digit hour of day (as text) and labelled day of week.
df_RI$Hour <- strftime(df_RI$PCT_TIME, format = "%H")
df_RI$DOW <- wday(df_RI$PCT_TIME, label = TRUE)

Temporal Patterns

# List every column now in df_RI: the loaded fields plus the speed, date and
# time fields derived above.
names(df_RI)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH15"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "TMC_length"                    
##  [9] "ave_aadt"                       "ave_wtdsgspd"                  
## [11] "ave_medwid"                     "ave_peaklane"                  
## [13] "ave_row"                        "ave_sur_wid"                   
## [15] "ave_no_lanes"                   "ave_spd_limt"                  
## [17] "ave_rodwycls"                   "ave_rshldwid"                  
## [19] "FC"                             "TER"                           
## [21] "ACC"                            "MED"                           
## [23] "Total"                          "K"                             
## [25] "A"                              "B"                             
## [27] "C"                              "O"                             
## [29] "DAYMTH"                         "Crash"                         
## [31] "Spd_All"                        "Spd_Car"                       
## [33] "Spd_Truck"                      "date"                          
## [35] "Month"                          "Day"                           
## [37] "Year"                           "Hour1"                         
## [39] "PCT_TIME"                       "Hour"                          
## [41] "DOW"
# Bin average AADT into seven labelled volume classes and tabulate them.
# NOTE(review): cut() intervals are right-closed and, by default, exclude the
# lowest break, so ave_aadt values of exactly 0 (like missing values) become
# NA rather than "0-2000" -- confirm this is intended.
df_RI$AADT1 <- cut(
  df_RI$ave_aadt,
  breaks = c(0, 2000, 5000, 10000, 15000, 20000, 30000, Inf),
  labels = c(
    "0-2000", "2001-5000", "5001-10000", "10001-15000",
    "15001-20000", "20001-30000", "> 30000"
  )
)
table(df_RI$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##           0      140160      280320      356640      216192       52704 
##     > 30000 
##      350400
# Two-level crash indicator: values in (-1, 0] are "No crash", anything above
# 0 is "Crash". Tabulate the result.
df_RI$Crash1 <- cut(
  df_RI$Crash,
  breaks = c(-1, 0, Inf),
  labels = c("No crash", "Crash")
)
table(df_RI$Crash1)
## 
## No crash    Crash 
##  2261410      254
# ############################################################
# df_RI$DayNight <- cut(df_RI$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_RI$DayNight)
# df_RI$PeakOffPeak <- cut(df_RI$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_RI$PeakOffPeak)
# ###########################################################

# Label each epoch as "Night" or "Day". The epochs are binned to integer codes
# first and the codes are then mapped to labels, which lets the first and last
# bins share the "Night" label.
# Bins: (-1, 26] -> Night, (26, 67] -> Day, (67, 95] -> Night.
df_RI$DayNight <- c("Night", "Day", "Night")[
  cut(df_RI$EPOCH15, breaks = c(-1, 26, 67, 95), labels = FALSE)
]
table(df_RI$DayNight)
## 
##     Day   Night 
##  965919 1295745
# Label each epoch by peak period, again binning to integer codes first so the
# three off-peak bins can share one label.
# Bins: (-1, 26] Off-Peak, (26, 35] Morning Peak, (35, 62] Off-Peak,
#       (62, 75] Evening Peak, (75, 96] Off-Peak.
df_RI$PeakOffPeak <- c(
  "Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak"
)[
  cut(df_RI$EPOCH15, breaks = c(-1, 26, 35, 62, 75, 96), labels = FALSE)
]
table(df_RI$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       306267       212031      1743366
# # ###########################################################
# df_RI01 <- df_RI[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_RI02 <- df_RI01[,c(8:19)]
# # ###########################################################
# Analysis subset: roadway-geometry columns, the categorical time / volume /
# crash fields, and the three speed measures.
df_RI01 <- df_RI[, c("ave_spd_limt", "ave_medwid", "ave_no_lanes", "ave_rshldwid", "TMC_length",
                     "ave_sur_wid", "AADT1", "Crash1", "EPOCH15", "Hour", "Day", "DOW", "Month",
                     "DayNight", "PeakOffPeak", "Spd_All", "Spd_Car", "Spd_Truck")]
# Same rows without the geometry columns; used for the grouped speed summaries.
df_RI02 <- df_RI01[, c("AADT1", "Crash1", "EPOCH15", "Hour", "Day", "DOW", "Month",
                       "DayNight", "PeakOffPeak", "Spd_All", "Spd_Car", "Spd_Truck")]


# Column groups: grouping variables, speed measures, geometry measures.
cols <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1", "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("ave_spd_limt", "ave_medwid", "ave_no_lanes", "ave_rshldwid", "TMC_length", "ave_sur_wid")

# Turn every grouping column into a factor. A plain assignment with %>% is
# used because %<>% is not provided by any package attached in this file, and
# the function is passed bare because funs() is deprecated in dplyr.
df_RI02 <- df_RI02 %>%
  mutate_at(cols, factor)


# Statistics requested for the geometry summary, and the same set preceded by
# group counts and proportions for the speed summaries. Defined once so all
# summaries stay in step.
geometry_stats <- c("mean", "median", "p0.85", "min", "max", "sd", "var", "PS")
speed_stats <- c("Count", "Prop", geometry_stats)

# One summary table of the three speed measures per grouping variable.
hour1 <- ExpCustomStat(df_RI02, Cvar = "Hour", Nvar = cols1, stat = speed_stats, gpby = FALSE)
day1 <- ExpCustomStat(df_RI02, Cvar = "Day", Nvar = cols1, stat = speed_stats, gpby = FALSE)
DOW1 <- ExpCustomStat(df_RI02, Cvar = "DOW", Nvar = cols1, stat = speed_stats, gpby = FALSE)
Month1 <- ExpCustomStat(df_RI02, Cvar = "Month", Nvar = cols1, stat = speed_stats, gpby = FALSE)
AADT2 <- ExpCustomStat(df_RI02, Cvar = "AADT1", Nvar = cols1, stat = speed_stats, gpby = FALSE)
# Crash status crossed with hour; gpby is left at its default for this one.
Crash2 <- ExpCustomStat(df_RI02, Cvar = c("Crash1", "Hour"), Nvar = cols1, stat = speed_stats)
DayNight1 <- ExpCustomStat(df_RI02, Cvar = "DayNight", Nvar = cols1, stat = speed_stats, gpby = FALSE)
PeakOffPeak1 <- ExpCustomStat(df_RI02, Cvar = "PeakOffPeak", Nvar = cols1, stat = speed_stats, gpby = FALSE)

# Ungrouped summary of the roadway-geometry measures.
geo <- ExpCustomStat(df_RI01, Nvar = cols2, stat = geometry_stats)


# Each plot keeps the identifying columns plus columns 6, 8 and 11 of a
# summary table (mean, p0.85 and sd), stacks those three statistics into long
# form, and draws one line per speed measure with one facet row per statistic.

# By hour of day.
hour1[, c(1, 2, 6, 8, 11)] %>%
  gather(condition, measurement, mean:sd, factor_key = TRUE) %>%
  ggline(
    x = "Level", y = "measurement", color = "Attribute",
    palette = c("red", "blue", "black")
  ) +
  theme(legend.title = element_blank()) +
  facet_grid(condition ~ .) +
  labs(title = "By Hour")

# By day of week.
DOW1[, c(1, 2, 6, 8, 11)] %>%
  gather(condition, measurement, mean:sd, factor_key = TRUE) %>%
  ggline(
    x = "Level", y = "measurement", color = "Attribute",
    palette = c("red", "blue", "black")
  ) +
  theme(legend.title = element_blank()) +
  facet_grid(condition ~ .) +
  labs(title = "By Day of Week")

# By month.
Month1[, c(1, 2, 6, 8, 11)] %>%
  gather(condition, measurement, mean:sd, factor_key = TRUE) %>%
  ggline(
    x = "Level", y = "measurement", color = "Attribute",
    palette = c("red", "blue", "black")
  ) +
  theme(legend.title = element_blank()) +
  facet_grid(condition ~ .) +
  labs(title = "By Month")

# By AADT class.
AADT2[, c(1, 2, 6, 8, 11)] %>%
  gather(condition, measurement, mean:sd, factor_key = TRUE) %>%
  ggline(
    x = "Level", y = "measurement", color = "Attribute",
    palette = c("red", "blue", "black")
  ) +
  theme(legend.title = element_blank()) +
  facet_grid(condition ~ .) +
  labs(title = "By AADT")

# By crash status and hour: this table has two grouping columns, so one more
# identifying column is kept and the facets are split by crash status as well.
Crash2[, c(1, 2, 3, 6, 8, 11)] %>%
  gather(condition, measurement, mean:sd, factor_key = TRUE) %>%
  ggline(
    x = "Hour", y = "measurement", color = "Attribute",
    palette = c("red", "blue", "black")
  ) +
  theme(legend.title = element_blank()) +
  facet_grid(condition + Crash1 ~ .) +
  labs(title = "By Crash")

# By day / night.
DayNight1[, c(1, 2, 6, 8, 11)] %>%
  gather(condition, measurement, mean:sd, factor_key = TRUE) %>%
  ggline(
    x = "Level", y = "measurement", color = "Attribute",
    palette = c("red", "blue", "black")
  ) +
  theme(legend.title = element_blank()) +
  facet_grid(condition ~ .) +
  labs(title = "By Day/Night")

# By peak / off-peak period.
PeakOffPeak1[, c(1, 2, 6, 8, 11)] %>%
  gather(condition, measurement, mean:sd, factor_key = TRUE) %>%
  ggline(
    x = "Level", y = "measurement", color = "Attribute",
    palette = c("red", "blue", "black")
  ) +
  theme(legend.title = element_blank()) +
  facet_grid(condition ~ .) +
  labs(title = "By Peak/Off-Peak")

Temporal Statistics of Operational Speed

# Return to the facility-based root directory; the relative
# "./<mytype>/des_output/" paths used by the write.csv() calls further down
# are resolved from here.
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")

# Preview the first rows of the by-hour speed summary.
head(hour1)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 94236 4.17 52.13831 60.07747 65.28138
## 2:    02   Spd_All     Hour 94236 4.17 53.34298 60.18868 64.87674
## 3:    03   Spd_All     Hour 94236 4.17 53.84344 60.38302 64.76781
## 4:    04   Spd_All     Hour 94236 4.17 53.13700 60.23503 64.87674
## 5:    05   Spd_All     Hour 94236 4.17 50.91111 58.56050 64.56013
## 6:    06   Spd_All     Hour 94236 4.17 45.62911 50.58226 64.24582
##           min      max       sd      var   PS
## 1: 0.32597994 88.08880 17.22727 296.7790 2.56
## 2: 0.04537650 88.08880 16.24267 263.8244 2.37
## 3: 0.07330526 95.41623 15.77257 248.7740 2.45
## 4: 0.02443423 95.41623 16.24864 264.0185 2.64
## 5: 0.08958905 87.46488 16.98486 288.4855 3.14
## 6: 0.34783805 87.46488 18.96808 359.7882 4.15
# Preview the first rows of the by-day-of-month speed summary.
head(day1)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 74304 3.29 43.23029 46.61318 64.70613
## 2:    02   Spd_All      Day 74304 3.29 42.23233 44.74705 64.07164
## 3:    03   Spd_All      Day 74304 3.29 42.53875 45.19581 64.43091
## 4:    04   Spd_All      Day 74304 3.29 43.01859 45.85009 64.57589
## 5:    05   Spd_All      Day 74304 3.29 43.05309 46.13193 64.70613
## 6:    06   Spd_All      Day 74304 3.29 42.68542 44.92770 64.55236
##           min      max       sd      var   PS
## 1: 0.09873157 88.30629 20.63637 425.8599 3.17
## 2: 0.21993887 95.41623 20.61553 425.0000 3.30
## 3: 0.02443423 95.39461 20.70858 428.8452 3.30
## 4: 0.21993887 87.46488 20.59289 424.0672 3.20
## 5: 0.03512298 87.46488 20.77933 431.7805 3.20
## 6: 0.02443423 95.41623 20.72531 429.5385 3.30
# Preview the first rows of the by-day-of-week speed summary.
head(DOW1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 328896 14.54 41.63704 43.43769 63.75633
## 2:   Fri   Spd_All      DOW 320928 14.19 41.52896 43.02873 64.37046
## 3:   Sat   Spd_All      DOW 320832 14.19 44.98155 50.41793 65.63580
## 4:   Sun   Spd_All      DOW 322464 14.26 48.04098 57.37987 66.66829
## 5:   Mon   Spd_All      DOW 322464 14.26 41.74310 43.60004 63.97795
## 6:   Tue   Spd_All      DOW 322560 14.26 41.26947 42.90086 63.61082
##           min      max       sd      var    PS
## 1: 0.02443423 95.41623 20.48510 419.6391 15.43
## 2: 0.02443423 88.08880 20.84338 434.4463 15.08
## 3: 0.02443423 95.41623 20.97013 439.7462 12.80
## 4: 0.02443423 95.41623 20.31982 412.8953 11.07
## 5: 0.09773693 95.41623 20.49933 420.2226 14.95
## 6: 0.03283318 95.41623 20.36965 414.9226 15.31
# Preview the first rows of the by-month speed summary.
head(Month1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 169632 7.50 44.16214 50.19314 64.19419
## 2:    02   Spd_All    Month 153216 6.77 43.04699 47.03375 63.72706
## 3:    03   Spd_All    Month 169632 7.50 42.77420 46.07781 64.25991
## 4:    04   Spd_All    Month 164160 7.26 43.05030 46.55628 64.37046
## 5:    05   Spd_All    Month 169632 7.50 43.08041 46.55628 64.76781
## 6:    06   Spd_All    Month 164160 7.26 42.73845 45.29596 64.71094
##          min      max       sd      var   PS
## 1: 0.3259799 92.75093 20.30921 412.4638 7.37
## 2: 0.3478381 95.41623 20.24371 409.8076 6.78
## 3: 0.3259799 95.41623 20.56629 422.9724 8.00
## 4: 0.3478381 88.08880 20.53207 421.5660 7.87
## 5: 0.3259799 88.08880 20.75733 430.8668 8.06
## 6: 0.3478381 95.41623 20.56960 423.1084 8.08
# Preview the first rows of the by-AADT-class speed summary.
# NOTE(review): the <NA> level in this output holds the rows whose ave_aadt
# did not fall into any cut() bin when AADT1 was built -- check whether those
# are missing or zero AADT values.
head(AADT2)
##          Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:     > 30000   Spd_All    AADT1 350400 15.49 62.82542 63.15486 67.43470
## 2:        <NA>   Spd_All    AADT1 865248 38.26 26.76228 25.46039 40.73887
## 3: 15001-20000   Spd_All    AADT1 216192  9.56 20.28796 18.41517 33.50963
## 4:  5001-10000   Spd_All    AADT1 280320 12.39 41.72002 44.76986 52.95279
## 5:   2001-5000   Spd_All    AADT1 140160  6.20 44.59757 51.86145 57.49856
## 6: 20001-30000   Spd_All    AADT1  52704  2.33 50.58303 53.15976 59.64266
##           min      max        sd       var    PS
## 1: 0.62142011 95.41623  6.297387  39.65708 55.87
## 2: 0.02443423 95.39461 15.335161 235.16716 16.28
## 3: 0.34783805 62.79667 11.029088 121.64078  4.46
## 4: 0.61934387 88.30629 13.627189 185.70028 12.41
## 5: 0.32597994 77.11178 14.915707 222.47833  3.06
## 6: 0.62165732 84.32238 11.382231 129.55518  2.38
# Preview the first rows of the crash-status-by-hour speed summary.
head(Crash2)
##      Crash1 Hour Attribute Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 94230 4.17 52.13729 60.07747 65.28138
## 2: No crash   02   Spd_All 94187 4.16 53.31895 60.18868 64.86836
## 3: No crash   03   Spd_All 94176 4.16 53.81908 60.37218 64.76781
## 4: No crash   04   Spd_All 94128 4.16 53.09010 60.22582 64.87674
## 5: No crash   05   Spd_All 94221 4.17 50.90493 58.55089 64.56013
## 6: No crash   06   Spd_All 94236 4.17 45.62911 50.58226 64.24582
##           min      max       sd      var   PS
## 1: 0.32597994 88.08880 17.22844 296.8190 2.56
## 2: 0.04537650 88.08880 16.25850 264.3387 2.37
## 3: 0.07330526 95.41623 15.79198 249.3867 2.44
## 4: 0.02443423 95.41623 16.28113 265.0752 2.63
## 5: 0.08958905 87.46488 16.98798 288.5915 3.14
## 6: 0.34783805 87.46488 18.96808 359.7882 4.15
# Preview the first rows of the day/night speed summary.
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 1295745 57.29 46.98179 56.27510 64.76781
## 2:   Day   Spd_All DayNight  965919 42.71 39.50494 38.42959 64.12114
## 3: Night   Spd_Car DayNight 1295745 57.29 46.73902 53.51251 66.94186
## 4:   Day   Spd_Car DayNight  965919 42.71 39.88207 38.30941 65.59866
## 5: Night Spd_Truck DayNight 1295745 57.29 55.30393 60.27128 63.72706
## 6:   Day Spd_Truck DayNight  965919 42.71 49.87099 59.22836 63.24384
##           min      max       sd      var    PS
## 1: 0.02443423 95.41623 19.64445 385.9046 45.31
## 2: 0.02443423 95.41623 20.79184 432.3007 54.69
## 3: 0.02443423 95.41623 20.72890 429.6874 41.67
## 4: 0.02443423 95.41623 21.37923 457.0716 58.33
## 5: 0.26876716 80.73681 13.79049 190.1777 50.89
## 6: 0.34783805 81.79674 18.11422 328.1248 49.11
# Preview the first rows of the peak/off-peak speed summary.
head(PeakOffPeak1)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 1743366 77.08 43.59908 47.54682
## 2: Morning Peak   Spd_All PeakOffPeak  212031  9.38 40.41981 40.87840
## 3: Evening Peak   Spd_All PeakOffPeak  306267 13.54 39.94304 39.45241
## 4:     Off-Peak   Spd_Car PeakOffPeak 1743366 77.08 43.40379 45.23876
## 5: Morning Peak   Spd_Car PeakOffPeak  212031  9.38 40.56454 40.33655
## 6: Evening Peak   Spd_Car PeakOffPeak  306267 13.54 40.34147 39.07388
##       p0.85        min      max       sd      var    PS
## 1: 64.61158 0.02443423 95.41623 20.55000 422.3024 72.13
## 2: 63.61082 0.32597994 87.46488 20.31859 412.8451 11.38
## 3: 64.33425 0.03069904 88.08880 20.97413 439.9141 16.49
## 4: 66.38576 0.02443423 95.41623 21.36167 456.3208 70.37
## 5: 65.17009 0.32597994 95.41623 20.87164 435.6255 12.02
## 6: 66.04224 0.03069904 95.41623 21.63174 467.9322 17.60
# Write each speed summary table to its own CSV (no row names) under
# "./<mytype>/des_output/", relative to the current working directory.
# Tables and file names are paired by position.
invisible(Map(
  function(stat_tbl, file_name) {
    write.csv(
      stat_tbl,
      paste0("./", mytype, "/des_output/", file_name),
      row.names = FALSE
    )
  },
  list(hour1, day1, DOW1, Month1, AADT2, Crash2, DayNight1, PeakOffPeak1),
  c(
    "NC_RI_OS_DS_hour.csv",
    "NC_RI_OS_DS_day.csv",
    "NC_RI_OS_DS_dow.csv",
    "NC_RI_OS_DS_month.csv",
    "NC_RI_OS_DS_aadt.csv",
    "NC_RI_OS_DS_crash.csv",
    "NC_RI_OS_DS_daynight.csv",
    "NC_RI_OS_DS_peakoffpeak.csv"
  )
))