North Carolina Conflated Data (Rural Two-lane)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)



# Start in the facility-based root so the relative .rData path resolves.
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")
load(file = "./two-lane_undivided_NC_reduce_withCrash_no_intersection.rData")

# Facility-type code; it is also the name of the sub-directory entered next.
mytype <- "R2"
setwd(
  paste0(
    "/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/",
    mytype
  )
)

# Working copy of the source table (N_2un_nomed is presumably supplied by the
# .rData file loaded earlier — confirm).
df_R2 <- N_2un_nomed
dim(df_R2)
## [1] 16229280       30
### Calculating Speed
# speed = 3600 * TMC_length / travel_time / 5280, one column per vehicle class
# (presumably length in feet and time in seconds, giving mph — TODO confirm).
df_R2$Spd_All <- with(
  df_R2,
  3600 * TMC_length / Travel_TIME_ALL_VEHICLES / 5280
)
df_R2$Spd_Car <- with(
  df_R2,
  3600 * TMC_length / Travel_TIME_PASSENGER_VEHICLES / 5280
)
df_R2$Spd_Truck <- with(
  df_R2,
  3600 * TMC_length / Travel_TIME_FREIGHT_TRUCKS / 5280
)


### Month, Day, Year
# Left-pad DATE to a fixed-width 8-character string before slicing it into
# month / day / year (the layout is MMDDYYYY, matching the "%m%d%Y" format used
# when the timestamp is parsed later).
# The original converted DATE with as.character() and then immediately
# overwrote that result by padding DATE again; both steps are merged here.
df_R2$date <- str_pad(as.character(df_R2$DATE), width = 8, side = "left", pad = "0")
df_R2$Month <- substr(df_R2$date, start = 1, stop = 2)
df_R2$Day   <- substr(df_R2$date, start = 3, stop = 4)
df_R2$Year  <- substr(df_R2$date, start = 5, stop = 8)

#' Convert a 15-minute epoch index to a clock-time string.
#'
#' @param x Numeric vector of whole-number epoch indices. Each epoch spans
#'   15 minutes, so 0 maps to "00:00:00" and 95 maps to "23:45:00".
#' @return Character vector the same length as `x`, formatted "HH:MM:00".
#'   Zero-length input yields `character(0)`.
ConvEpoc2HM <- function(x) {
  total_min <- x * 15
  # sprintf() zero-pads both fields and is the last expression, so the result
  # is returned visibly (the original ended on an assignment and returned
  # invisibly while overwriting its own argument).
  sprintf("%02d:%02d:00", total_min %/% 60, total_min %% 60)
}


# Clock time ("HH:MM:00") of every 15-minute epoch.
df_R2$Hour1 <- ConvEpoc2HM(df_R2$EPOCH15)

# Parse the MMDDYYYY string into a calendar date, append the clock time, and
# parse the combined text into a POSIXct timestamp (tz = "" is the session's
# current time zone).
DATE4 <- paste(
  strptime(df_R2$date, format = "%m%d%Y", tz = ""),
  df_R2$Hour1
)
df_R2$PCT_TIME <- as.POSIXct(DATE4, format = "%Y-%m-%d %H:%M:%OS", tz = "")

# Two-digit hour of day and labelled day of week derived from the timestamp.
df_R2$Hour <- format(df_R2$PCT_TIME, "%H")
# Namespace-qualified because data.table also exports a wday().
df_R2$DOW <- lubridate::wday(df_R2$PCT_TIME, label = TRUE)

Temporal Patterns

names(df_R2)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "DATE"                           "EPOCH15"                       
##  [5] "Travel_TIME_ALL_VEHICLES"       "Travel_TIME_PASSENGER_VEHICLES"
##  [7] "Travel_TIME_FREIGHT_TRUCKS"     "TMC_length"                    
##  [9] "ave_aadt"                       "ave_wtdsgspd"                  
## [11] "ave_medwid"                     "ave_peaklane"                  
## [13] "ave_row"                        "ave_sur_wid"                   
## [15] "ave_no_lanes"                   "ave_spd_limt"                  
## [17] "ave_rodwycls"                   "ave_rshldwid"                  
## [19] "FC"                             "TER"                           
## [21] "ACC"                            "MED"                           
## [23] "Total"                          "K"                             
## [25] "A"                              "B"                             
## [27] "C"                              "O"                             
## [29] "DAYMTH"                         "Crash"                         
## [31] "Spd_All"                        "Spd_Car"                       
## [33] "Spd_Truck"                      "date"                          
## [35] "Month"                          "Day"                           
## [37] "Year"                           "Hour1"                         
## [39] "PCT_TIME"                       "Hour"                          
## [41] "DOW"
# Bin average AADT into seven classes. cut() defaults to right-closed
# intervals, so the first class is (0, 2000] and the last is (30000, Inf].
# NOTE(review): the counts printed below sum to 16,024,416 of the 16,229,280
# rows, so some rows get NA here (ave_aadt missing or <= 0). Confirm whether
# an AADT of 0 should fall in "0-2000" (include.lowest = TRUE).
df_R2$AADT1 <- cut(
  df_R2$ave_aadt,
  breaks = c(0, 2000, 5000, 10000, 15000, 20000, 30000, Inf),
  labels = c(
    "0-2000", "2001-5000", "5001-10000", "10001-15000",
    "15001-20000", "20001-30000", "> 30000"
  )
)
table(df_R2$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##      588096     3610080     4766208     3781440     1589376     1022880 
##     > 30000 
##      666336
# Two-level crash indicator: values in (-1, 0] become "No crash", anything
# above 0 becomes "Crash".
df_R2$Crash1 <- cut(
  df_R2$Crash,
  breaks = c(-1, 0, Inf),
  labels = c("No crash", "Crash")
)
table(df_R2$Crash1)
## 
## No crash    Crash 
## 16229176      104
# ############################################################
# df_R2$DayNight <- cut(df_R2$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_R2$DayNight)
# df_R2$PeakOffPeak <- cut(df_R2$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_R2$PeakOffPeak)
# ###########################################################

# Day/night flag from the epoch index. Factor levels must be unique, so the
# repeated "Night" label cannot be given to cut() directly; instead the
# interval number (1, 2 or 3) is used to index a plain label vector.
# Intervals: (-1, 26] Night, (26, 67] Day, (67, 95] Night.
df_R2$DayNight <- c("Night", "Day", "Night")[
  as.numeric(cut(df_R2$EPOCH15, breaks = c(-1, 26, 67, 95)))
]
table(df_R2$DayNight)
## 
##     Day   Night 
## 6931255 9298025
# Peak-period flag from the epoch index, built the same way as a label lookup
# on the interval number because "Off-Peak" occurs three times.
# Intervals: (-1, 26] Off-Peak, (26, 35] Morning Peak, (35, 62] Off-Peak,
# (62, 75] Evening Peak, (75, 96] Off-Peak.
df_R2$PeakOffPeak <- c(
  "Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak"
)[
  as.numeric(cut(df_R2$EPOCH15, breaks = c(-1, 26, 35, 62, 75, 96)))
]
table(df_R2$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##      2197715      1521495     12510070
# # ###########################################################
# df_R201 <- df_R2[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_R202 <- df_R201[,c(8:19)]
# # ###########################################################
# Analysis subset selected by column name: six roadway-geometry columns, the
# binned AADT and crash flags, the temporal keys, and the three speed measures.
df_R201 <- df_R2[, c(
  "ave_spd_limt", "ave_medwid", "ave_no_lanes", "ave_rshldwid", "TMC_length",
  "ave_sur_wid",
  "AADT1", "Crash1",
  "EPOCH15", "Hour", "Day", "DOW", "Month", "DayNight", "PeakOffPeak",
  "Spd_All", "Spd_Car", "Spd_Truck"
)]

# Same table without the six geometry columns.
df_R202 <- df_R201[, c(
  "AADT1", "Crash1",
  "EPOCH15", "Hour", "Day", "DOW", "Month", "DayNight", "PeakOffPeak",
  "Spd_All", "Spd_Car", "Spd_Truck"
)]


# Column groups: categorical keys (cols), speed measures (cols1) and roadway
# geometry (cols2).
cols <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1", "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("ave_spd_limt", "ave_medwid", "ave_no_lanes", "ave_rshldwid", "TMC_length", "ave_sur_wid")

# Convert every categorical key to a factor.
# The original used magrittr's `%<>%` (magrittr is never attached by this
# script, so it only worked when another package happened to attach it) and
# then re-assigned the result with `=`. It also used funs(), which is
# deprecated in dplyr. `%>%` is re-exported by dplyr and mutate_at() accepts
# the function directly.
df_R202 <- df_R202 %>%
  mutate_at(cols, factor)


# Statistics requested for every grouped speed summary.
speed_stats <- c(
  "Count", "Prop", "mean", "median", "p0.85", "min", "max", "sd", "var", "PS"
)

# One summary of the three speed columns per single grouping variable.
hour1 <- ExpCustomStat(
  df_R202, Cvar = "Hour", Nvar = cols1, stat = speed_stats, gpby = FALSE
)
day1 <- ExpCustomStat(
  df_R202, Cvar = "Day", Nvar = cols1, stat = speed_stats, gpby = FALSE
)
DOW1 <- ExpCustomStat(
  df_R202, Cvar = "DOW", Nvar = cols1, stat = speed_stats, gpby = FALSE
)
Month1 <- ExpCustomStat(
  df_R202, Cvar = "Month", Nvar = cols1, stat = speed_stats, gpby = FALSE
)
AADT2 <- ExpCustomStat(
  df_R202, Cvar = "AADT1", Nvar = cols1, stat = speed_stats, gpby = FALSE
)

# Crash status crossed with hour; gpby is left at its default here.
Crash2 <- ExpCustomStat(
  df_R202, Cvar = c("Crash1", "Hour"), Nvar = cols1, stat = speed_stats
)

DayNight1 <- ExpCustomStat(
  df_R202, Cvar = "DayNight", Nvar = cols1, stat = speed_stats, gpby = FALSE
)
PeakOffPeak1 <- ExpCustomStat(
  df_R202, Cvar = "PeakOffPeak", Nvar = cols1, stat = speed_stats, gpby = FALSE
)

# Ungrouped summary of the geometry columns (no Count / Prop).
geo <- ExpCustomStat(
  df_R201,
  Nvar = cols2,
  stat = c("mean", "median", "p0.85", "min", "max", "sd", "var", "PS")
)


#' Plot the mean, p0.85 and sd speed statistics of one summary table.
#'
#' Replaces seven copy-pasted ggline() calls that picked columns by position
#' (1, 2, 6, 8, 11); columns are now selected by name, so the plot no longer
#' depends on the column order of the summary table.
#'
#' @param stat_tbl Summary table holding the `id_cols` plus the columns
#'   "mean", "p0.85" and "sd".
#' @param title Plot title.
#' @param x Name of the column mapped to the x axis.
#' @param id_cols Identifier columns kept alongside the three statistics.
#' @param facets Faceting formula passed to facet_grid().
#' @return The ggplot object (auto-printed when called at top level).
plot_speed_stats <- function(stat_tbl, title, x = "Level",
                             id_cols = c("Level", "Attribute"),
                             facets = condition ~ .) {
  keep <- c(id_cols, "mean", "p0.85", "sd")
  # as.data.frame() makes name-based `[` selection behave the same whether the
  # summary is a data.frame or a data.table.
  wide_tbl <- as.data.frame(stat_tbl)[, keep, drop = FALSE]
  # Stack the three statistics into condition / measurement pairs, keeping
  # their order as factor levels so the facets appear as mean, p0.85, sd.
  long_tbl <- gather(wide_tbl, condition, measurement, mean:sd, factor_key = TRUE)

  ggline(long_tbl, x = x, y = "measurement", color = "Attribute",
         palette = c("red", "blue", "black")) +
    theme(legend.title = element_blank()) +
    facet_grid(facets) +
    labs(title = title)
}

plot_speed_stats(hour1, "By Hour")

plot_speed_stats(DOW1, "By Day of Week")

plot_speed_stats(Month1, "By Month")

plot_speed_stats(AADT2, "By AADT")

# Crash summary is keyed by crash status and hour, so hour goes on the x axis
# and crash status becomes an extra facet row.
plot_speed_stats(Crash2, "By Crash", x = "Hour",
                 id_cols = c("Crash1", "Hour", "Attribute"),
                 facets = condition + Crash1 ~ .)

plot_speed_stats(DayNight1, "By Day/Night")

plot_speed_stats(PeakOffPeak1, "By Peak/Off-Peak")

Temporal Statistics of Operational Speed

# Return to the facility-based root directory: the write.csv() paths further
# down are relative to it ("./<mytype>/des_output/...").
setwd("/scratch/user/cma16/Task4_Deliverable2/NCprocess4/AllCrash/FacilityBased/")

head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 676220 4.17 34.12211 34.77486 54.15049
## 2:    02   Spd_All     Hour 676220 4.17 34.30065 34.78235 54.89322
## 3:    03   Spd_All     Hour 676220 4.17 36.08165 36.99886 56.88531
## 4:    04   Spd_All     Hour 676220 4.17 36.45347 38.17837 56.86640
## 5:    05   Spd_All     Hour 676220 4.17 35.87825 37.71932 55.83954
## 6:    06   Spd_All     Hour 676220 4.17 34.35155 35.41832 52.94302
##           min      max       sd      var   PS
## 1: 0.01276386 123.7934 17.65069 311.5470 1.24
## 2: 0.01044044 123.7934 17.79409 316.6297 1.01
## 3: 0.02071536 123.7934 17.92913 321.4537 1.16
## 4: 0.01029319 123.7934 18.12623 328.5603 1.57
## 5: 0.00691762 123.7934 17.92845 321.4295 2.69
## 6: 0.00385927 123.7934 17.01244 289.4230 4.58
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 532896 3.28 31.83734 31.76814 51.00298
## 2:    02   Spd_All      Day 532896 3.28 31.66319 31.72370 50.66832
## 3:    03   Spd_All      Day 532896 3.28 31.78563 31.80791 51.00339
## 4:    04   Spd_All      Day 532896 3.28 31.81916 31.76814 51.06137
## 5:    05   Spd_All      Day 532896 3.28 31.89850 31.88013 51.10309
## 6:    06   Spd_All      Day 532896 3.28 31.69688 31.73387 51.03820
##            min      max       sd      var   PS
## 1: 0.002572847 123.7934 16.93550 286.8112 3.05
## 2: 0.002572847 123.7934 16.79058 281.9234 3.39
## 3: 0.002320447 123.7934 16.98187 288.3838 3.30
## 4: 0.002320447 123.7934 16.98703 288.5591 3.14
## 5: 0.003859270 123.7934 17.07776 291.6499 3.11
## 6: 0.002320447 123.7934 17.07678 291.6164 3.27
head(DOW1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 2360640 14.55 31.45805 31.50617 50.43438
## 2:   Fri   Spd_All      DOW 2307456 14.22 31.38627 31.30211 50.67593
## 3:   Sat   Spd_All      DOW 2307456 14.22 32.40969 32.62479 52.26774
## 4:   Sun   Spd_All      DOW 2310240 14.24 33.94250 34.56154 54.11282
## 5:   Mon   Spd_All      DOW 2310240 14.24 31.66958 31.71600 50.55600
## 6:   Tue   Spd_All      DOW 2310144 14.23 31.46305 31.52242 50.36283
##            min      max       sd      var    PS
## 1: 0.002320447 123.7934 16.76551 281.0825 16.56
## 2: 0.002320447 123.7934 16.94950 287.2854 15.98
## 3: 0.002320447 123.7934 17.68508 312.7622 10.42
## 4: 0.002320447 123.7934 17.99501 323.8204  7.72
## 5: 0.002320447 123.7934 16.73084 279.9211 16.09
## 6: 0.002320447 123.7934 16.69213 278.6271 16.63
head(Month1)
##    Level Attribute Group_by   Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 1160640 7.15 31.71852 31.81677 51.09529
## 2:    02   Spd_All    Month 1045632 6.44 31.36044 31.55900 50.38885
## 3:    03   Spd_All    Month 1160640 7.15 31.49013 31.54906 50.83791
## 4:    04   Spd_All    Month 1120320 6.90 31.70760 31.69983 51.11948
## 5:    05   Spd_All    Month 1160640 7.15 31.97548 31.89553 51.55953
## 6:    06   Spd_All    Month 1123200 6.92 32.06415 31.99638 51.64661
##            min      max       sd      var   PS
## 1: 0.002320447 123.7934 17.19389 295.6297 6.08
## 2: 0.003867411 123.7934 16.96025 287.6502 5.80
## 3: 0.002320447 123.7934 17.08310 291.8324 7.20
## 4: 0.002320447 123.7934 17.09920 292.3827 7.32
## 5: 0.002320447 123.7934 17.16365 294.5907 7.56
## 6: 0.002320447 123.7934 17.15220 294.1980 7.87
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: 10001-15000   Spd_All    AADT1 3781440 23.30 30.46188 30.38587 49.50371
## 2: 15001-20000   Spd_All    AADT1 1589376  9.79 33.58765 34.46804 51.24100
## 3: 20001-30000   Spd_All    AADT1 1022880  6.30 25.95755 26.57813 40.54094
## 4:  5001-10000   Spd_All    AADT1 4766208 29.37 30.76170 30.59294 49.10187
## 5:   2001-5000   Spd_All    AADT1 3610080 22.24 37.16963 40.87070 54.68730
## 6:     > 30000   Spd_All    AADT1  666336  4.11 35.41103 32.30093 61.89668
##            min       max       sd      var    PS
## 1: 0.006903998 113.65048 17.26938 298.2314 25.27
## 2: 0.296044575  91.51530 15.36608 236.1163 14.79
## 3: 0.049255839  90.37728 13.40319 179.6456  8.36
## 4: 0.002320447  87.93338 16.31189 266.0778 25.65
## 5: 0.002572847 104.83561 17.12121 293.1358 15.24
## 6: 0.109080568 123.79335 19.83871 393.5743  8.85
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 676218 4.17 34.12211 34.77486 54.15049
## 2: No crash   02   Spd_All 676203 4.17 34.30086 34.78235 54.89337
## 3: No crash   03   Spd_All 676194 4.17 36.08072 36.99781 56.88517
## 4: No crash   04   Spd_All 676189 4.17 36.45355 38.17837 56.86640
## 5: No crash   05   Spd_All 676206 4.17 35.87791 37.71932 55.83949
## 6: No crash   06   Spd_All 676220 4.17 34.35155 35.41832 52.94302
##           min      max       sd      var   PS
## 1: 0.01276386 123.7934 17.65069 311.5470 1.24
## 2: 0.01044044 123.7934 17.79447 316.6433 1.01
## 3: 0.02071536 123.7934 17.92920 321.4563 1.16
## 4: 0.01029319 123.7934 18.12707 328.5907 1.57
## 5: 0.00691762 123.7934 17.92850 321.4310 2.69
## 6: 0.00385927 123.7934 17.01244 289.4230 4.58
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 9298025 57.29 33.18113 33.54014 52.87319
## 2:   Day   Spd_All DayNight 6931255 42.71 31.04503 31.01147 49.80019
## 3: Night   Spd_Car DayNight 9298025 57.29 33.48840 34.14232 52.94302
## 4:   Day   Spd_Car DayNight 6931255 42.71 31.24942 31.50010 49.85953
## 5: Night Spd_Truck DayNight 9298025 57.29 33.61389 32.44940 55.94967
## 6:   Day Spd_Truck DayNight 6931255 42.71 31.89396 30.46828 53.10518
##            min       max       sd      var    PS
## 1: 0.002320447 123.79335 17.46506 305.0282 34.48
## 2: 0.002320447 123.79335 16.67572 278.0795 65.52
## 3: 0.002320447 123.79335 17.65183 311.5870 32.68
## 4: 0.002320447 123.79335 16.81420 282.7175 67.32
## 5: 0.002320447  94.70874 17.80309 316.9500 35.27
## 6: 0.002320447  94.70874 17.30067 299.3133 64.73
head(PeakOffPeak1)
##           Level Attribute    Group_by    Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 12510070 77.08 31.84252 31.80953
## 2: Morning Peak   Spd_All PeakOffPeak  1521495  9.38 31.95697 32.31106
## 3: Evening Peak   Spd_All PeakOffPeak  2197715 13.54 31.26531 31.10908
## 4:     Off-Peak   Spd_Car PeakOffPeak 12510070 77.08 32.04018 32.32718
## 5: Morning Peak   Spd_Car PeakOffPeak  1521495  9.38 32.20890 32.80612
## 6: Evening Peak   Spd_Car PeakOffPeak  2197715 13.54 31.43800 31.50340
##       p0.85         min      max       sd      var    PS
## 1: 51.11948 0.002320447 123.7934 17.02331 289.7932 68.30
## 2: 50.63079 0.002320447 123.7934 16.68421 278.3630 13.29
## 3: 50.43162 0.002320447 123.7934 16.96669 287.8687 18.40
## 4: 51.08829 0.002320447 123.7934 17.16019 294.4721 67.33
## 5: 50.75732 0.002320447 123.7934 16.79978 282.2326 13.76
## 6: 50.60963 0.002320447 123.7934 17.14908 294.0908 18.92
# Export every summary table to ./<mytype>/des_output/NC_R2_OS_DS_<suffix>.csv.
out_dir <- file.path(".", mytype, "des_output")
# write.csv() does not create missing directories, so make sure the target
# exists (no-op when it is already there).
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# File-name suffix -> summary table.
summary_tables <- list(
  hour        = hour1,
  day         = day1,
  dow         = DOW1,
  month       = Month1,
  aadt        = AADT2,
  crash       = Crash2,
  daynight    = DayNight1,
  peakoffpeak = PeakOffPeak1
)
for (suffix in names(summary_tables)) {
  write.csv(
    summary_tables[[suffix]],
    file.path(out_dir, paste0("NC_R2_OS_DS_", suffix, ".csv")),
    row.names = FALSE
  )
}