Washington Conflated Data (Rural Multi-lane Divided)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)



# Move to the shared facility folder, restore the saved workspace, then switch
# into the sub-folder named after the facility type.
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")
load("./multi-lane_divided_WA_reduce_withCrash.rData")
mytype <- "RMD"
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/", mytype))

# W_mun_med is not created in this script; presumably it is restored by load().
df_RMD <- W_mun_med
dim(df_RMD)
## [1] 3784320      64
### Calculating Speed
# Speed = 3600 * DISTANCE / travel time for each vehicle class.
# NOTE(review): this is mph only if DISTANCE is in miles and the travel times
# are in seconds -- confirm units.
df_RMD[["Spd_All"]] <- with(df_RMD, 3600 * DISTANCE / Travel_TIME_ALL_VEHICLES)
df_RMD[["Spd_Car"]] <- with(df_RMD, 3600 * DISTANCE / Travel_TIME_PASSENGER_VEHICLES)
df_RMD[["Spd_Truck"]] <- with(df_RMD, 3600 * DISTANCE / Travel_TIME_FREIGHT_TRUCKS)


### Month, Day
# Build an 8-character date string from DATE and slice it into Month / Day /
# Year. The substr() positions below read the padded value as MMDDYYYY, so a
# value that lost its leading zero is left-padded back to width 8.
# Fix: convert and pad in a single step. Previously the as.character() result
# was stored and then immediately overwritten, because str_pad() was applied
# to the raw DATE column rather than to the converted `date` column.
df_RMD$date <- str_pad(as.character(df_RMD$DATE), width = 8, side = "left", pad = "0")
df_RMD$Month <- substr(df_RMD$date, start = 1, stop = 2)
df_RMD$Day   <- substr(df_RMD$date, start = 3, stop = 4)
df_RMD$Year  <- substr(df_RMD$date, start = 5, stop = 8)

#' Convert a 15-minute epoch index to an "HH:MM:00" clock string.
#'
#' Each index is multiplied by 15 to get minutes, which are then split into
#' whole hours and remaining minutes (0 -> "00:00:00", 4 -> "01:00:00").
#'
#' @param x Numeric vector of whole-number epoch indices.
#' @return Character vector the same length as `x` (zero-length input gives
#'   `character(0)`). The value is returned visibly; the previous version ended
#'   in an assignment, so calling it at the console printed nothing.
ConvEpoc2HM <- function(x) {
  total_min <- x * 15
  # "%02d" left-pads with zeros to width 2, matching the old str_pad() calls,
  # and keeps the function free of non-base dependencies.
  sprintf("%02d:%02d:00", total_min %/% 60, total_min %% 60)
}


# Combine each record's date string with its epoch clock time into a
# timestamp, then derive hour-of-day and day-of-week from that timestamp.
#
# Fix: parse and format in a fixed zone (UTC) instead of the session's local
# zone (tz = ""). The inputs are plain wall-clock labels with no zone
# information; interpreting them in the local zone makes Hour and DOW depend
# on the machine running the script, and wall-clock times skipped by a
# daylight-saving change cannot be represented in such a zone.
df_RMD$Hour1 <- ConvEpoc2HM(df_RMD$EPOCH15)
DATE4 <- paste(strptime(df_RMD$date, format = "%m%d%Y", tz = "UTC"), df_RMD$Hour1, sep = ' ')
df_RMD$PCT_TIME <- as.POSIXct(DATE4, tz = "UTC", format = "%Y-%m-%d %H:%M:%OS")
# format() is given tz explicitly so the hour is read back in the same zone
# that was used for parsing.
df_RMD$Hour <- format(df_RMD$PCT_TIME, format = "%H", tz = "UTC")
df_RMD$DOW <- wday(df_RMD$PCT_TIME, label = TRUE)

Temporal Patterns

# List every column currently on df_RMD, including the derived speed and
# date/time columns added above.
names(df_RMD)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "V1"                             "DATE"                          
##  [5] "EPOCH15"                        "Travel_TIME_ALL_VEHICLES"      
##  [7] "Travel_TIME_PASSENGER_VEHICLES" "Travel_TIME_FREIGHT_TRUCKS"    
##  [9] "NP"                             "ADMIN_LEVE"                    
## [11] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [13] "DISTANCE"                       "ROAD_NUMBE"                    
## [15] "ROAD_NAME"                      "LATITUDE"                      
## [17] "LONGITUDE"                      "ROAD_DIREC"                    
## [19] "ORN_FID"                        "FID_1"                         
## [21] "ACCESS"                         "LSHL_TY2"                      
## [23] "LSHL_TYP"                       "MED_TYPE"                      
## [25] "NHS_IND"                        "PRK_ZNE"                       
## [27] "RSHL_TY2"                       "RSHL_TYP"                      
## [29] "SURF_TYP"                       "SURF_TY2"                      
## [31] "TERRAIN"                        "COMP_DIR"                      
## [33] "COUNTY"                         "FUNC_CLS"                      
## [35] "MEDBARTY"                       "ST_FUNC"                       
## [37] "RTE_NBR"                        "HPMS"                          
## [39] "ROAD_INV"                       "SPD_LIMT"                      
## [41] "BEGMP"                          "ENDMP"                         
## [43] "LSHLDWID"                       "MEDWID"                        
## [45] "NO_LANE1"                       "NO_LANE2"                      
## [47] "NO_LANES"                       "RSHLDWID"                      
## [49] "RSHL_WD2"                       "SEG_LNG"                       
## [51] "lanewid"                        "rdwy_wd1"                      
## [53] "rdwy_wd2"                       "rdwy_wid"                      
## [55] "AADT"                           "mvmt"                          
## [57] "rodwycls"                       "ORN_FID_1"                     
## [59] "Total"                          "Fatal"                         
## [61] "Injury"                         "PDO"                           
## [63] "DAYMTH"                         "Crash"                         
## [65] "Spd_All"                        "Spd_Car"                       
## [67] "Spd_Truck"                      "date"                          
## [69] "Month"                          "Day"                           
## [71] "Year"                           "Hour1"                         
## [73] "PCT_TIME"                       "Hour"                          
## [75] "DOW"
# Bin AADT into labelled volume classes. Intervals are open on the left and
# closed on the right, so a value of exactly 2000 falls in "0-2000".
df_RMD[["AADT1"]] <- cut(
  x = df_RMD[["AADT"]],
  breaks = 1000 * c(0, 2, 5, 10, 15, 20, 30, Inf),
  labels = c("0-2000", "2001-5000", "5001-10000", "10001-15000",
             "15001-20000", "20001-30000", "> 30000"),
  right = TRUE
)
table(df_RMD[["AADT1"]])
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##           0      105120     1086240      490560      911040      490560 
##     > 30000 
##      700800
# Two-level crash indicator: values in (-1, 0] are "No crash", anything above
# zero is "Crash".
df_RMD[["Crash1"]] <- cut(
  x = df_RMD[["Crash"]],
  breaks = c(-1, 0, Inf),
  labels = c("No crash", "Crash"),
  right = TRUE
)
table(df_RMD[["Crash1"]])
## 
## No crash    Crash 
##  3783676      644
# ############################################################
# df_RMD$DayNight <- cut(df_RMD$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_RMD$DayNight)
# df_RMD$PeakOffPeak <- cut(df_RMD$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_RMD$PeakOffPeak)
# ###########################################################

# Day/night flag from the epoch index. cut() cannot take a repeated label, so
# it is asked for integer bin codes (labels = FALSE) and the codes index a
# label vector instead. Epochs 0-26 and 68-95 are "Night", 27-67 are "Day";
# anything outside (-1, 95] becomes NA.
df_RMD$DayNight <- c("Night", "Day", "Night")[
  cut(df_RMD$EPOCH15, breaks = c(-1, 26, 67, 95), labels = FALSE)
]
table(df_RMD$DayNight)
## 
##     Day   Night 
## 1616220 2168100
# Same approach for the peak flag: epochs 27-35 are "Morning Peak", 63-75 are
# "Evening Peak", and every other epoch in (-1, 96] is "Off-Peak".
df_RMD$PeakOffPeak <- c("Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak")[
  cut(df_RMD$EPOCH15, breaks = c(-1, 26, 35, 62, 75, 96), labels = FALSE)
]
table(df_RMD$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       512460       354780      2917080
# # ###########################################################
# df_RMD01 <- df_RMD[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_RMD02 <- df_RMD01[,c(8:19)]
# # ###########################################################
# df_RMD01: roadway geometry + grouping variables + speeds.
# The column vectors are kept as literal c(...) calls inside `[` on purpose, so
# the selection reads the same whether df_RMD is a data.frame or a data.table.
df_RMD01 <- df_RMD[,c("SPD_LIMT","LSHLDWID","MEDWID",  "NO_LANES","RSHLDWID","SEG_LNG", "rdwy_wid",  
                      "AADT1",   "Crash1",  "EPOCH15", "Hour","Day", "DOW", "Month",
                      "DayNight","PeakOffPeak","Spd_All", "Spd_Car", "Spd_Truck")]
# df_RMD02: only the grouping variables and the three speed measures.
df_RMD02 <- df_RMD01[,c( "AADT1","Crash1","EPOCH15","Hour", "Day", "DOW", "Month",
                         "DayNight","PeakOffPeak","Spd_All","Spd_Car","Spd_Truck")]


# cols  : grouping variables to be treated as factors
# cols1 : speed measures summarised per group
# cols2 : geometry variables summarised over the whole data set
cols <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1" , "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("SPD_LIMT", "LSHLDWID", "MEDWID" , "NO_LANES", "RSHLDWID" ,"SEG_LNG" ,  "rdwy_wid")
# Fix: the old line combined `=` with magrittr's compound-assignment pipe
# `%<>%` (assigning twice, and relying on an operator that none of the
# library() calls in this script attaches directly) and wrapped the function in
# the deprecated funs(). A plain pipe with the function passed directly
# performs the same conversion.
df_RMD02 <- df_RMD02 %>%
  mutate_at(cols, factor)


# Statistics requested for every grouped speed summary below.
speed_stats <- c("Count", "Prop", "mean", "median", "p0.85",
                 "min", "max", "sd", "var", "PS")

# One summary table of the three speed measures per grouping variable.
hour1 <- ExpCustomStat(df_RMD02, Cvar = "Hour", Nvar = cols1,
                       stat = speed_stats, gpby = FALSE)
day1 <- ExpCustomStat(df_RMD02, Cvar = "Day", Nvar = cols1,
                      stat = speed_stats, gpby = FALSE)
DOW1 <- ExpCustomStat(df_RMD02, Cvar = "DOW", Nvar = cols1,
                      stat = speed_stats, gpby = FALSE)
Month1 <- ExpCustomStat(df_RMD02, Cvar = "Month", Nvar = cols1,
                        stat = speed_stats, gpby = FALSE)
AADT2 <- ExpCustomStat(df_RMD02, Cvar = "AADT1", Nvar = cols1,
                       stat = speed_stats, gpby = FALSE)
# Crash status crossed with hour; gpby is left at its default here.
Crash2 <- ExpCustomStat(df_RMD02, Cvar = c("Crash1", "Hour"), Nvar = cols1,
                        stat = speed_stats)
DayNight1 <- ExpCustomStat(df_RMD02, Cvar = "DayNight", Nvar = cols1,
                           stat = speed_stats, gpby = FALSE)
PeakOffPeak1 <- ExpCustomStat(df_RMD02, Cvar = "PeakOffPeak", Nvar = cols1,
                              stat = speed_stats, gpby = FALSE)
# Ungrouped summary of the geometry variables (no Count / Prop).
geo <- ExpCustomStat(df_RMD01, Nvar = cols2,
                     stat = speed_stats[!speed_stats %in% c("Count", "Prop")])


# Draw one faceted line chart from an already column-subset summary table.
#   sub_tbl    : summary columns to plot; must contain the columns mean..sd
#   plot_title : chart title
#   x_var      : column shown on the x axis
#   facet_rows : facet_grid() formula (one panel row per statistic by default)
# The table is reshaped to long form (one row per statistic) before plotting.
plot_speed_summary <- function(sub_tbl, plot_title, x_var = "Level",
                               facet_rows = condition ~ .) {
  long_tbl <- gather(sub_tbl, condition, measurement, mean:sd, factor_key = TRUE)
  ggline(long_tbl, x = x_var, y = "measurement", color = "Attribute",
         palette = c("red", "blue", "black")) +
    theme(legend.title = element_blank()) +
    facet_grid(facet_rows) +
    labs(title = plot_title)
}

# Columns 1, 2, 6, 8, 11 of the single-group tables are Level, Attribute, mean,
# p0.85 and sd. The literal c(...) stays inside `[` at each call so the
# subsetting behaves exactly as before.
plot_speed_summary(hour1[, c(1, 2, 6, 8, 11)], "By Hour")

plot_speed_summary(DOW1[, c(1, 2, 6, 8, 11)], "By Day of Week")

plot_speed_summary(Month1[, c(1, 2, 6, 8, 11)], "By Month")

plot_speed_summary(AADT2[, c(1, 2, 6, 8, 11)], "By AADT")

# The crash table has two grouping columns (Crash1, Hour), so it keeps one
# extra column, plots against Hour and adds Crash1 to the facet rows.
plot_speed_summary(Crash2[, c(1, 2, 3, 6, 8, 11)], "By Crash",
                   x_var = "Hour", facet_rows = condition + Crash1 ~ .)

plot_speed_summary(DayNight1[, c(1, 2, 6, 8, 11)], "By Day/Night")

plot_speed_summary(PeakOffPeak1[, c(1, 2, 6, 8, 11)], "By Peak/Off-Peak")

Temporal Statistics of Operational Speed

# Return to the shared facility folder; the CSV paths written at the end of the
# script are relative to it ("./<mytype>/des_output/...").
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")

# Preview the first rows of the by-hour speed summary.
head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 157680 4.17 57.88218 60.05926 63.33602
## 2:    02   Spd_All     Hour 157680 4.17 58.35372 60.40901 63.37015
## 3:    03   Spd_All     Hour 157680 4.17 57.88928 60.12332 63.04721
## 4:    04   Spd_All     Hour 157680 4.17 57.49278 59.74248 62.88021
## 5:    05   Spd_All     Hour 157680 4.17 57.40758 59.84187 63.01613
## 6:    06   Spd_All     Hour 157680 4.17 57.17772 60.11047 63.22985
##          min      max       sd      var   PS
## 1: 0.6214941 89.63181 8.123692 65.99438 2.49
## 2: 0.6215004 90.88744 7.429353 55.19529 2.54
## 3: 0.6007661 88.36087 7.725809 59.68812 2.76
## 4: 0.6007661 87.50246 7.998398 63.97436 3.26
## 5: 0.6214647 87.75900 8.679242 75.32923 3.85
## 6: 0.6007661 90.07686 9.337910 87.19656 4.45
# Preview the first rows of the by-day-of-month speed summary.
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 124416 3.29 57.73510 60.34493 63.59400
## 2:    02   Spd_All      Day 124416 3.29 57.14641 60.03885 63.41567
## 3:    03   Spd_All      Day 124416 3.29 57.40070 60.23741 63.49920
## 4:    04   Spd_All      Day 124416 3.29 57.44193 60.25967 63.50346
## 5:    05   Spd_All      Day 124416 3.29 57.48908 60.25432 63.53874
## 6:    06   Spd_All      Day 124416 3.29 57.59949 60.30424 63.59400
##          min      max       sd      var   PS
## 1: 0.6007661 87.95047 9.195822 84.56314 3.09
## 2: 0.6214647 89.03160 9.469474 89.67093 3.34
## 3: 0.6007661 84.83559 9.431911 88.96094 3.34
## 4: 0.6007661 86.58982 9.367941 87.75832 3.21
## 5: 0.6214647 91.53321 9.443207 89.17416 3.21
## 6: 0.6203265 92.67885 9.276998 86.06268 3.30
# Preview the first rows of the by-day-of-week speed summary.
head(DOW1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 549504 14.52 57.21047 60.18130 63.39466
## 2:   Fri   Spd_All      DOW 539136 14.25 57.30706 60.20402 63.60188
## 3:   Sat   Spd_All      DOW 539136 14.25 58.08784 60.52855 64.19372
## 4:   Sun   Spd_All      DOW 539136 14.25 58.71306 60.73435 64.45408
## 5:   Mon   Spd_All      DOW 539136 14.25 57.26498 60.09949 63.38256
## 6:   Tue   Spd_All      DOW 539136 14.25 57.03046 60.04601 63.20445
##          min      max       sd      var    PS
## 1: 0.6007661 89.03160 9.482434 89.91655 15.60
## 2: 0.6214624 89.51471 9.657042 93.25846 15.32
## 3: 0.6203265 92.67885 9.464036 89.56797 12.45
## 4: 0.6007661 90.88744 8.897504 79.16558 10.88
## 5: 0.6007661 91.53321 9.306963 86.61957 14.83
## 6: 0.6007661 91.01388 9.394181 88.25064 15.45
# Preview the first rows of the by-month speed summary.
head(Month1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 321408 8.49 56.93244 59.76448 62.95514
## 2:    02   Spd_All    Month 290304 7.67 57.35742 60.12269 63.37198
## 3:    03   Spd_All    Month 321408 8.49 57.45906 60.27859 63.53533
## 4:    04   Spd_All    Month 311040 8.22 57.40629 60.28192 63.46622
## 5:    05   Spd_All    Month 321408 8.49 57.60348 60.32935 63.59400
## 6:    06   Spd_All    Month 311040 8.22 57.56049 60.38009 63.79496
##          min      max       sd      var   PS
## 1: 0.6007661 88.73345 9.241588 85.40695 7.53
## 2: 0.6007661 88.28732 9.239916 85.37605 7.26
## 3: 0.6007661 89.13791 9.479719 89.86508 8.16
## 4: 0.6214647 87.68429 9.460493 89.50092 8.22
## 5: 0.6214608 89.51471 9.463709 89.56178 8.39
## 6: 0.6007661 92.67885 9.769247 95.43819 8.54
# Preview the first rows of the by-AADT-class speed summary.
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:  5001-10000   Spd_All    AADT1 1086240 28.70 60.77487 61.34416 64.17619
## 2: 15001-20000   Spd_All    AADT1  911040 24.07 58.06850 60.25234 63.66780
## 3: 20001-30000   Spd_All    AADT1  490560 12.96 50.49535 53.85630 61.43562
## 4:     > 30000   Spd_All    AADT1  700800 18.52 56.18289 59.35440 63.22959
## 5:   2001-5000   Spd_All    AADT1  105120  2.78 50.28303 54.09951 62.04961
## 6: 10001-15000   Spd_All    AADT1  490560 12.96 58.71108 60.85114 63.85421
##          min      max        sd       var    PS
## 1: 0.6214624 92.67885  5.568083  31.00355 33.86
## 2: 0.6203265 91.18800  8.300460  68.89764 22.59
## 3: 0.6214931 86.91560 12.055627 145.33813 12.11
## 4: 0.6007661 90.88744 10.162342 103.27319 22.29
## 5: 0.6214985 83.84451 13.689839 187.41170  0.64
## 6: 0.6214335 91.53321  9.235712  85.29839  8.51
# Preview the first rows of the crash-status-by-hour speed summary.
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 157664 4.17 57.88253 60.05926 63.33679
## 2: No crash   02   Spd_All 157669 4.17 58.35379 60.40901 63.37043
## 3: No crash   03   Spd_All 157672 4.17 57.89012 60.12395 63.04721
## 4: No crash   04   Spd_All 157664 4.17 57.49261 59.74248 62.88021
## 5: No crash   05   Spd_All 157655 4.17 57.40805 59.84217 63.01795
## 6: No crash   06   Spd_All 157648 4.17 57.17835 60.11047 63.22985
##          min      max       sd      var   PS
## 1: 0.6214941 89.63181 8.123533 65.99179 2.49
## 2: 0.6215004 90.88744 7.429597 55.19891 2.53
## 3: 0.6007661 88.36087 7.722526 59.63741 2.76
## 4: 0.6007661 87.50246 7.998596 63.97754 3.26
## 5: 0.6214647 87.75900 8.679436 75.33261 3.85
## 6: 0.6007661 90.07686 9.338072 87.19959 4.45
# Preview the first rows of the day/night speed summary.
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 2168100 57.29 57.80939 60.25661 63.44216
## 2:   Day   Spd_All DayNight 1616220 42.71 57.15134 60.25216 63.60662
## 3: Night   Spd_Car DayNight 2168100 57.29 58.67287 61.00515 65.34688
## 4:   Day   Spd_Car DayNight 1616220 42.71 58.07230 60.93673 65.96362
## 5: Night Spd_Truck DayNight 2168100 57.29 57.81378 60.05178 62.37124
## 6:   Day Spd_Truck DayNight 1616220 42.71 56.80429 59.82755 61.96999
##          min      max        sd       var    PS
## 1: 0.6007661 91.53321  8.740307  76.39297 46.86
## 2: 0.6203265 92.67885  9.945993  98.92279 53.14
## 3: 0.4349164 92.62516  9.807319  96.18351 41.61
## 4: 0.6203265 93.04548 11.122911 123.71914 58.39
## 5: 0.4349164 79.72374  7.616653  58.01341 47.00
## 6: 0.4349164 76.30816  9.082711  82.49564 53.00
# Preview the first rows of the peak/off-peak speed summary.
head(PeakOffPeak1)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 2917080 77.08 57.51470 60.20753
## 2: Morning Peak   Spd_All PeakOffPeak  354780  9.38 57.13623 60.14129
## 3: Evening Peak   Spd_All PeakOffPeak  512460 13.54 57.42222 60.56040
## 4:     Off-Peak   Spd_Car PeakOffPeak 2917080 77.08 58.34442 60.92512
## 5: Morning Peak   Spd_Car PeakOffPeak  354780  9.38 57.88903 60.68575
## 6: Evening Peak   Spd_Car PeakOffPeak  512460 13.54 58.52009 61.46545
##       p0.85       min      max        sd       var    PS
## 1: 63.45731 0.6007661 92.67885  9.198911  84.61997 73.06
## 2: 63.27459 0.6214335 89.45918  9.482676  89.92115 11.08
## 3: 64.10748 0.6203265 91.53321 10.270982 105.49308 15.86
## 4: 65.59729 0.6007661 93.04548 10.411890 108.40746 70.73
## 5: 65.08940 0.6214335 91.84649 10.494542 110.13542 11.82
## 6: 66.57292 0.4349164 91.53321 11.406761 130.11419 17.45
# Write every summary table to ./<mytype>/des_output/ as CSV without row names.
# Tables and path suffixes are paired by position and written in that order.
invisible(Map(
  function(tbl, rel_path) {
    write.csv(tbl, paste0("./", mytype, rel_path), row.names = FALSE)
  },
  list(hour1, day1, DOW1, Month1, AADT2, Crash2, DayNight1, PeakOffPeak1),
  c("/des_output/WA_RMD_OS_DS_hour.csv",
    "/des_output/WA_RMD_OS_DS_day.csv",
    "/des_output/WA_RMD_OS_DS_dow.csv",
    "/des_output/WA_RMD_OS_DS_month.csv",
    "/des_output/WA_RMD_OS_DS_aadt.csv",
    "/des_output/WA_RMD_OS_DS_crash.csv",
    "/des_output/WA_RMD_OS_DS_daynight.csv",
    "/des_output/WA_RMD_OS_DS_peakoffpeak.csv")
))