# Washington Conflated Data (Rural Multi-lane Undivided)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)



# ---- Working directory and input data ----
# Move to the facility-based data folder and restore the saved workspace
# objects stored in the .rData file.
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")
load(file = "./multi-lane_undivided_WA_reduce_withCrash.rData")

# Facility-type code; it names the sub-folder used for this facility type.
mytype <- "RMU"
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/", mytype))

# Working copy of the conflated table
# (W_mun_nomed is presumably restored by load() above -- verify).
df_RMU <- W_mun_nomed
dim(df_RMU)
## [1] 946944     64
### Calculating Speed
# Speed = 3600 * distance / travel time, computed separately for all vehicles,
# passenger vehicles and freight trucks.
# (Presumably miles and seconds, giving mph -- TODO confirm units.)
df_RMU$Spd_All <- with(df_RMU, 3600 * DISTANCE / Travel_TIME_ALL_VEHICLES)
df_RMU$Spd_Car <- with(df_RMU, 3600 * DISTANCE / Travel_TIME_PASSENGER_VEHICLES)
df_RMU$Spd_Truck <- with(df_RMU, 3600 * DISTANCE / Travel_TIME_FREIGHT_TRUCKS)


### Month, Day
# Build an 8-character date string from DATE, left-padding with "0" so that
# values that lost a leading zero still line up. The conversion to character
# and the padding are done in one step; the previous separate as.character()
# assignment was immediately overwritten and had no effect.
df_RMU$date <- str_pad(as.character(df_RMU$DATE), 8, pad = "0")

# Slice the padded string by position: characters 1-2, 3-4 and 5-8.
# (The strptime() call later in this script parses the same string as
# "%m%d%Y", i.e. month, day, year.)
df_RMU$Month <- substr(df_RMU$date, start = 1, stop = 2)
df_RMU$Day   <- substr(df_RMU$date, start = 3, stop = 4)
df_RMU$Year  <- substr(df_RMU$date, start = 5, stop = 8)

# Convert 15-minute epoch indices into "HH:MM:00" clock strings.
#
# Each epoch index is multiplied by 15 to get minutes, which are then split
# into whole hours and remaining minutes.
#
# x: vector of whole-number epoch indices (epoch 0 maps to "00:00:00").
#
# Returns a character vector the same length as x. Zero-length input gives
# character(0) (the paste()-based version produced "::00"), and the result
# is returned visibly rather than as the value of an assignment.
ConvEpoc2HM <- function(x) {
  total_min <- x * 15
  # "%02d" zero-pads to two digits and accepts whole-valued doubles.
  sprintf("%02d:%02d:00", total_min %/% 60, total_min %% 60)
}


# Clock time ("HH:MM:00") for each record's 15-minute epoch index.
df_RMU$Hour1 <- ConvEpoc2HM(df_RMU$EPOCH15)
# Parse the padded date string as month-day-year, then paste the date and the
# clock time together into a single "date time" string per record.
DATE4 <- paste(strptime(df_RMU$date, format = "%m%d%Y", tz =""), df_RMU$Hour1, sep = ' ')
# Parse the combined string into a POSIXct timestamp.
# NOTE(review): tz = "" means the session's current time zone, so results can
# differ between machines (e.g. around daylight-saving changes) -- confirm the
# intended zone.
df_RMU$PCT_TIME <- as.POSIXct(DATE4, tz ="", format = "%Y-%m-%d %H:%M:%OS")
# Two-digit hour of day as a character string.
df_RMU$Hour <- strftime(df_RMU$PCT_TIME, format="%H")
# Day of week as a labelled factor (lubridate::wday with label = TRUE).
df_RMU$DOW <- wday(df_RMU$PCT_TIME, label = TRUE)

# Temporal Patterns

# List every column of df_RMU, including the derived speed and date/time
# columns added above; the echoed output follows.
names(df_RMU)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "V1"                             "DATE"                          
##  [5] "EPOCH15"                        "Travel_TIME_ALL_VEHICLES"      
##  [7] "Travel_TIME_PASSENGER_VEHICLES" "Travel_TIME_FREIGHT_TRUCKS"    
##  [9] "NP"                             "ADMIN_LEVE"                    
## [11] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [13] "DISTANCE"                       "ROAD_NUMBE"                    
## [15] "ROAD_NAME"                      "LATITUDE"                      
## [17] "LONGITUDE"                      "ROAD_DIREC"                    
## [19] "ORN_FID"                        "FID_1"                         
## [21] "ACCESS"                         "LSHL_TY2"                      
## [23] "LSHL_TYP"                       "MED_TYPE"                      
## [25] "NHS_IND"                        "PRK_ZNE"                       
## [27] "RSHL_TY2"                       "RSHL_TYP"                      
## [29] "SURF_TYP"                       "SURF_TY2"                      
## [31] "TERRAIN"                        "COMP_DIR"                      
## [33] "COUNTY"                         "FUNC_CLS"                      
## [35] "MEDBARTY"                       "ST_FUNC"                       
## [37] "RTE_NBR"                        "HPMS"                          
## [39] "ROAD_INV"                       "SPD_LIMT"                      
## [41] "BEGMP"                          "ENDMP"                         
## [43] "LSHLDWID"                       "MEDWID"                        
## [45] "NO_LANE1"                       "NO_LANE2"                      
## [47] "NO_LANES"                       "RSHLDWID"                      
## [49] "RSHL_WD2"                       "SEG_LNG"                       
## [51] "lanewid"                        "rdwy_wd1"                      
## [53] "rdwy_wd2"                       "rdwy_wid"                      
## [55] "AADT"                           "mvmt"                          
## [57] "rodwycls"                       "ORN_FID_1"                     
## [59] "Total"                          "Fatal"                         
## [61] "Injury"                         "PDO"                           
## [63] "DAYMTH"                         "Crash"                         
## [65] "Spd_All"                        "Spd_Car"                       
## [67] "Spd_Truck"                      "date"                          
## [69] "Month"                          "Day"                           
## [71] "Year"                           "Hour1"                         
## [73] "PCT_TIME"                       "Hour"                          
## [75] "DOW"
# Bin AADT into labelled volume classes. cut() intervals are right-closed,
# i.e. (0, 2000], (2000, 5000], ..., (30000, Inf).
# NOTE(review): a value of exactly 0 (or a missing AADT) falls outside the
# first interval and becomes NA -- confirm that this is intended.
df_RMU$AADT1 <- cut(
  df_RMU$AADT,
  breaks = c(0, 2, 5, 10, 15, 20, 30, Inf) * 1000,
  labels = c(
    "0-2000", "2001-5000", "5001-10000", "10001-15000",
    "15001-20000", "20001-30000", "> 30000"
  )
)
table(df_RMU$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##       17664       17664      210240      315360           0      350976 
##     > 30000 
##           0
# Two-level crash indicator: (-1, 0] is "No crash", (0, Inf) is "Crash".
df_RMU$Crash1 <- cut(
  df_RMU$Crash,
  breaks = c(-1, 0, Inf),
  labels = c("No crash", "Crash")
)
table(df_RMU$Crash1)
## 
## No crash    Crash 
##   946864       80
# ############################################################
# df_RMU$DayNight <- cut(df_RMU$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_RMU$DayNight)
# df_RMU$PeakOffPeak <- cut(df_RMU$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_RMU$PeakOffPeak)
# ###########################################################

# Day/night flag from the epoch index. cut(..., labels = FALSE) returns the
# interval number (1, 2 or 3), which is then used to look up the label; this
# indirect lookup allows the same label ("Night") to cover two intervals.
# Intervals: (-1, 26] Night, (26, 67] Day, (67, 95] Night.
df_RMU$DayNight <- c("Night", "Day", "Night")[
  cut(df_RMU$EPOCH15, breaks = c(-1, 26, 67, 95), labels = FALSE)
]
table(df_RMU$DayNight)
## 
##    Day  Night 
## 404424 542520
# Peak/off-peak flag, built the same way.
# Intervals: (-1, 26] Off-Peak, (26, 35] Morning Peak, (35, 62] Off-Peak,
# (62, 75] Evening Peak, (75, 96] Off-Peak.
df_RMU$PeakOffPeak <- c(
  "Off-Peak", "Morning Peak", "Off-Peak", "Evening Peak", "Off-Peak"
)[
  cut(df_RMU$EPOCH15, breaks = c(-1, 26, 35, 62, 75, 96), labels = FALSE)
]
table(df_RMU$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##       128232        88776       729936
# # ###########################################################
# df_RMU01 <- df_RMU[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_RMU02 <- df_RMU01[,c(8:19)]
# # ###########################################################
# Analysis subset: roadway geometry, binned AADT / crash flags, temporal keys
# and the three speed measures.
df_RMU01 <- df_RMU[,c("SPD_LIMT","LSHLDWID","MEDWID",  "NO_LANES","RSHLDWID","SEG_LNG", "rdwy_wid",  
                      "AADT1",   "Crash1",  "EPOCH15", "Hour","Day", "DOW", "Month",
                      "DayNight","PeakOffPeak","Spd_All", "Spd_Car", "Spd_Truck")]
# Narrower subset without the geometry columns, used for the grouped speed
# summaries.
df_RMU02 <- df_RMU01[,c( "AADT1","Crash1","EPOCH15","Hour", "Day", "DOW", "Month",
                         "DayNight","PeakOffPeak","Spd_All","Spd_Car","Spd_Truck")]


# cols  : grouping variables that must be factors
# cols1 : speed measures to summarise
# cols2 : roadway geometry measures to summarise
cols <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1" , "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("SPD_LIMT", "LSHLDWID", "MEDWID" , "NO_LANES", "RSHLDWID" ,"SEG_LNG" ,  "rdwy_wid")
# Convert every grouping column to a factor. This is a single plain
# assignment: the previous form combined `=` with magrittr's `%<>%` (magrittr
# is not attached by this script) and the deprecated funs() wrapper.
df_RMU02 <- mutate_at(df_RMU02, cols, factor)


# Statistics requested for every speed summary.
speed_stats <- c('Count', 'Prop', 'mean', 'median', 'p0.85', 'min', 'max', 'sd', 'var', 'PS')

# One-way summary of the speed measures (cols1) by a single grouping column
# of df_RMU02, with gpby = FALSE as in each individual call.
summarise_speed_by <- function(group_var) {
  ExpCustomStat(df_RMU02, Cvar = group_var, Nvar = cols1, stat = speed_stats, gpby = FALSE)
}

hour1        <- summarise_speed_by("Hour")
day1         <- summarise_speed_by("Day")
DOW1         <- summarise_speed_by("DOW")
Month1       <- summarise_speed_by("Month")
AADT2        <- summarise_speed_by("AADT1")
# Crash status crossed with hour; gpby is left at the function's default here.
Crash2       <- ExpCustomStat(df_RMU02, Cvar = c("Crash1", "Hour"), Nvar = cols1, stat = speed_stats)
DayNight1    <- summarise_speed_by("DayNight")
PeakOffPeak1 <- summarise_speed_by("PeakOffPeak")
# Ungrouped summary of the geometry measures (no Count / Prop requested).
geo <- ExpCustomStat(df_RMU01, Nvar = cols2,
                     stat = c('mean', 'median', 'p0.85', 'min', 'max', 'sd', 'var', 'PS'))


# Draw one faceted line chart from a pre-selected set of summary columns.
#
# stat_cols  : summary table already reduced to the key column(s), Attribute,
#              and the mean, p0.85 and sd columns
# x_var      : name of the column plotted on the x axis
# facet_spec : formula handed to facet_grid()
# plot_title : plot title
#
# The mean..sd columns are stacked into condition / measurement pairs, one
# line is drawn per Attribute, and the panels are split by facet_spec.
plot_speed_stats <- function(stat_cols, x_var, facet_spec, plot_title) {
  long_stats <- gather(stat_cols, condition, measurement, mean:sd, factor_key = TRUE)
  speed_plot <- ggline(long_stats, x = x_var, y = "measurement", color = "Attribute",
                       palette = c("red", "blue", "black"))
  speed_plot +
    theme(legend.title = element_blank()) +
    facet_grid(facet_spec) +
    labs(title = plot_title)
}

# Column positions are kept as literals at each call site: 1 = key,
# 2 = Attribute, 6 = mean, 8 = p0.85, 11 = sd (shifted by one key column for
# Crash2, which has both Crash1 and Hour in front).
plot_speed_stats(hour1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Hour")

plot_speed_stats(DOW1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Day of Week")

plot_speed_stats(Month1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Month")

plot_speed_stats(AADT2[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By AADT")

plot_speed_stats(Crash2[, c(1, 2, 3, 6, 8, 11)], "Hour", condition + Crash1 ~ ., "By Crash")

plot_speed_stats(DayNight1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Day/Night")

plot_speed_stats(PeakOffPeak1[, c(1, 2, 6, 8, 11)], "Level", condition ~ ., "By Peak/Off-Peak")

# Temporal Statistics of Operational Speed

# Return to the facility-based root folder; the CSV exports further down use
# paths relative to it ("./<mytype>/des_output/...").
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")

# Preview the first rows of each summary table; the echoed output follows
# each call.
# Speed statistics by hour of day.
head(hour1)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 39456 4.17 37.15730 38.92184 51.08376
## 2:    02   Spd_All     Hour 39456 4.17 38.09885 39.89238 53.76903
## 3:    03   Spd_All     Hour 39456 4.17 37.84517 39.29380 53.51417
## 4:    04   Spd_All     Hour 39456 4.17 38.11333 39.85875 53.94114
## 5:    05   Spd_All     Hour 39456 4.17 37.60194 40.33521 53.55231
## 6:    06   Spd_All     Hour 39456 4.17 36.94226 39.10649 53.15785
##          min      max       sd      var   PS
## 1: 0.6215020 67.54346 13.31644 177.3275 1.60
## 2: 0.6213357 66.54964 14.11265 199.1670 1.69
## 3: 0.6215020 67.08382 14.07897 198.2174 1.72
## 4: 0.6214889 71.82000 14.17775 201.0085 2.42
## 5: 0.6214889 88.85277 14.86616 221.0026 3.46
## 6: 0.6214889 73.49724 14.84050 220.2403 4.45
# Speed statistics by day of month.
head(day1)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 31104 3.28 35.70558 36.99302 52.55362
## 2:    02   Spd_All      Day 31104 3.28 35.93497 37.57753 52.79806
## 3:    03   Spd_All      Day 31104 3.28 35.88374 37.41617 52.93220
## 4:    04   Spd_All      Day 31104 3.28 35.74576 37.05480 52.93324
## 5:    05   Spd_All      Day 31104 3.28 35.88393 37.83181 52.57007
## 6:    06   Spd_All      Day 31104 3.28 36.38034 38.21963 53.20894
##          min      max       sd      var   PS
## 1: 0.6213357 71.82000 15.09566 227.8788 3.01
## 2: 0.6213357 78.72744 15.07356 227.2122 3.45
## 3: 0.6214889 71.82000 15.27512 233.3291 3.34
## 4: 0.6213357 77.71991 15.15057 229.5397 3.07
## 5: 0.6214861 71.82000 15.05421 226.6292 3.08
## 6: 0.6213357 71.82000 15.00141 225.0424 3.32
# Speed statistics by day of week.
head(DOW1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 137664 14.54 36.05860 37.99084 52.81266
## 2:   Fri   Spd_All      DOW 134784 14.23 35.80642 37.49633 52.79806
## 3:   Sat   Spd_All      DOW 134784 14.23 36.08787 37.05480 53.02504
## 4:   Sun   Spd_All      DOW 134784 14.23 35.13806 35.29029 52.43119
## 5:   Mon   Spd_All      DOW 134784 14.23 36.18111 38.21867 52.93324
## 6:   Tue   Spd_All      DOW 134784 14.23 35.87113 37.74957 52.81378
##          min      max       sd      var    PS
## 1: 0.6213357 77.71991 15.12383 228.7301 16.60
## 2: 0.6213357 75.86861 15.28812 233.7267 15.85
## 3: 0.6213357 78.72744 15.02246 225.6742 11.07
## 4: 0.6213357 72.97226 15.02791 225.8380  8.31
## 5: 0.6213357 78.72744 15.13921 229.1957 15.69
## 6: 0.6213357 88.85277 15.21738 231.5688 16.30
# Speed statistics by month.
head(Month1)
##    Level Attribute Group_by Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 71424 7.54 36.47749 38.01352 52.87466
## 2:    02   Spd_All    Month 64512 6.81 36.93759 38.85996 53.36783
## 3:    03   Spd_All    Month 71424 7.54 36.85546 39.00505 53.61610
## 4:    04   Spd_All    Month 69120 7.30 36.94561 39.00505 53.67338
## 5:    05   Spd_All    Month 71424 7.54 36.71357 38.48842 53.95836
## 6:    06   Spd_All    Month 69120 7.30 36.60035 38.65357 53.83657
##          min      max       sd      var   PS
## 1: 0.6213357 71.82000 14.60882 213.4176 6.32
## 2: 0.6213357 78.72744 14.78056 218.4650 6.33
## 3: 0.6213357 88.85277 15.12544 228.7790 7.09
## 4: 0.6213357 75.73055 15.09444 227.8421 7.54
## 5: 0.6213357 71.82000 15.28438 233.6122 7.57
## 6: 0.6213357 77.71991 15.39367 236.9650 7.86
# Speed statistics by AADT class (the <NA> level holds rows left unbinned).
head(AADT2)
##          Level Attribute Group_by  Count  Prop      mean    median
## 1: 10001-15000   Spd_All    AADT1 315360 33.30 41.888708 48.003600
## 2: 20001-30000   Spd_All    AADT1 350976 37.06 33.961209 35.388078
## 3:  5001-10000   Spd_All    AADT1 210240 22.20 35.741643 35.828323
## 4:      0-2000   Spd_All    AADT1  17664  1.87  4.483276  3.720896
## 5:   2001-5000   Spd_All    AADT1  17664  1.87 21.661821 23.040750
## 6:        <NA>   Spd_All    AADT1  35040  3.70 39.721500 35.910000
##        p0.85        min      max        sd        var    PS
## 1: 56.540618  0.6214813 78.72744 15.790811 249.349727 26.92
## 2: 49.178605  0.6213357 88.85277 14.043438 197.218157 53.10
## 3: 52.723582  0.6215028 75.86861 14.686481 215.692716 19.44
## 4:  7.022535  0.6216958 17.19310  2.569652   6.603112  0.09
## 5: 39.498429  0.6214982 65.05624 15.364787 236.076680  0.14
## 6: 35.910000 17.9550000 71.82000 11.785225 138.891527  0.31
# Speed statistics by crash status crossed with hour.
head(Crash2)
##      Crash1 Hour Attribute Count Prop     mean   median    p0.85       min
## 1: No crash   00   Spd_All 39456 4.17 37.15730 38.92184 51.08376 0.6215020
## 2: No crash   02   Spd_All 39455 4.17 38.09885 39.89238 53.76903 0.6213357
## 3: No crash   03   Spd_All 39456 4.17 37.84517 39.29380 53.51417 0.6215020
## 4: No crash   04   Spd_All 39456 4.17 38.11333 39.85875 53.94114 0.6214889
## 5: No crash   05   Spd_All 39455 4.17 37.60102 40.32502 53.55231 0.6214889
## 6: No crash   06   Spd_All 39454 4.17 36.94188 39.09191 53.15785 0.6214889
##         max       sd      var   PS
## 1: 67.54346 13.31644 177.3275 1.60
## 2: 66.54964 14.11265 199.1670 1.69
## 3: 67.08382 14.07897 198.2174 1.72
## 4: 71.82000 14.17775 201.0085 2.42
## 5: 88.85277 14.86651 221.0131 3.46
## 6: 73.49724 14.84148 220.2696 4.45
# Speed statistics by day/night flag.
head(DayNight1)
##    Level Attribute Group_by  Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 542520 57.29 35.78204 37.02199 52.56052
## 2:   Day   Spd_All DayNight 404424 42.71 35.97493 37.83567 52.98462
## 3: Night   Spd_Car DayNight 542520 57.29 37.10968 39.71769 53.66706
## 4:   Day   Spd_Car DayNight 404424 42.71 37.17971 39.80740 54.05352
## 5: Night Spd_Truck DayNight 542520 57.29 33.18266 32.80867 50.03885
## 6:   Day Spd_Truck DayNight 404424 42.71 32.00613 31.35573 49.17429
##          min      max       sd      var    PS
## 1: 0.6213357 88.85277 14.92313 222.7000 37.14
## 2: 0.6213357 78.72744 15.29166 233.8347 62.86
## 3: 0.6213357 88.85277 15.55792 242.0490 31.77
## 4: 0.6213357 82.34400 15.52096 240.9001 68.23
## 5: 0.6213357 71.82000 14.44222 208.5776 41.63
## 6: 0.6213357 71.82000 14.75228 217.6298 58.37
# Speed statistics by peak/off-peak flag.
head(PeakOffPeak1)
##           Level Attribute    Group_by  Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 729936 77.08 36.06341 37.54323
## 2: Morning Peak   Spd_All PeakOffPeak  88776  9.38 37.04196 39.00505
## 3: Evening Peak   Spd_All PeakOffPeak 128232 13.54 34.39723 35.91000
## 4:     Off-Peak   Spd_Car PeakOffPeak 729936 77.08 37.44765 39.94596
## 5: Morning Peak   Spd_Car PeakOffPeak  88776  9.38 38.20910 41.19409
## 6: Evening Peak   Spd_Car PeakOffPeak 128232 13.54 35.31003 37.37167
##       p0.85       min      max       sd      var    PS
## 1: 52.79806 0.6213357 88.85277 14.93786 223.1396 70.60
## 2: 53.33733 0.6214861 73.17021 14.84335 220.3249 13.06
## 3: 52.77816 0.6213357 75.41968 16.13434 260.3169 16.34
## 4: 53.93851 0.6213357 88.85277 15.30127 234.1289 68.64
## 5: 54.34370 0.6213357 73.17021 15.18437 230.5652 13.89
## 6: 53.61610 0.6213357 75.41968 16.46313 271.0346 17.47
# Export every summary table to "./<mytype>/des_output/" as CSV.
# The file names are built from mytype so that they always agree with the
# folder name; with mytype = "RMU" they are identical to the previously
# hard-coded "WA_RMU_OS_DS_<suffix>.csv" names.
des_out_dir <- file.path(".", mytype, "des_output")
# write.csv() fails when the target folder is missing, so create it on demand.
if (!dir.exists(des_out_dir)) {
  dir.create(des_out_dir, recursive = TRUE)
}

# File-name suffix -> summary table.
des_tables <- list(
  hour        = hour1,
  day         = day1,
  dow         = DOW1,
  month       = Month1,
  aadt        = AADT2,
  crash       = Crash2,
  daynight    = DayNight1,
  peakoffpeak = PeakOffPeak1
)

for (des_suffix in names(des_tables)) {
  write.csv(
    des_tables[[des_suffix]],
    file.path(des_out_dir, paste0("WA_", mytype, "_OS_DS_", des_suffix, ".csv")),
    row.names = FALSE
  )
}