Washington Conflated Data (Rural Two Lane)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)


mytype = 'R2'
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")
load("./two-lane_undivided_WA_reduce_withCrash.rData")


df_R2 <- W_2un_nomed
dim(df_R2)
## [1] 23080608       64
### Calculating Speed
df_R2$Spd_All = 3600*df_R2$DISTANCE/df_R2$Travel_TIME_ALL_VEHICLES
df_R2$Spd_Car = 3600*df_R2$DISTANCE/df_R2$Travel_TIME_PASSENGER_VEHICLES
df_R2$Spd_Truck = 3600*df_R2$DISTANCE/df_R2$Travel_TIME_FREIGHT_TRUCKS

### Remove outliers
df_R2$spd_av = ifelse(df_R2$spd_av <120, df_R2$spd_av, NA)
df_R2$spd_pv = ifelse(df_R2$spd_pv <120, df_R2$spd_pv, NA)
df_R2$spd_ft = ifelse(df_R2$spd_ft <120, df_R2$spd_ft, NA)

### Month, Day
df_R2$date <- as.character(df_R2$DATE)
df_R2$date <- str_pad(df_R2$DATE, 8, pad = "0")
df_R2$Month <- substr(df_R2$date, start = 1, stop = 2)
df_R2$Day   <- substr(df_R2$date, start = 3, stop = 4)
df_R2$Year  <- substr(df_R2$date, start = 5, stop = 8)

ConvEpoc2HM <- function(x) {
  # for a given epoc number, get its hour:min
  yy <- x*15
  y.hr <- yy %/% 60
  y.min <- yy %% 60
  x <- paste(str_pad(y.hr, 2, side = 'left', pad='0'), 
             str_pad(y.min, 2, side = 'left', pad='0'), 
             '00', sep = ':')
}


df_R2$Hour1 <- ConvEpoc2HM(df_R2$EPOCH15)
DATE4 <- paste(strptime(df_R2$date, format = "%m%d%Y", tz =""), df_R2$Hour1, sep = ' ')
df_R2$PCT_TIME <- as.POSIXct(DATE4, tz ="", format = "%Y-%m-%d %H:%M:%OS")
df_R2$Hour <- strftime(df_R2$PCT_TIME, format="%H")
df_R2$DOW <- wday(df_R2$PCT_TIME, label = TRUE)

Temporal Patterns

names(df_R2)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "V1"                             "DATE"                          
##  [5] "EPOCH15"                        "Travel_TIME_ALL_VEHICLES"      
##  [7] "Travel_TIME_PASSENGER_VEHICLES" "Travel_TIME_FREIGHT_TRUCKS"    
##  [9] "NP"                             "ADMIN_LEVE"                    
## [11] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [13] "DISTANCE"                       "ROAD_NUMBE"                    
## [15] "ROAD_NAME"                      "LATITUDE"                      
## [17] "LONGITUDE"                      "ROAD_DIREC"                    
## [19] "ORN_FID"                        "FID_1"                         
## [21] "ACCESS"                         "LSHL_TY2"                      
## [23] "LSHL_TYP"                       "MED_TYPE"                      
## [25] "NHS_IND"                        "PRK_ZNE"                       
## [27] "RSHL_TY2"                       "RSHL_TYP"                      
## [29] "SURF_TYP"                       "SURF_TY2"                      
## [31] "TERRAIN"                        "COMP_DIR"                      
## [33] "COUNTY"                         "FUNC_CLS"                      
## [35] "MEDBARTY"                       "ST_FUNC"                       
## [37] "RTE_NBR"                        "HPMS"                          
## [39] "ROAD_INV"                       "SPD_LIMT"                      
## [41] "BEGMP"                          "ENDMP"                         
## [43] "LSHLDWID"                       "MEDWID"                        
## [45] "NO_LANE1"                       "NO_LANE2"                      
## [47] "NO_LANES"                       "RSHLDWID"                      
## [49] "RSHL_WD2"                       "SEG_LNG"                       
## [51] "lanewid"                        "rdwy_wd1"                      
## [53] "rdwy_wd2"                       "rdwy_wid"                      
## [55] "AADT"                           "mvmt"                          
## [57] "rodwycls"                       "ORN_FID_1"                     
## [59] "Total"                          "Fatal"                         
## [61] "Injury"                         "PDO"                           
## [63] "DAYMTH"                         "Crash"                         
## [65] "Spd_All"                        "Spd_Car"                       
## [67] "Spd_Truck"                      "spd_av"                        
## [69] "spd_pv"                         "spd_ft"                        
## [71] "date"                           "Month"                         
## [73] "Day"                            "Year"                          
## [75] "Hour1"                          "PCT_TIME"                      
## [77] "Hour"                           "DOW"
df_R2$AADT1 <- cut(df_R2$AADT , breaks=c(0,2000,5000,10000, 15000, 20000, 30000, Inf), 
                   labels=c("0-2000","2001-5000","5001-10000","10001-15000","15001-20000","20001-30000","> 30000"))
table(df_R2$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##     3644160     8674848     7185792     1595040     1437504      245280 
##     > 30000 
##           0
df_R2$Crash1 <- cut(df_R2$Crash , breaks=c(-1,0,Inf), 
                    labels=c("No crash","Crash"))
table(df_R2$Crash1)
## 
## No crash    Crash 
## 23078042     2566
# ############################################################
# df_R2$DayNight <- cut(df_R2$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_R2$DayNight)
# df_R2$PeakOffPeak <- cut(df_R2$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_R2$PeakOffPeak)
# ###########################################################

df_R2$DayNight <- cut(df_R2$EPOCH15 , breaks=c(-1,26,67,95))
df_R2$DayNight <- as.numeric(df_R2$DayNight)
df_R2$DayNight <- c("Night","Day","Night")[df_R2$DayNight]
table(df_R2$DayNight)
## 
##      Day    Night 
##  9857343 13223265
df_R2$PeakOffPeak <- cut(df_R2$EPOCH15 , breaks=c(-1,26,35,62,75, 96))
df_R2$PeakOffPeak <- as.numeric(df_R2$PeakOffPeak)
df_R2$PeakOffPeak <- c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak")[df_R2$PeakOffPeak]
table(df_R2$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##      3125499      2163807     17791302
# # ###########################################################
# df_R201 <- df_R2[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_R202 <- df_R201[,c(8:19)]
# # ###########################################################
df_R201 <- df_R2[,c("SPD_LIMT","LSHLDWID","MEDWID",  "NO_LANES","RSHLDWID","SEG_LNG", "rdwy_wid",  
                      "AADT1",   "Crash1",  "EPOCH15", "Hour","Day", "DOW", "Month",
                      "DayNight","PeakOffPeak","Spd_All", "Spd_Car", "Spd_Truck")]
df_R202 <- df_R201[,c( "AADT1","Crash1","EPOCH15","Hour", "Day", "DOW", "Month",
                         "DayNight","PeakOffPeak","Spd_All","Spd_Car","Spd_Truck")]


cols <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1" , "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("SPD_LIMT", "LSHLDWID", "MEDWID" , "NO_LANES", "RSHLDWID" ,"SEG_LNG" ,  "rdwy_wid")
df_R202= df_R202 %<>%
  mutate_at(cols, funs(factor(.)))


hour1 <- ExpCustomStat(df_R202,Cvar = c("Hour"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
day1 <- ExpCustomStat(df_R202,Cvar = c("Day"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
DOW1 <- ExpCustomStat(df_R202,Cvar = c("DOW"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
Month1 <- ExpCustomStat(df_R202,Cvar = c("Month"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
AADT2 <- ExpCustomStat(df_R202,Cvar = c("AADT1"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
Crash2 <- ExpCustomStat(df_R202,Cvar = c("Crash1", "Hour"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'))
DayNight1 <- ExpCustomStat(df_R202,Cvar = c("DayNight"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
PeakOffPeak1 <- ExpCustomStat(df_R202,Cvar = c("PeakOffPeak"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
geo <- ExpCustomStat(df_R201, Nvar=cols2, stat = c('mean','median','p0.85','min', 'max','sd', 'var','PS'))


ggline(gather(hour1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Hour")

ggline(gather(DOW1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Day of Week")

ggline(gather(Month1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Month")

ggline(gather(AADT2[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By AADT")

ggline(gather(Crash2[,c(1, 2, 3, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Hour", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition+Crash1~ .)+labs(title="By Crash")

ggline(gather(DayNight1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Day/Night")

ggline(gather(PeakOffPeak1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Peak/Off-Peak")

Temporal Statistics of Operational Speed

setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")

head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 961692 4.17 49.15593 52.84053 60.91672
## 2:    02   Spd_All     Hour 961692 4.17 49.71118 53.25287 61.34149
## 3:    03   Spd_All     Hour 961692 4.17 49.26730 52.89082 61.15369
## 4:    04   Spd_All     Hour 961692 4.17 49.73000 53.37789 61.23947
## 5:    05   Spd_All     Hour 961692 4.17 49.81418 53.17393 61.15586
## 6:    06   Spd_All     Hour 961692 4.17 49.73013 52.90097 60.92837
##          min       max       sd      var   PS
## 1: 0.4748904 2271.0690 23.49338 551.9390 1.53
## 2: 0.4748904 1651.6865 18.44794 340.3265 1.56
## 3: 0.4748904  908.4276 16.11487 259.6889 1.80
## 4: 0.4748904 1397.5809 16.35439 267.4662 2.36
## 5: 0.4748904 1397.5809 18.18746 330.7838 3.26
## 6: 0.3607134 1397.5809 21.57284 465.3874 4.34
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 758592 3.29 49.25252 52.79154 60.82216
## 2:    02   Spd_All      Day 758592 3.29 48.92708 52.27405 60.51862
## 3:    03   Spd_All      Day 758592 3.29 48.92677 52.34234 60.53001
## 4:    04   Spd_All      Day 758592 3.29 49.08806 52.57044 60.69179
## 5:    05   Spd_All      Day 758592 3.29 49.01132 52.59557 60.65831
## 6:    06   Spd_All      Day 758592 3.29 49.33393 52.78355 60.71775
##          min      max       sd      var   PS
## 1: 0.4748904 1397.581 22.47545 505.1458 2.99
## 2: 0.4748904 1816.855 21.69355 470.6103 3.42
## 3: 0.4748904 2018.728 21.89425 479.3583 3.35
## 4: 0.4965955 2018.728 22.46098 504.4957 3.18
## 5: 0.3607134 2018.728 20.45719 418.4966 3.14
## 6: 0.3607134 1651.687 22.82097 520.7965 3.34
head(DOW1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:   Wed   Spd_All      DOW 3291744 14.26 48.94055 52.36609 60.59867
## 2:   Thu   Spd_All      DOW 3352704 14.53 49.01387 52.48741 60.64443
## 3:   Fri   Spd_All      DOW 3287232 14.24 49.16126 52.54978 60.65966
## 4:   Sat   Spd_All      DOW 3287232 14.24 49.49010 52.85929 60.88013
## 5:   Sun   Spd_All      DOW 3287232 14.24 49.39843 52.97843 60.97012
## 6:   Mon   Spd_All      DOW 3287232 14.24 49.06111 52.47502 60.63239
##          min      max       sd      var    PS
## 1: 0.3607134 1816.855 20.71954 429.2993 16.70
## 2: 0.3607134 1816.855 21.93713 481.2375 16.62
## 3: 0.3607134 2018.728 23.61010 557.4368 16.11
## 4: 0.3607134 2271.069 27.91978 779.5143 10.18
## 5: 0.3607134 2018.728 26.04009 678.0865  7.92
## 6: 0.3607134 2595.507 21.59448 466.3215 15.68
head(Month1)
##    Level Attribute Group_by   Count Prop     mean   median    p0.85
## 1:    07   Spd_All    Month 2029632 8.79 48.54614 52.45546 60.86608
## 2:    08   Spd_All    Month 2029632 8.79 48.51148 52.43925 60.88316
## 3:    09   Spd_All    Month 1964160 8.51 48.91354 52.78032 60.96308
## 4:    10   Spd_All    Month 2029632 8.79 49.08086 52.85054 60.89480
## 5:    11   Spd_All    Month 1964160 8.51 48.64636 52.17041 60.61032
## 6:    12   Spd_All    Month 2029632 8.79 46.89174 50.03110 59.56507
##          min      max       sd      var   PS
## 1: 0.6207591 89.83948 13.76924 189.5919 9.56
## 2: 0.6203265 92.11333 13.81450 190.8405 9.07
## 3: 0.6207591 91.08870 13.57451 184.2673 8.75
## 4: 0.6207591 97.56000 13.28766 176.5619 9.15
## 5: 0.6203265 92.73824 13.15485 173.0501 8.18
## 6: 0.6192558 88.52325 13.37273 178.8298 8.14
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:  5001-10000   Spd_All    AADT1 7185792 31.13 50.14718 52.88336 60.36686
## 2:   2001-5000   Spd_All    AADT1 8674848 37.59 50.59747 54.67048 61.28161
## 3:        <NA>   Spd_All    AADT1  297984  1.29 33.07762 38.53864 50.25211
## 4: 15001-20000   Spd_All    AADT1 1437504  6.23 45.23606 47.07547 58.41755
## 5: 20001-30000   Spd_All    AADT1  245280  1.06 41.26121 41.65200 56.53376
## 6:      0-2000   Spd_All    AADT1 3644160 15.79 50.28133 54.50003 61.47759
##          min        max       sd       var    PS
## 1: 0.4748904 2595.50743 33.06073 1093.0119 37.26
## 2: 0.6192558   97.56000 12.76177  162.8627 38.69
## 3: 0.6211297   75.47088 16.86071  284.2835  0.18
## 4: 0.3607134   88.60849 12.81995  164.3512  8.38
## 5: 0.6214676   75.97626 13.66982  186.8641  1.68
## 6: 0.6210162   90.81705 13.72082  188.2610  7.53
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 961628 4.17 49.15701 52.84128 60.91672
## 2: No crash   02   Spd_All 961654 4.17 49.71190 53.25388 61.34151
## 3: No crash   03   Spd_All 961649 4.17 49.26745 52.89082 61.15474
## 4: No crash   04   Spd_All 961648 4.17 49.73056 53.37789 61.23947
## 5: No crash   05   Spd_All 961626 4.17 49.81467 53.17443 61.15586
## 6: No crash   06   Spd_All 961589 4.17 49.73048 52.90097 60.92837
##          min       max       sd      var   PS
## 1: 0.4748904 2271.0690 23.49413 551.9740 1.53
## 2: 0.4748904 1651.6865 18.44833 340.3409 1.56
## 3: 0.4748904  908.4276 16.11476 259.6856 1.80
## 4: 0.4748904 1397.5809 16.35405 267.4548 2.35
## 5: 0.4748904 1397.5809 18.18786 330.7984 3.26
## 6: 0.3607134 1397.5809 21.57408 465.4410 4.34
head(DayNight1)
##    Level Attribute Group_by    Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 13223265 57.29 49.40874 52.82338 60.92400
## 2:   Day   Spd_All DayNight  9857343 42.71 48.91961 52.34388 60.49136
## 3: Night   Spd_Car DayNight 13223265 57.29 50.00980 52.97454 61.54997
## 4:   Day   Spd_Car DayNight  9857343 42.71 49.04493 52.27442 61.00213
## 5: Night Spd_Truck DayNight 13223265 57.29 48.93462 53.07063 60.40254
## 6:   Day Spd_Truck DayNight  9857343 42.71 49.10085 53.39862 60.28038
##          min      max       sd      var    PS
## 1: 0.3607134 2595.507 21.97110 482.7291 37.71
## 2: 0.3607134 2018.728 23.24277 540.2262 62.29
## 3: 0.3607134 2595.507 25.27966 639.0612 33.56
## 4: 0.3607134 2018.728 25.68657 659.8001 66.44
## 5: 0.4748904 1135.534 16.10601 259.4036 38.52
## 6: 0.4748904 1397.581 15.79334 249.4296 61.48
head(PeakOffPeak1)
##           Level Attribute    Group_by    Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 17791302 77.08 49.08001 52.51639
## 2: Morning Peak   Spd_All PeakOffPeak  2163807  9.38 49.19692 52.53557
## 3: Evening Peak   Spd_All PeakOffPeak  3125499 13.54 49.13246 52.68626
## 4:     Off-Peak   Spd_Car PeakOffPeak 17791302 77.08 49.34339 52.51568
## 5: Morning Peak   Spd_Car PeakOffPeak  2163807  9.38 49.54117 52.72138
## 6: Evening Peak   Spd_Car PeakOffPeak  3125499 13.54 49.31964 52.75678
##       p0.85       min      max       sd      var    PS
## 1: 60.66486 0.3607134 2595.507 22.64408 512.7545 71.12
## 2: 60.65762 0.4748904 1816.855 23.51043 552.7402 12.11
## 3: 60.79315 0.3607134 2018.728 22.79393 519.5631 16.77
## 4: 61.18336 0.3607134 2595.507 25.51348 650.9379 69.83
## 5: 61.11641 0.4748904 1816.855 26.04420 678.3005 12.83
## 6: 61.42333 0.3607134 2018.728 25.36581 643.4245 17.34
write.csv(hour1, paste0("./",mytype,"/des_output/WA_R2_OS_DS_hour.csv"),row.names = FALSE)
write.csv(day1, paste0("./",mytype,"/des_output/WA_R2_OS_DS_day.csv"),row.names = FALSE)
write.csv(DOW1, paste0("./",mytype,"/des_output/WA_R2_OS_DS_dow.csv"),row.names = FALSE)
write.csv(Month1,paste0("./",mytype,"/des_output/WA_R2_OS_DS_month.csv"),row.names = FALSE)
write.csv(AADT2, paste0("./",mytype,"/des_output/WA_R2_OS_DS_aadt.csv"),row.names = FALSE)
write.csv(Crash2, paste0("./",mytype,"/des_output/WA_R2_OS_DS_crash.csv"),row.names = FALSE)
write.csv(DayNight1, paste0("./",mytype,"/des_output/WA_R2_OS_DS_daynight.csv"),row.names = FALSE)
write.csv(PeakOffPeak1, paste0("./",mytype,"/des_output/WA_R2_OS_DS_peakoffpeak.csv"),row.names = FALSE)