Washington Conflated Data (Rural Interstate)

library(data.table)
library(dplyr)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
library(SmartEDA)



setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")
load("WA_Rural_Interstate_41_TMC_TT_SI_reduce_withCrash.rData")
mytype = 'RI'
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/",mytype))

df_RI <- b02a
dim(df_RI)
## [1] 9303840      64
### Calculating Speed
df_RI$Spd_All = 3600*df_RI$DISTANCE/df_RI$Travel_TIME_ALL_VEHICLES
df_RI$Spd_Car = 3600*df_RI$DISTANCE/df_RI$Travel_TIME_PASSENGER_VEHICLES
df_RI$Spd_Truck = 3600*df_RI$DISTANCE/df_RI$Travel_TIME_FREIGHT_TRUCKS


### Month, Day
df_RI$date <- as.character(df_RI$DATE)
df_RI$date <- str_pad(df_RI$DATE, 8, pad = "0")
df_RI$Month <- substr(df_RI$date, start = 1, stop = 2)
df_RI$Day   <- substr(df_RI$date, start = 3, stop = 4)
df_RI$Year  <- substr(df_RI$date, start = 5, stop = 8)

ConvEpoc2HM <- function(x) {
  # for a given epoc number, get its hour:min
  yy <- x*15
  y.hr <- yy %/% 60
  y.min <- yy %% 60
  x <- paste(str_pad(y.hr, 2, side = 'left', pad='0'), 
             str_pad(y.min, 2, side = 'left', pad='0'), 
             '00', sep = ':')
}


df_RI$Hour1 <- ConvEpoc2HM(df_RI$EPOCH15)
DATE4 <- paste(strptime(df_RI$date, format = "%m%d%Y", tz =""), df_RI$Hour1, sep = ' ')
df_RI$PCT_TIME <- as.POSIXct(DATE4, tz ="", format = "%Y-%m-%d %H:%M:%OS")
df_RI$Hour <- strftime(df_RI$PCT_TIME, format="%H")
df_RI$DOW <- wday(df_RI$PCT_TIME, label = TRUE)

Temporal Patterns

names(df_RI)
##  [1] "TimeStamp"                      "TMC"                           
##  [3] "V1"                             "DATE"                          
##  [5] "EPOCH15"                        "Travel_TIME_ALL_VEHICLES"      
##  [7] "Travel_TIME_PASSENGER_VEHICLES" "Travel_TIME_FREIGHT_TRUCKS"    
##  [9] "NP"                             "ADMIN_LEVE"                    
## [11] "ADMIN_LE_1"                     "ADMIN_LE_2"                    
## [13] "DISTANCE"                       "ROAD_NUMBE"                    
## [15] "ROAD_NAME"                      "LATITUDE"                      
## [17] "LONGITUDE"                      "ROAD_DIREC"                    
## [19] "ORN_FID"                        "FID_1"                         
## [21] "ACCESS"                         "LSHL_TY2"                      
## [23] "LSHL_TYP"                       "MED_TYPE"                      
## [25] "NHS_IND"                        "PRK_ZNE"                       
## [27] "RSHL_TY2"                       "RSHL_TYP"                      
## [29] "SURF_TYP"                       "SURF_TY2"                      
## [31] "TERRAIN"                        "COMP_DIR"                      
## [33] "COUNTY"                         "FUNC_CLS"                      
## [35] "MEDBARTY"                       "ST_FUNC"                       
## [37] "RTE_NBR"                        "HPMS"                          
## [39] "ROAD_INV"                       "SPD_LIMT"                      
## [41] "BEGMP"                          "ENDMP"                         
## [43] "LSHLDWID"                       "MEDWID"                        
## [45] "NO_LANE1"                       "NO_LANE2"                      
## [47] "NO_LANES"                       "RSHLDWID"                      
## [49] "RSHL_WD2"                       "SEG_LNG"                       
## [51] "lanewid"                        "rdwy_wd1"                      
## [53] "rdwy_wd2"                       "rdwy_wid"                      
## [55] "AADT"                           "mvmt"                          
## [57] "rodwycls"                       "ORN_FID_1"                     
## [59] "Total"                          "Fatal"                         
## [61] "Injury"                         "PDO"                           
## [63] "DAYMTH"                         "Crash"                         
## [65] "Spd_All"                        "Spd_Car"                       
## [67] "Spd_Truck"                      "date"                          
## [69] "Month"                          "Day"                           
## [71] "Year"                           "Hour1"                         
## [73] "PCT_TIME"                       "Hour"                          
## [75] "DOW"
df_RI$AADT1 <- cut(df_RI$AADT , breaks=c(0,2000,5000,10000, 15000, 20000, 30000, Inf), 
                   labels=c("0-2000","2001-5000","5001-10000","10001-15000","15001-20000","20001-30000","> 30000"))
table(df_RI$AADT1)
## 
##      0-2000   2001-5000  5001-10000 10001-15000 15001-20000 20001-30000 
##           0           0       88032     1138944     1646880     1471680 
##     > 30000 
##     4923264
df_RI$Crash1 <- cut(df_RI$Crash , breaks=c(-1,0,Inf), 
                    labels=c("No crash","Crash"))
table(df_RI$Crash1)
## 
## No crash    Crash 
##  9301626     2214
# ############################################################
# df_RI$DayNight <- cut(df_RI$EPOCH15 , breaks=c(-1,26,67,95), 
#                    labels=c("Night","Day","Night"))
# table(df_RI$DayNight)
# df_RI$PeakOffPeak <- cut(df_RI$EPOCH15 , breaks=c(-1,26,35,62,75, 96), 
#                    labels=c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak"))
# table(df_RI$PeakOffPeak)
# ###########################################################

df_RI$DayNight <- cut(df_RI$EPOCH15 , breaks=c(-1,26,67,95))
df_RI$DayNight <- as.numeric(df_RI$DayNight)
df_RI$DayNight <- c("Night","Day","Night")[df_RI$DayNight]
table(df_RI$DayNight)
## 
##     Day   Night 
## 3973515 5330325
df_RI$PeakOffPeak <- cut(df_RI$EPOCH15 , breaks=c(-1,26,35,62,75, 96))
df_RI$PeakOffPeak <- as.numeric(df_RI$PeakOffPeak)
df_RI$PeakOffPeak <- c("Off-Peak","Morning Peak","Off-Peak", "Evening Peak", "Off-Peak")[df_RI$PeakOffPeak]
table(df_RI$PeakOffPeak)
## 
## Evening Peak Morning Peak     Off-Peak 
##      1259895       872235      7171710
# # ###########################################################
# df_RI01 <- df_RI[,c(26:28, 31, 32, 34, 38, 55, 56, 6, 53, 49,54, 48, 57, 58, 44:46)]
# df_RI02 <- df_RI01[,c(8:19)]
# # ###########################################################
df_RI01 <- df_RI[,c("SPD_LIMT","LSHLDWID","MEDWID",  "NO_LANES","RSHLDWID","SEG_LNG", "rdwy_wid",  
                      "AADT1",   "Crash1",  "EPOCH15", "Hour","Day", "DOW", "Month",
                      "DayNight","PeakOffPeak","Spd_All", "Spd_Car", "Spd_Truck")]
df_RI02 <- df_RI01[,c( "AADT1","Crash1","EPOCH15","Hour", "Day", "DOW", "Month",
                         "DayNight","PeakOffPeak","Spd_All","Spd_Car","Spd_Truck")]


cols <- c("EPOCH15", "Hour", "Day", "DOW", "Month", "AADT1" , "Crash1", "DayNight", "PeakOffPeak")
cols1 <- c("Spd_All", "Spd_Car", "Spd_Truck")
cols2 <- c("SPD_LIMT", "LSHLDWID", "MEDWID" , "NO_LANES", "RSHLDWID" ,"SEG_LNG" ,  "rdwy_wid")
df_RI02= df_RI02 %<>%
  mutate_at(cols, funs(factor(.)))


hour1 <- ExpCustomStat(df_RI02,Cvar = c("Hour"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
day1 <- ExpCustomStat(df_RI02,Cvar = c("Day"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
DOW1 <- ExpCustomStat(df_RI02,Cvar = c("DOW"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
Month1 <- ExpCustomStat(df_RI02,Cvar = c("Month"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
AADT2 <- ExpCustomStat(df_RI02,Cvar = c("AADT1"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
Crash2 <- ExpCustomStat(df_RI02,Cvar = c("Crash1", "Hour"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'))
DayNight1 <- ExpCustomStat(df_RI02,Cvar = c("DayNight"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
PeakOffPeak1 <- ExpCustomStat(df_RI02,Cvar = c("PeakOffPeak"), Nvar=cols1, stat = c('Count','Prop','mean','median','p0.85','min', 'max','sd', 'var','PS'),gpby=FALSE)
geo <- ExpCustomStat(df_RI01, Nvar=cols2, stat = c('mean','median','p0.85','min', 'max','sd', 'var','PS'))


ggline(gather(hour1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Hour")

ggline(gather(DOW1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Day of Week")

ggline(gather(Month1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Month")

ggline(gather(AADT2[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By AADT")

ggline(gather(Crash2[,c(1, 2, 3, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Hour", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition+Crash1~ .)+labs(title="By Crash")

ggline(gather(DayNight1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Day/Night")

ggline(gather(PeakOffPeak1[,c(1, 2, 6, 8, 11)], condition, measurement,  mean:sd, factor_key=TRUE), x = "Level", y = "measurement", color = "Attribute",
       palette = c("red", "blue", "black"))+theme(legend.title=element_blank())+ facet_grid(condition~ .)+labs(title="By Peak/Off-Peak")

Temporal Statistics of Operational Speed

setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")

head(hour1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    00   Spd_All     Hour 387660 4.17 59.16427 60.44120 63.09938
## 2:    02   Spd_All     Hour 387660 4.17 58.88994 60.34188 62.81449
## 3:    03   Spd_All     Hour 387660 4.17 58.97503 60.43151 62.88271
## 4:    04   Spd_All     Hour 387660 4.17 59.16004 60.54903 63.06764
## 5:    05   Spd_All     Hour 387660 4.17 59.78718 60.93243 63.96322
## 6:    06   Spd_All     Hour 387660 4.17 60.17584 61.18908 64.61305
##          min       max       sd      var   PS
## 1: 0.6205909 110.05200 6.620055 43.82513 3.64
## 2: 0.3015088 110.05200 6.479639 41.98572 3.65
## 3: 0.6214827  91.27195 6.506683 42.33692 3.71
## 4: 0.1901463 110.05200 6.546507 42.85675 3.82
## 5: 0.6214827  91.26807 6.721821 45.18287 4.00
## 6: 0.6214737 110.05200 6.830326 46.65335 4.16
head(day1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All      Day 305856 3.29 60.52047 61.33030 65.00892
## 2:    02   Spd_All      Day 305856 3.29 60.30087 61.28269 64.87649
## 3:    03   Spd_All      Day 305856 3.29 60.55338 61.33211 64.95398
## 4:    04   Spd_All      Day 305856 3.29 60.28285 61.28702 64.95671
## 5:    05   Spd_All      Day 305856 3.29 60.39961 61.27137 64.93816
## 6:    06   Spd_All      Day 305856 3.29 60.69394 61.40769 65.12400
##          min     max       sd      var   PS
## 1: 0.6214737 110.052 6.892661 47.50877 3.22
## 2: 0.3015088 110.052 7.032819 49.46055 3.31
## 3: 0.6214737 110.052 6.718134 45.13332 3.31
## 4: 0.1790391 110.052 7.261986 52.73644 3.25
## 5: 0.6214827 110.052 7.088801 50.25110 3.27
## 6: 0.6202286 110.052 6.886575 47.42491 3.30
head(DOW1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:   Thu   Spd_All      DOW 1351104 14.52 60.28254 61.31314 64.65521
## 2:   Fri   Spd_All      DOW 1325376 14.25 60.73164 61.51794 65.51501
## 3:   Sat   Spd_All      DOW 1325376 14.25 61.05724 61.46548 66.22813
## 4:   Sun   Spd_All      DOW 1325376 14.25 60.79640 61.37829 66.19257
## 5:   Mon   Spd_All      DOW 1325376 14.25 60.34534 61.33211 64.73792
## 6:   Tue   Spd_All      DOW 1325376 14.25 60.20380 61.24617 64.35642
##          min     max       sd      var    PS
## 1: 0.3015088 110.052 6.969782 48.57786 14.80
## 2: 0.6214737 110.052 7.210267 51.98794 14.61
## 3: 0.3329612 110.052 7.049717 49.69850 13.92
## 4: 0.3069072 110.052 7.583639 57.51158 13.18
## 5: 0.1901463 110.052 6.941705 48.18727 14.27
## 6: 0.6214645 110.052 6.637874 44.06137 14.60
head(Month1)
##    Level Attribute Group_by  Count Prop     mean   median    p0.85
## 1:    01   Spd_All    Month 782688 8.41 59.49789 60.65600 64.13450
## 2:    02   Spd_All    Month 706944 7.60 60.23695 61.05738 64.64627
## 3:    03   Spd_All    Month 782688 8.41 60.53000 61.31449 64.97023
## 4:    04   Spd_All    Month 757440 8.14 60.58479 61.37631 65.01388
## 5:    05   Spd_All    Month 782688 8.41 60.78117 61.44716 65.25750
## 6:    06   Spd_All    Month 757440 8.14 60.81023 61.56433 65.54809
##          min     max       sd      var   PS
## 1: 0.4558267 110.052 7.416034 54.99756 8.01
## 2: 0.1901463 110.052 6.949830 48.30014 7.48
## 3: 0.1790391 110.052 7.072570 50.02124 8.37
## 4: 0.3015088 110.052 7.095538 50.34665 8.21
## 5: 0.6214645 110.052 7.095307 50.34338 8.46
## 6: 0.6187059 110.052 7.446483 55.45010 8.32
head(AADT2)
##          Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1:     > 30000   Spd_All    AADT1 4923264 52.92 60.90318 61.47704 65.63090
## 2: 15001-20000   Spd_All    AADT1 1646880 17.70 59.43579 61.00288 64.27708
## 3: 20001-30000   Spd_All    AADT1 1471680 15.82 60.43144 61.18777 64.22400
## 4: 10001-15000   Spd_All    AADT1 1138944 12.24 61.50970 61.70645 64.80651
## 5:  5001-10000   Spd_All    AADT1   88032  0.95 43.48890 57.31888 62.14566
## 6:        <NA>   Spd_All    AADT1   35040  0.38 55.53743 55.02600 55.02600
##          min       max        sd       var    PS
## 1: 0.6187059  92.10300  6.612917  43.73067 53.70
## 2: 0.6214737  90.48240  7.317704  53.54879 17.07
## 3: 0.6214950  91.97280  6.534802  42.70364 16.34
## 4: 0.6214787  92.78333  4.845620  23.48003 12.20
## 5: 0.1790391  84.90511 25.536959 652.13630  0.49
## 6: 5.6436923 110.05200  5.901306  34.82542  0.20
head(Crash2)
##      Crash1 Hour Attribute  Count Prop     mean   median    p0.85
## 1: No crash   00   Spd_All 387613 4.17 59.16471 60.44135 63.09955
## 2: No crash   02   Spd_All 387619 4.17 58.89005 60.34188 62.81449
## 3: No crash   03   Spd_All 387622 4.17 58.97522 60.43151 62.88271
## 4: No crash   04   Spd_All 387613 4.17 59.16054 60.55000 63.06764
## 5: No crash   05   Spd_All 387586 4.17 59.78817 60.93243 63.96347
## 6: No crash   06   Spd_All 387565 4.17 60.17718 61.18911 64.61345
##          min       max       sd      var   PS
## 1: 0.6205909 110.05200 6.619307 43.81523 3.64
## 2: 0.3015088 110.05200 6.479189 41.97989 3.65
## 3: 0.6214827  91.27195 6.505830 42.32582 3.71
## 4: 0.1901463 110.05200 6.546405 42.85542 3.82
## 5: 0.6214827  91.26807 6.720517 45.16535 4.00
## 6: 0.6214737 110.05200 6.828089 46.62280 4.16
head(DayNight1)
##    Level Attribute Group_by   Count  Prop     mean   median    p0.85
## 1: Night   Spd_All DayNight 5330325 57.29 59.89615 60.94800 64.27428
## 2:   Day   Spd_All DayNight 3973515 42.71 61.26895 61.90013 65.90685
## 3: Night   Spd_Car DayNight 5330325 57.29 62.30832 62.81941 68.09263
## 4:   Day   Spd_Car DayNight 3973515 42.71 63.84810 64.30803 69.70162
## 5: Night Spd_Truck DayNight 5330325 57.29 58.24493 59.94904 61.95700
## 6:   Day Spd_Truck DayNight 3973515 42.71 58.61165 60.14499 61.85295
##          min       max       sd      var    PS
## 1: 0.1790391 110.05200 6.971240 48.59819 54.42
## 2: 0.6187059 110.05200 7.015949 49.22354 45.58
## 3: 0.3069072 110.05200 7.578883 57.43947 49.69
## 4: 0.1109338 110.05200 7.601828 57.78779 50.31
## 5: 0.1790391  86.99618 6.425325 41.28481 53.52
## 6: 0.2589905  82.17000 6.177161 38.15731 46.48
head(PeakOffPeak1)
##           Level Attribute    Group_by   Count  Prop     mean   median
## 1:     Off-Peak   Spd_All PeakOffPeak 7171710 77.08 60.34196 61.23585
## 2: Morning Peak   Spd_All PeakOffPeak  872235  9.38 60.80524 61.55596
## 3: Evening Peak   Spd_All PeakOffPeak 1259895 13.54 61.23780 61.99722
## 4:     Off-Peak   Spd_Car PeakOffPeak 7171710 77.08 62.83870 63.26114
## 5: Morning Peak   Spd_Car PeakOffPeak  872235  9.38 63.32925 63.74814
## 6: Evening Peak   Spd_Car PeakOffPeak 1259895 13.54 64.02098 64.64935
##       p0.85       min     max       sd      var    PS
## 1: 64.77240 0.1901463 110.052 6.929175 48.01347 75.88
## 2: 65.25401 0.6214737 110.052 6.792508 46.13816  9.73
## 3: 66.36363 0.1790391 110.052 7.610712 57.92293 14.39
## 4: 68.68267 0.1225678 110.052 7.507579 56.36375 73.88
## 5: 69.08688 0.1109338 110.052 7.337392 53.83731 10.25
## 6: 70.47090 0.3069072 110.052 8.282295 68.59641 15.86
write.csv(hour1, paste0("./",mytype,"/des_output/WA_RI_OS_DS_hour.csv"),row.names = FALSE)
write.csv(day1, paste0("./",mytype,"/des_output/WA_RI_OS_DS_day.csv"),row.names = FALSE)
write.csv(DOW1, paste0("./",mytype,"/des_output/WA_RI_OS_DS_dow.csv"),row.names = FALSE)
write.csv(Month1,paste0("./",mytype,"/des_output/WA_RI_OS_DS_month.csv"),row.names = FALSE)
write.csv(AADT2, paste0("./",mytype,"/des_output/WA_RI_OS_DS_aadt.csv"),row.names = FALSE)
write.csv(Crash2, paste0("./",mytype,"/des_output/WA_RI_OS_DS_crash.csv"),row.names = FALSE)
write.csv(DayNight1, paste0("./",mytype,"/des_output/WA_RI_OS_DS_daynight.csv"),row.names = FALSE)
write.csv(PeakOffPeak1, paste0("./",mytype,"/des_output/WA_RI_OS_DS_peakoffpeak.csv"),row.names = FALSE)