Washington Conflated Data (Multi-lane Undivided)
library(data.table)
library(dplyr)
library(wavelets)
library(tidyr)
library(naniar)
library(stringr)
library(ggplot2)
library(DT)
library(lubridate)
library(ggpubr)
setwd("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/")
load("./multi-lane_undivided_WA_reduce_withCrash.rData")
mytype = 'RMU'
setwd(paste0("/scratch/user/cma16/Task4_Deliverable2/Process4/AllCrash/FacilityBased/",mytype))
df_RMU <- W_mun_nomed
dim(df_RMU)
## [1] 946944 64
### Calculating Speed
df_RMU$spd_av = 3600*df_RMU$DISTANCE/df_RMU$Travel_TIME_ALL_VEHICLES
df_RMU$spd_pv = 3600*df_RMU$DISTANCE/df_RMU$Travel_TIME_PASSENGER_VEHICLES
df_RMU$spd_ft = 3600*df_RMU$DISTANCE/df_RMU$Travel_TIME_FREIGHT_TRUCKS
### Month, Day
df_RMU$date <- as.character(df_RMU$DATE)
df_RMU$date <- str_pad(df_RMU$DATE, 8, pad = "0")
df_RMU$Month <- substr(df_RMU$date, start = 1, stop = 2)
df_RMU$Day <- substr(df_RMU$date, start = 3, stop = 4)
df_RMU$Year <- substr(df_RMU$date, start = 5, stop = 8)
head(df_RMU,2)
## TimeStamp TMC V1 DATE EPOCH15
## 1: 114N05556_0101_0 114N05556 479137 1012015 0
## 2: 114N05556_0101_1 114N05556 479138 1012015 1
## Travel_TIME_ALL_VEHICLES Travel_TIME_PASSENGER_VEHICLES
## 1: NA NA
## 2: NA NA
## Travel_TIME_FREIGHT_TRUCKS NP ADMIN_LEVE ADMIN_LE_1 ADMIN_LE_2 DISTANCE
## 1: NA N USA Washington Whitman 0.67404
## 2: NA N USA Washington Whitman 0.67404
## ROAD_NUMBE ROAD_NAME LATITUDE LONGITUDE ROAD_DIREC ORN_FID FID_1
## 1: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## 2: US-195 46.8799 -117.3648 Southbound 31759.53 9634.893
## ACCESS LSHL_TY2 LSHL_TYP MED_TYPE NHS_IND PRK_ZNE RSHL_TY2 RSHL_TYP
## 1: 5 C Y X C
## 2: 5 C Y X C
## SURF_TYP SURF_TY2 TERRAIN COMP_DIR COUNTY FUNC_CLS MEDBARTY ST_FUNC
## 1: A R N 38 43 R1
## 2: A R N 38 43 R1
## RTE_NBR HPMS ROAD_INV SPD_LIMT BEGMP ENDMP LSHLDWID MEDWID
## 1: 195 6195380374900 195 60 35.95 36.64 0 0
## 2: 195 6195380374900 195 60 35.95 36.64 0 0
## NO_LANE1 NO_LANE2 NO_LANES RSHLDWID RSHL_WD2 SEG_LNG lanewid
## 1: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## 2: 1.961917 1.977804 4 0 0 0.05879853 13.23761
## rdwy_wd1 rdwy_wd2 rdwy_wid AADT mvmt rodwycls ORN_FID_1 Total
## 1: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## 2: 51.96074 0 51.96074 10130.56 0.2192078 10 31759.53 10
## Fatal Injury PDO DAYMTH Crash spd_av spd_pv spd_ft date Month Day
## 1: 0 2 8 0101 0 NA NA NA 01012015 01 01
## 2: 0 2 8 0101 0 NA NA NA 01012015 01 01
## Year
## 1: 2015
## 2: 2015
#####
df1= df_RMU[,c("spd_av","spd_pv","spd_ft")]
df2= df_RMU[,c("date","spd_av","spd_pv","spd_ft")]
df3= df_RMU[,c("Month","spd_av","spd_pv","spd_ft")]
df4= df_RMU[,c("Day","spd_av","spd_pv","spd_ft")]
df5= df_RMU[,c("Year","spd_av","spd_pv","spd_ft")]
######################################################
ConvEpoc2HM <- function(x) {
# for a given epoc number, get its hour:min
yy <- x*15
y.hr <- yy %/% 60
y.min <- yy %% 60
x <- paste(str_pad(y.hr, 2, side = 'left', pad='0'),
str_pad(y.min, 2, side = 'left', pad='0'),
'00', sep = ':')
}
df_RMU$Hour1 <- ConvEpoc2HM(df_RMU$EPOCH15)
DATE4 <- paste(strptime(df_RMU$date, format = "%m%d%Y", tz =""), df_RMU$Hour1, sep = ' ')
df_RMU$PCT_TIME <- as.POSIXct(DATE4, tz ="", format = "%Y-%m-%d %H:%M:%OS")
df_RMU$Hour <- strftime(df_RMU$PCT_TIME, format="%H")
df_RMU$DOW <- wday(df_RMU$PCT_TIME, label = TRUE)
df6= df_RMU[,c("Hour","spd_av","spd_pv","spd_ft")]
df7= df_RMU[,c("DOW","spd_av","spd_pv","spd_ft")]
################################################################
#### Operating Speed by Month
long <- melt(df3, id.vars = c("Month"))
ggviolin(long, "Month", "value", fill = "Month",
add = "boxplot", add.params = list(fill = "white"))+coord_flip()+facet_grid(. ~ "variable")+
geom_hline(yintercept=38, linetype="dashed",
color = "red", size=1)+labs(title="Operating Speed by Month", x="Month",y="Speed (mph)")+ theme(legend.position="none")
## Warning: Removed 2091215 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2091215 rows containing non-finite values (stat_boxplot).
#### Operating Speed by Month [All Vehicles]
long <- melt(df3[,c(1,2)], id.vars = c("Month"))
ggviolin(long, "Month", "value", fill = "Month",
add = "boxplot", add.params = list(fill = "white"))+coord_flip()+
geom_hline(yintercept=38, linetype="dashed",
color = "red", size=1)+labs(title="Operating Speed (All vehicles) by Month", x="Month",y="Speed (mph)")+ theme(legend.position="none")
## Warning: Removed 611688 rows containing non-finite values (stat_ydensity).
## Warning: Removed 611688 rows containing non-finite values (stat_boxplot).
#### Operating Speed by Month [Passenger Cars]
long <- melt(df3[,c(1,3)], id.vars = c("Month"))
ggviolin(long, "Month", "value", fill = "Month",
add = "boxplot", add.params = list(fill = "white"))+coord_flip()+
geom_hline(yintercept=40, linetype="dashed",
color = "red", size=1)+labs(title="Operating Speed (Passenger Cars) by Month", x="Month",y="Speed (mph)")+ theme(legend.position="none")
## Warning: Removed 689942 rows containing non-finite values (stat_ydensity).
## Warning: Removed 689942 rows containing non-finite values (stat_boxplot).
#### Operating Speed by Month [Fright Trucks]
long <- melt(df3[,c(1,4)], id.vars = c("Month"))
ggviolin(long, "Month", "value", fill = "Month",
add = "boxplot", add.params = list(fill = "white"))+coord_flip()+
geom_hline(yintercept=32, linetype="dashed",
color = "red", size=1)+labs(title="Operating Speed (Freight Trucks) by Month", x="Month",y="Speed (mph)")+ theme(legend.position="none")
## Warning: Removed 789585 rows containing non-finite values (stat_ydensity).
## Warning: Removed 789585 rows containing non-finite values (stat_boxplot).
gg_miss_var(df3, facet = Month, show_pct = TRUE)
gg_miss_var(df4, facet = Day, show_pct = TRUE)
gg_miss_var(df5, facet = Year, show_pct = TRUE)
gg_miss_var(df2, facet = date, show_pct = TRUE)
### Missingness by Date
df2a <- df2 %>%
group_by(date) %>%
miss_var_summary()
datatable(
df2a, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('csv', 'excel', 'print')
)
)
### Missingness by Month
df3a <- df3 %>%
group_by(Month) %>%
miss_var_summary()
datatable(
df3a, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('csv', 'excel', 'print')
)
)
### Missingness by Day
df4a <- df4 %>%
group_by(Day) %>%
miss_var_summary()
datatable(
df4a, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('csv', 'excel', 'print')
)
)
### Missingness by Year
df5a <- df5 %>%
group_by(Year) %>%
miss_var_summary()
datatable(
df5a, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('csv', 'excel', 'print')
)
)
### TMC Level
tmc1 <- df_RMU[,c('TMC')] %>%
group_by(TMC) %>%
summarize(Count=n())
datatable(
tmc1, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('csv', 'excel', 'print')
)
)
#### Operating Speed by Hour
long <- melt(df6, id.vars = c("Hour"))
long$Hour <- as.factor(long$Hour)
ggviolin(long, "Hour", "value", fill = "Hour",
add = "boxplot", add.params = list(fill = "white"))+coord_flip()+
geom_hline(yintercept=36, linetype="dashed",
color = "red", size=1)+labs(title="Operating Speed by Hour", x="Hour",y="Speed (mph)")+ theme(legend.position="none")
## Warning: Removed 2091215 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2091215 rows containing non-finite values (stat_boxplot).
#### Operating Speed by DOW
long <- melt(df7, id.vars = c("DOW"))
ggviolin(long, "DOW", "value", fill = "DOW",
add = "boxplot", add.params = list(fill = "white"))+coord_flip()+
geom_hline(yintercept=36, linetype="dashed",
color = "red", size=1)+labs(title="Operating Speed by DOW", x="Day of Week",y="Speed (mph)")+ theme(legend.position="none")
## Warning: Removed 2091215 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2091215 rows containing non-finite values (stat_boxplot).
gg_miss_var(df6, facet = Hour, show_pct = TRUE)
### Missingness by Hour
df6a <- df6 %>%
group_by(Hour) %>%
miss_var_summary()
datatable(
df6a, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('csv', 'excel', 'print')
)
)
### Missingness by DOW
df7a <- df7 %>%
group_by(DOW) %>%
miss_var_summary()
datatable(
df7a, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c('csv', 'excel', 'print')
)
)