# Washington Data for Models (EDA)

library(ggplot2)
library(reshape2)

# Read the Yearly_OHWA.csv table.
# The file is addressed through an explicit path rather than setwd(), so
# running this script does not change the session's working directory.
data_dir <- file.path(
  "C:/Users/s-das/Syncplicity Folders",
  "MyProjects_IMP", "Rural Speed Safety", "12292018"
)
ohwa <- read.csv(file.path(data_dir, "Yearly_OHWA.csv"))

# Column inventory of the raw table (41 columns).
names(ohwa)
##  [1] "TMC"           "DISTANCE"      "AADT"          "NO_LANES"     
##  [5] "MEDWID"        "SPD_LIMT"      "SURF_WID"      "ACCESS"       
##  [9] "RTE_NBR"       "Facility_Type" "Facility"      "Total"        
## [13] "Fatal"         "Injury"        "PDO"           "Total_NonInt" 
## [17] "Fatal_NonInt"  "Injury_NonInt" "PDO_NonInt"    "SHD_WID"      
## [21] "myState"       "OptSpd"        "SpdVarr1"      "SpdVarr2"     
## [25] "SpdVarr3"      "SpdVarr4"      "SpdAvg"        "SpdCrash"     
## [29] "SpdBefCr1"     "SpdBefCr2"     "SpdBefCr3"     "SpdBefCr4"    
## [33] "SpdBefCrSD1"   "SpdBefCrSD2"   "SpdBefCrSD3"   "SpdBefCrSD4"  
## [37] "SpdBefCrSim1"  "SpdBefCrSim2"  "SpdBefCrSim3"  "SpdBefCrSim4" 
## [41] "SpdFF"
# Keep only the rows whose myState equals "WA". which() discards comparisons
# that evaluate to NA, so rows with a missing state are dropped as well.
wa <- ohwa[which(ohwa[["myState"]] == "WA"), , drop = FALSE]

# Size of the WA subset: rows, then columns.
dim(wa)
## [1] 1102   41

# The subset carries the same 41 columns as the full table.
names(wa)
##  [1] "TMC"           "DISTANCE"      "AADT"          "NO_LANES"     
##  [5] "MEDWID"        "SPD_LIMT"      "SURF_WID"      "ACCESS"       
##  [9] "RTE_NBR"       "Facility_Type" "Facility"      "Total"        
## [13] "Fatal"         "Injury"        "PDO"           "Total_NonInt" 
## [17] "Fatal_NonInt"  "Injury_NonInt" "PDO_NonInt"    "SHD_WID"      
## [21] "myState"       "OptSpd"        "SpdVarr1"      "SpdVarr2"     
## [25] "SpdVarr3"      "SpdVarr4"      "SpdAvg"        "SpdCrash"     
## [29] "SpdBefCr1"     "SpdBefCr2"     "SpdBefCr3"     "SpdBefCr4"    
## [33] "SpdBefCrSD1"   "SpdBefCrSD2"   "SpdBefCrSD3"   "SpdBefCrSD4"  
## [37] "SpdBefCrSim1"  "SpdBefCrSim2"  "SpdBefCrSim3"  "SpdBefCrSim4" 
## [41] "SpdFF"
# Total vs. SpdBefCrSD1-4, by facility ----
# Columns are selected by name instead of position (formerly 12, 33:36, 11) so
# the selection keeps working if the CSV column order ever changes.
wa1 <- wa[c(
  "Total",
  "SpdBefCrSD1", "SpdBefCrSD2", "SpdBefCrSD3", "SpdBefCrSD4",
  "Facility"
)]
head(wa1)
##      Total SpdBefCrSD1 SpdBefCrSD2 SpdBefCrSD3 SpdBefCrSD4   Facility
## 1554    17   12.976720   12.421628   12.224131   12.146737 Interstate
## 1555     0          NA          NA          NA          NA Interstate
## 1556     7    2.584412    3.115596    3.123477    3.248472 Interstate
## 1557    16    4.578336    4.329360    4.082451    3.843415 Interstate
## 1558    37   12.127962   11.873144   11.419561   11.184545 Interstate
## 1559     8    3.118788    3.739448    3.862563    6.672306 Interstate

# Reshape to long format: Total and Facility stay as identifiers, every other
# column becomes a (variable, value) pair. The argument is spelled `id.vars`
# in full rather than relying on partial matching of `id`.
wa2 <- melt(wa1, id.vars = c("Total", "Facility"))
head(wa2)
##   Total   Facility    variable     value
## 1    17 Interstate SpdBefCrSD1 12.976720
## 2     0 Interstate SpdBefCrSD1        NA
## 3     7 Interstate SpdBefCrSD1  2.584412
## 4    16 Interstate SpdBefCrSD1  4.578336
## 5    37 Interstate SpdBefCrSD1 12.127962
## 6     8 Interstate SpdBefCrSD1  3.118788

# Keep values strictly between 0 and 15; subset() also drops rows whose value
# is NA (such as row 1555 above).
wa3 <- subset(wa2, value > 0 & value < 15)

# Scatter plot with a smoother: one panel row per Facility, one panel column
# per measured variable.
ggplot(wa3, aes(Total, value)) +
  geom_point() +
  theme_bw() +
  geom_smooth() +
  facet_grid(Facility ~ variable)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Total vs. SpdVarr1-4, by facility ----
# Columns are selected by name instead of position (formerly 12, 23:26, 11) so
# the selection keeps working if the CSV column order ever changes.
wa1 <- wa[c(
  "Total",
  "SpdVarr1", "SpdVarr2", "SpdVarr3", "SpdVarr4",
  "Facility"
)]
head(wa1)
##      Total SpdVarr1  SpdVarr2  SpdVarr3   SpdVarr4   Facility
## 1554    17 2.012648 1.4752954 1.0457908 1.39958579 Interstate
## 1555     0 1.143017 1.0948927 0.5676900 0.46200043 Interstate
## 1556     7 1.184563 0.8531215 0.5164317 0.08386433 Interstate
## 1557    16 1.124240 0.7894267 0.3480923 0.46962648 Interstate
## 1558    37 2.431305 0.9720285 0.6028242 0.36334350 Interstate
## 1559     8 2.173805 0.8093347 0.3178004 0.27300302 Interstate

# Reshape to long format: Total and Facility stay as identifiers, every other
# column becomes a (variable, value) pair. The argument is spelled `id.vars`
# in full rather than relying on partial matching of `id`.
wa2 <- melt(wa1, id.vars = c("Total", "Facility"))
head(wa2)
##   Total   Facility variable    value
## 1    17 Interstate SpdVarr1 2.012648
## 2     0 Interstate SpdVarr1 1.143017
## 3     7 Interstate SpdVarr1 1.184563
## 4    16 Interstate SpdVarr1 1.124240
## 5    37 Interstate SpdVarr1 2.431305
## 6     8 Interstate SpdVarr1 2.173805

# Keep values strictly between 0 and 15; subset() also drops NA values.
# NOTE(review): this is the same (0, 15) window applied to the SpdBefCrSD
# columns -- confirm it is the intended range for SpdVarr too.
wa3 <- subset(wa2, value > 0 & value < 15)

# Scatter plot with a smoother: one panel row per Facility, one panel column
# per measured variable.
ggplot(wa3, aes(Total, value)) +
  geom_point() +
  theme_bw() +
  geom_smooth() +
  facet_grid(Facility ~ variable)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Total vs. DISTANCE, by facility ----
# Columns are selected by name instead of position (formerly 12, 2, 11) so the
# selection keeps working if the CSV column order ever changes.
wa1 <- wa[c("Total", "DISTANCE", "Facility")]
head(wa1)
##      Total DISTANCE   Facility
## 1554    17  2.79553 Interstate
## 1555     0  2.09622 Interstate
## 1556     7  3.49942 Interstate
## 1557    16  1.19326 Interstate
## 1558    37  3.32854 Interstate
## 1559     8  2.34230 Interstate

# Reshape to long format so the plot code matches the multi-column sections:
# DISTANCE ends up as the single level of `variable`. The argument is spelled
# `id.vars` in full rather than relying on partial matching of `id`.
wa2 <- melt(wa1, id.vars = c("Total", "Facility"))
head(wa2)
##   Total   Facility variable   value
## 1    17 Interstate DISTANCE 2.79553
## 2     0 Interstate DISTANCE 2.09622
## 3     7 Interstate DISTANCE 3.49942
## 4    16 Interstate DISTANCE 1.19326
## 5    37 Interstate DISTANCE 3.32854
## 6     8 Interstate DISTANCE 2.34230

# No value filter here: every melted row is plotted, one panel per Facility.
ggplot(wa2, aes(Total, value)) +
  geom_point() +
  theme_bw() +
  geom_smooth() +
  facet_grid(Facility ~ variable)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Total vs. AADT, by facility ----
# Columns are selected by name instead of position (formerly 12, 3, 11) so the
# selection keeps working if the CSV column order ever changes.
wa1 <- wa[c("Total", "AADT", "Facility")]
head(wa1)
##      Total     AADT   Facility
## 1554    17 32974.55 Interstate
## 1555     0 31074.00 Interstate
## 1556     7 46392.00 Interstate
## 1557    16 51531.76 Interstate
## 1558    37 60131.76 Interstate
## 1559     8 67610.37 Interstate

# Reshape to long format so the plot code matches the multi-column sections:
# AADT ends up as the single level of `variable`. The argument is spelled
# `id.vars` in full rather than relying on partial matching of `id`.
wa2 <- melt(wa1, id.vars = c("Total", "Facility"))
head(wa2)
##   Total   Facility variable    value
## 1    17 Interstate     AADT 32974.55
## 2     0 Interstate     AADT 31074.00
## 3     7 Interstate     AADT 46392.00
## 4    16 Interstate     AADT 51531.76
## 5    37 Interstate     AADT 60131.76
## 6     8 Interstate     AADT 67610.37

# Plot only rows with AADT above 1000; subset() also drops NA values.
wa3 <- subset(wa2, value > 1000)

# Scatter plot with a smoother, one panel per Facility.
ggplot(wa3, aes(Total, value)) +
  geom_point() +
  theme_bw() +
  geom_smooth() +
  facet_grid(Facility ~ variable)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Total vs. NO_LANES, by facility ----
# Columns are selected by name instead of position (formerly 12, 4, 11) so the
# selection keeps working if the CSV column order ever changes.
wa1 <- wa[c("Total", "NO_LANES", "Facility")]
head(wa1)
##      Total NO_LANES   Facility
## 1554    17        7 Interstate
## 1555     0        6 Interstate
## 1556     7        7 Interstate
## 1557    16        7 Interstate
## 1558    37        7 Interstate
## 1559     8        6 Interstate

# Reshape to long format so the plot code matches the multi-column sections:
# NO_LANES ends up as the single level of `variable`. The argument is spelled
# `id.vars` in full rather than relying on partial matching of `id`.
wa2 <- melt(wa1, id.vars = c("Total", "Facility"))
head(wa2)
##   Total   Facility variable value
## 1    17 Interstate NO_LANES     7
## 2     0 Interstate NO_LANES     6
## 3     7 Interstate NO_LANES     7
## 4    16 Interstate NO_LANES     7
## 5    37 Interstate NO_LANES     7
## 6     8 Interstate NO_LANES     6

# No value filter here: every melted row is plotted, one panel per Facility.
ggplot(wa2, aes(Total, value)) +
  geom_point() +
  theme_bw() +
  geom_smooth() +
  facet_grid(Facility ~ variable)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Total vs. MEDWID, by facility ----
# Columns are selected by name instead of position (formerly 12, 5, 11) so the
# selection keeps working if the CSV column order ever changes.
wa1 <- wa[c("Total", "MEDWID", "Facility")]
head(wa1)
##      Total    MEDWID   Facility
## 1554    17 100.02681 Interstate
## 1555     0 250.00000 Interstate
## 1556     7 241.69448 Interstate
## 1557    16 129.19223 Interstate
## 1558    37 220.26164 Interstate
## 1559     8  62.96295 Interstate

# Reshape to long format so the plot code matches the multi-column sections:
# MEDWID ends up as the single level of `variable`. The argument is spelled
# `id.vars` in full rather than relying on partial matching of `id`.
wa2 <- melt(wa1, id.vars = c("Total", "Facility"))
head(wa2)
##   Total   Facility variable     value
## 1    17 Interstate   MEDWID 100.02681
## 2     0 Interstate   MEDWID 250.00000
## 3     7 Interstate   MEDWID 241.69448
## 4    16 Interstate   MEDWID 129.19223
## 5    37 Interstate   MEDWID 220.26164
## 6     8 Interstate   MEDWID  62.96295

# No value filter here: every melted row is plotted, one panel per Facility.
ggplot(wa2, aes(Total, value)) +
  geom_point() +
  theme_bw() +
  geom_smooth() +
  facet_grid(Facility ~ variable)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Total vs. SURF_WID, by facility ----
# Columns are selected by name instead of position (formerly 12, 7, 11) so the
# selection keeps working if the CSV column order ever changes.
wa1 <- wa[c("Total", "SURF_WID", "Facility")]
head(wa1)
##      Total SURF_WID   Facility
## 1554    17 85.82151 Interstate
## 1555     0 72.00000 Interstate
## 1556     7 84.07674 Interstate
## 1557    16 85.15134 Interstate
## 1558    37 84.42425 Interstate
## 1559     8 72.28819 Interstate

# Reshape to long format so the plot code matches the multi-column sections:
# SURF_WID ends up as the single level of `variable`. The argument is spelled
# `id.vars` in full rather than relying on partial matching of `id`.
wa2 <- melt(wa1, id.vars = c("Total", "Facility"))
head(wa2)
##   Total   Facility variable    value
## 1    17 Interstate SURF_WID 85.82151
## 2     0 Interstate SURF_WID 72.00000
## 3     7 Interstate SURF_WID 84.07674
## 4    16 Interstate SURF_WID 85.15134
## 5    37 Interstate SURF_WID 84.42425
## 6     8 Interstate SURF_WID 72.28819

# No value filter here: every melted row is plotted, one panel per Facility.
ggplot(wa2, aes(Total, value)) +
  geom_point() +
  theme_bw() +
  geom_smooth() +
  facet_grid(Facility ~ variable)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Subasish Das

# 2019-01-07