Ohio Data for Models (EDA)

library(ggplot2)
library(reshape2)

# Read Yearly_OHWA.csv and split it into one data frame per state ----
# The folder is kept in a variable and joined to the file name with
# file.path(), so the CSV is read from the same location without changing the
# session's working directory (setwd() would leak into everything run later).
data_dir <- "C:/Users/s-das/Syncplicity Folders/MyProjects_IMP/Rural Speed Safety/12292018"

ohwa <- read.csv(file.path(data_dir, "Yearly_OHWA.csv"))
names(ohwa)
##  [1] "TMC"           "DISTANCE"      "AADT"          "NO_LANES"     
##  [5] "MEDWID"        "SPD_LIMT"      "SURF_WID"      "ACCESS"       
##  [9] "RTE_NBR"       "Facility_Type" "Facility"      "Total"        
## [13] "Fatal"         "Injury"        "PDO"           "Total_NonInt" 
## [17] "Fatal_NonInt"  "Injury_NonInt" "PDO_NonInt"    "SHD_WID"      
## [21] "myState"       "OptSpd"        "SpdVarr1"      "SpdVarr2"     
## [25] "SpdVarr3"      "SpdVarr4"      "SpdAvg"        "SpdCrash"     
## [29] "SpdBefCr1"     "SpdBefCr2"     "SpdBefCr3"     "SpdBefCr4"    
## [33] "SpdBefCrSD1"   "SpdBefCrSD2"   "SpdBefCrSD3"   "SpdBefCrSD4"  
## [37] "SpdBefCrSim1"  "SpdBefCrSim2"  "SpdBefCrSim3"  "SpdBefCrSim4" 
## [41] "SpdFF"
# Rows are separated on the myState column: "OH" rows go to oh, "WA" rows to wa.
oh <- subset(ohwa, myState == "OH")
wa <- subset(ohwa, myState == "WA")
dim(oh)
## [1] 1553   41
dim(wa)
## [1] 1102   41
# OH: Total vs. SpdBefCrSD1-4, faceted by Facility ----
names(oh)
##  [1] "TMC"           "DISTANCE"      "AADT"          "NO_LANES"     
##  [5] "MEDWID"        "SPD_LIMT"      "SURF_WID"      "ACCESS"       
##  [9] "RTE_NBR"       "Facility_Type" "Facility"      "Total"        
## [13] "Fatal"         "Injury"        "PDO"           "Total_NonInt" 
## [17] "Fatal_NonInt"  "Injury_NonInt" "PDO_NonInt"    "SHD_WID"      
## [21] "myState"       "OptSpd"        "SpdVarr1"      "SpdVarr2"     
## [25] "SpdVarr3"      "SpdVarr4"      "SpdAvg"        "SpdCrash"     
## [29] "SpdBefCr1"     "SpdBefCr2"     "SpdBefCr3"     "SpdBefCr4"    
## [33] "SpdBefCrSD1"   "SpdBefCrSD2"   "SpdBefCrSD3"   "SpdBefCrSD4"  
## [37] "SpdBefCrSim1"  "SpdBefCrSim2"  "SpdBefCrSim3"  "SpdBefCrSim4" 
## [41] "SpdFF"
# Columns are picked by name; per the listing above these are positions
# 12, 33-36 and 11.
oh1 <- oh[c("Total", paste0("SpdBefCrSD", 1:4), "Facility")]
head(oh1)
##   Total SpdBefCrSD1 SpdBefCrSD2 SpdBefCrSD3 SpdBefCrSD4       Facility
## 1    25    9.813677    8.438037    9.784742   10.596249     Interstate
## 2    12    2.984909    2.612623    2.741538    2.706975     Interstate
## 3     0          NA          NA          NA          NA       Two lane
## 4     4   15.518371   13.815025   13.138043   11.406294       Two lane
## 5    10    6.024830    6.084638    6.798247    6.486792     Interstate
## 6     2    4.446976   12.185968   10.067380    9.854834 Multilane Div.
# Long format: one row per (Total, Facility, variable) with its value.
oh2 <- melt(oh1, id.vars = c("Total", "Facility"))
head(oh2)
##   Total       Facility    variable     value
## 1    25     Interstate SpdBefCrSD1  9.813677
## 2    12     Interstate SpdBefCrSD1  2.984909
## 3     0       Two lane SpdBefCrSD1        NA
## 4     4       Two lane SpdBefCrSD1 15.518371
## 5    10     Interstate SpdBefCrSD1  6.024830
## 6     2 Multilane Div. SpdBefCrSD1  4.446976
# Keep only rows whose value is below 15; which() also drops the NA rows.
oh3 <- oh2[which(oh2$value < 15), ]
# Scatter plus smoother, one panel per Facility (rows) x variable (columns).
ggplot(oh3, aes(x = Total, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_grid(Facility ~ variable) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# OH: Total vs. SpdVarr1-4, faceted by Facility ----
oh1 <- oh[c("Total", paste0("SpdVarr", 1:4), "Facility")]
head(oh1)
##   Total  SpdVarr1  SpdVarr2  SpdVarr3   SpdVarr4       Facility
## 1    25 0.6866794 0.6131693 0.4259650 0.30982180     Interstate
## 2    12 1.5886624 1.0282349 0.6145491 0.28965256     Interstate
## 3     0 2.3671735 1.3241403 0.2417825 0.01448730       Two lane
## 4     4 1.3216267 0.7543925 0.4032497 0.02054649       Two lane
## 5    10 1.3466003 0.9463342 0.5025775 0.32786973     Interstate
## 6     2 1.3077780 0.7494583 0.1269628 0.73539314 Multilane Div.
# Long format: one row per (Total, Facility, variable) with its value.
oh2 <- melt(oh1, id.vars = c("Total", "Facility"))
head(oh2)
##   Total       Facility variable     value
## 1    25     Interstate SpdVarr1 0.6866794
## 2    12     Interstate SpdVarr1 1.5886624
## 3     0       Two lane SpdVarr1 2.3671735
## 4     4       Two lane SpdVarr1 1.3216267
## 5    10     Interstate SpdVarr1 1.3466003
## 6     2 Multilane Div. SpdVarr1 1.3077780
# Keep only rows whose value is below 15; which() also drops any NA rows.
oh3 <- oh2[which(oh2$value < 15), ]
# Scatter plus smoother, one panel per Facility (rows) x variable (columns).
ggplot(oh3, aes(x = Total, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_grid(Facility ~ variable) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# OH: Total vs. DISTANCE, faceted by Facility ----
oh1 <- oh[c("Total", "DISTANCE", "Facility")]
head(oh1)
##   Total DISTANCE       Facility
## 1    25  4.90704     Interstate
## 2    12  5.51171     Interstate
## 3     0  1.61862       Two lane
## 4     4  1.74262       Two lane
## 5    10  7.77372     Interstate
## 6     2  1.74257 Multilane Div.
# Long format so the plot can facet on the melted variable column.
oh2 <- melt(oh1, id.vars = c("Total", "Facility"))
head(oh2)
##   Total       Facility variable   value
## 1    25     Interstate DISTANCE 4.90704
## 2    12     Interstate DISTANCE 5.51171
## 3     0       Two lane DISTANCE 1.61862
## 4     4       Two lane DISTANCE 1.74262
## 5    10     Interstate DISTANCE 7.77372
## 6     2 Multilane Div. DISTANCE 1.74257
# Scatter plus smoother, one panel row per Facility.
ggplot(oh2, aes(x = Total, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_grid(Facility ~ variable) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# OH: Total vs. AADT, faceted by Facility ----
oh1 <- oh[c("Total", "AADT", "Facility")]
head(oh1)
##   Total     AADT       Facility
## 1    25 28750.80     Interstate
## 2    12 38385.37     Interstate
## 3     0  3140.00       Two lane
## 4     4  3140.00       Two lane
## 5    10 37946.37     Interstate
## 6     2 13877.46 Multilane Div.
# Long format so the plot can facet on the melted variable column.
oh2 <- melt(oh1, id.vars = c("Total", "Facility"))
head(oh2)
##   Total       Facility variable    value
## 1    25     Interstate     AADT 28750.80
## 2    12     Interstate     AADT 38385.37
## 3     0       Two lane     AADT  3140.00
## 4     4       Two lane     AADT  3140.00
## 5    10     Interstate     AADT 37946.37
## 6     2 Multilane Div.     AADT 13877.46
# Scatter plus smoother, one panel row per Facility.
ggplot(oh2, aes(x = Total, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_grid(Facility ~ variable) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# OH: Total vs. NO_LANES, faceted by Facility ----
oh1 <- oh[c("Total", "NO_LANES", "Facility")]
head(oh1)
##   Total NO_LANES       Facility
## 1    25        4     Interstate
## 2    12        4     Interstate
## 3     0        2       Two lane
## 4     4        2       Two lane
## 5    10        4     Interstate
## 6     2        4 Multilane Div.
# Long format so the plot can facet on the melted variable column.
oh2 <- melt(oh1, id.vars = c("Total", "Facility"))
head(oh2)
##   Total       Facility variable value
## 1    25     Interstate NO_LANES     4
## 2    12     Interstate NO_LANES     4
## 3     0       Two lane NO_LANES     2
## 4     4       Two lane NO_LANES     2
## 5    10     Interstate NO_LANES     4
## 6     2 Multilane Div. NO_LANES     4
# Scatter plus smoother, one panel row per Facility.
ggplot(oh2, aes(x = Total, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_grid(Facility ~ variable) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# OH: Total vs. MEDWID, faceted by Facility ----
oh1 <- oh[c("Total", "MEDWID", "Facility")]
head(oh1)
##   Total   MEDWID       Facility
## 1    25 48.00000     Interstate
## 2    12 84.04703     Interstate
## 3     0       NA       Two lane
## 4     4       NA       Two lane
## 5    10 89.02088     Interstate
## 6     2 77.42882 Multilane Div.
# Long format so the plot can facet on the melted variable column.
oh2 <- melt(oh1, id.vars = c("Total", "Facility"))
head(oh2)
##   Total       Facility variable    value
## 1    25     Interstate   MEDWID 48.00000
## 2    12     Interstate   MEDWID 84.04703
## 3     0       Two lane   MEDWID       NA
## 4     4       Two lane   MEDWID       NA
## 5    10     Interstate   MEDWID 89.02088
## 6     2 Multilane Div.   MEDWID 77.42882
# NA values are not filtered here; ggplot2 drops them itself and reports the
# count in the warnings echoed below.
ggplot(oh2, aes(x = Total, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_grid(Facility ~ variable) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 779 rows containing non-finite values (stat_smooth).
## Warning: Removed 779 rows containing missing values (geom_point).

# OH: Total vs. SURF_WID, faceted by Facility ----
oh1 <- oh[c("Total", "SURF_WID", "Facility")]
head(oh1)
##   Total SURF_WID       Facility
## 1    25 48.00000     Interstate
## 2    12 55.96865     Interstate
## 3     0 27.02106       Two lane
## 4     4 26.90477       Two lane
## 5    10 48.00000     Interstate
## 6     2 48.00000 Multilane Div.
# Long format so the plot can facet on the melted variable column.
oh2 <- melt(oh1, id.vars = c("Total", "Facility"))
head(oh2)
##   Total       Facility variable    value
## 1    25     Interstate SURF_WID 48.00000
## 2    12     Interstate SURF_WID 55.96865
## 3     0       Two lane SURF_WID 27.02106
## 4     4       Two lane SURF_WID 26.90477
## 5    10     Interstate SURF_WID 48.00000
## 6     2 Multilane Div. SURF_WID 48.00000
# Scatter plus smoother, one panel row per Facility.
ggplot(oh2, aes(x = Total, y = value)) +
  geom_point() +
  geom_smooth() +
  facet_grid(Facility ~ variable) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Subasish Das

2019-01-07