Ohio Reduced Models (Segment Level)

Rural Two lane(NB)

library(data.table)
library(MASS)
rm(list=ls())
setwd("//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NB_OH")

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                    & Facility_Type == '2_RMU' 
                    & AADT > 0 
                    & !is.na(AADT)
                    # & !is.na(SHD_WID)
                    # & !is.na(SURF_WID)
                    # & !is.na(OptSpd)
                    & !is.na(SpdVarr1)
                    & !is.na(SpdVarr2)
                    # & !is.na(SpdVarr3)
                    # & !is.na(SpdVarr4)
                    & !is.na(SpdAvg)
                    & !is.na(NO_LANES)
                    & !is.na(MEDWID)
                    ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES,
                 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]




## First, develop a full model, including all the variables

nb_Model_Full <- glm.nb(myY ~ . - LogL + offset(LogL) , data = cbind(myY, myX[, -2]))
summary(nb_Model_Full)

# calculate DIC
Get_NB_DIC <- function(inNBModel, inY) {
  
  # for input NB model, get DIC
  inNBModel <- nb_Model_Full
  myMu <- exp(predict(inNBModel))
  myTheta <- summary(inNBModel$theta)
  mylogLik <- dnbinom(myY, size = myTheta, mu = myMu, log = T)
  myLlog <- sum(mylogLik)
  myDIC <- -2 * myLlog
  myDIC
}

myDIC <- Get_NB_DIC(nb_Model_Full, myY)

myDIC

# calculate MAD
y_hat = nb_Model_Full$fitted.values

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

save.image(paste('Results_OH_RI_NB_yearly.r', sep = ''))

Rural Two lane(NBL)

model
{
  for(i in 1:667) # change this according to data size
    # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1*X[i, 3] + b2*X[i, 4] + 
                   b3*X[i, 5] + b4*X[i, 6])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  
  b0~ dnorm(0, 0.01)
  b1~ dnorm(0, 0.01)
  b2~ dnorm(0, 0.01)
  b3~ dnorm(0, 0.01)
  b4~ dnorm(0, 0.01)
  alp ~ dunif(0.1, 10)
  k~dbeta(222,333) 
  # change this according to data size k~(size/3,size/2)
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBL_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                   & Facility_Type == '1_R2L' 
                   & AADT > 0 
                   & !is.na(AADT)
                   # & !is.na(SHD_WID)
                   # & !is.na(SURF_WID)
                   # & !is.na(OptSpd)
                   & !is.na(SpdVarr1)
                   & !is.na(SpdVarr2)
                   # & !is.na(SpdVarr3)
                   # & !is.na(SpdVarr4)
                   & !is.na(SpdAvg)
                   # & !is.na(NO_LANES)
                   # & !is.na(MEDWID)
                   ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg
                 # 'SpdFF' = SpdFF
                 # 'Nolane' = NO_LANES,
                 # 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_R2_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
system.time(my.jags <-jags.model('OH_R2_model_NBL_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))


## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4',
                                               'alp', 'k','mu'),
                                    n.iter = 25000, thin = 10))
a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:7),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:7)]#

save.image(paste('MCMC_OH_R2_NBL_25000.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
# rm(list = ls()[!ls() %in% c('myX', 'myY')])
# 
# ## Build Model
# system.time(my.jags <-jags.model('NBL_RP_OH_bug.R',
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3))
# 
# ## Run MCMC
# system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
#                                    'mub2', 'mub3', 
#                                    'mub4', 'mub5', 'mub6', 
#                                    'mub7', 'mub8', 'mub9', 
#                                    'sigma1', 'sigma2',
#                                    'sigma3', 'sigma4',
#                                    'sigma5', 'sigma6',
#                                    'sigma7', 'sigma8',
#                                    'sigma9', 
#                                    'alp', 'k'),
#                         n.iter = 1000, thin = 5))

## Results
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:7)]#
save.image(paste('MCMC_OH_R2_NBL_25000_yearly.r', sep = ''))

## The End

Rural Two lane(RP-NBL)

model
{
  for(i in 1:667)
  # for(i in 1:81)
  # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1[i]*X[i, 3] + b2[i]*X[i, 4] + 
                   b3[i]*X[i, 5] + b4[i]*X[i, 6])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    b1[i]~dnorm(mub1,tau1)
    b2[i]~dnorm(mub2,tau2)
    b3[i]~dnorm(mub3,tau3)
    b4[i]~dnorm(mub4,tau4)
    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  b0~ dnorm(0, 0.01)
  mub1~ dnorm(0, 0.01)
  mub2~ dnorm(0, 0.01)
  mub3~ dnorm(0, 0.01)
  mub4~ dnorm(0, 0.01)
  alp ~ dunif(0.1, 10)
  
  tau1 ~ dgamma(0.01,0.01)
  tau2 ~ dgamma(0.01,0.01)
  tau3 ~ dgamma(0.01,0.01)
  tau4 ~ dgamma(0.01,0.01)
  
  sigma1<-pow(tau1,-0.5)
  sigma2<-pow(tau2,-0.5)
  sigma3<-pow(tau3,-0.5)
  sigma4<-pow(tau4,-0.5)
  
  k~dbeta(222,333) 
  ## k~dbeta(k1,k2) k1 ~= N/3; k2 ~=N/2
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBLRP_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                    & Facility_Type == '1_R2L' 
                    & AADT > 0 
                    & !is.na(AADT)
                    # & !is.na(SHD_WID)
                    # & !is.na(SURF_WID)
                    # & !is.na(OptSpd)
                    & !is.na(SpdVarr1)
                    & !is.na(SpdVarr2)
                    # & !is.na(SpdVarr3)
                    # & !is.na(SpdVarr4)
                    & !is.na(SpdAvg)
                    # & !is.na(NO_LANES)
                    # & !is.na(MEDWID)
                    ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg
                 # 'SpdFF' = SpdFF
                 # 'Nolane' = NO_LANES,
                 # 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_R2_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
# my.jags <-jags.model('NBL_OH_bug.R', 
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3)
# 
# ## Run MCMC
# my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4',
#                                    'alp', 'k'),
#                         n.iter = 25000, thin = 10)
# 
# ## Results
# mm <- as.matrix(my.sims) 
# mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
# colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
# mcmc.para
# 
# save.image(paste('MCMC_OH_R2_NBL_25000', round(rnorm(1), 3), '.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
rm(list = ls()[!ls() %in% c('myX', 'myY')])

## Build Model
system.time(my.jags <-jags.model('OH_R2_model_NBLRP_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))

## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
                                               'mub2', 'mub3', 
                                               'mub4', 
                                               'sigma1', 'sigma2',
                                               'sigma3', 'sigma4',
                                               'alp', 'k','mu'),#
                                    n.iter = 25000, thin = 5))

a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:3,(nrow(a)-7):nrow(a)),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:3,(nrow(a)-7):nrow(a))]#

save.image(paste('MCMC_OH_R2_NBLRP_25000_yearly.r', sep = ''))

Rural Interstate(NB)

library(data.table)
library(MASS)
rm(list=ls())
setwd("//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NB_OH")

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                    & Facility_Type == '0_RI' 
                    & AADT > 0 
                    & !is.na(AADT)
                    # & !is.na(SHD_WID)
                    # & !is.na(SURF_WID)
                    # & !is.na(OptSpd)
                    & !is.na(SpdVarr1)
                    & !is.na(SpdVarr2)
                    # & !is.na(SpdVarr3)
                    # & !is.na(SpdVarr4)
                    & !is.na(SpdAvg)
                    & !is.na(NO_LANES)
                    & !is.na(MEDWID)
                    ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES,
                 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]




## First, develop a full model, including all the variables

nb_Model_Full <- glm.nb(myY ~ . - LogL + offset(LogL) , data = cbind(myY, myX[, -2]))
summary(nb_Model_Full)

# calculate DIC
Get_NB_DIC <- function(inNBModel, inY) {
  
  # for input NB model, get DIC
  inNBModel <- nb_Model_Full
  myMu <- exp(predict(inNBModel))
  myTheta <- summary(inNBModel$theta)
  mylogLik <- dnbinom(myY, size = myTheta, mu = myMu, log = T)
  myLlog <- sum(mylogLik)
  myDIC <- -2 * myLlog
  myDIC
}

myDIC <- Get_NB_DIC(nb_Model_Full, myY)

myDIC

# calculate MAD
y_hat = nb_Model_Full$fitted.values

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

save.image(paste('Results_OH_RI_NB_yearly.r', sep = ''))

Rural Interstate(NBL)

model
{
  for(i in 1:346) # change this according to data size
    # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1*X[i, 3] + b2*X[i, 4] + 
                   b3*X[i, 5] + b4*X[i, 6] + 
                   b5*X[i, 7] + b6*X[i, 8])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  
  b0~ dnorm(0, 0.01)
  b1~ dnorm(0, 0.01)
  b2~ dnorm(0, 0.01)
  b3~ dnorm(0, 0.01)
  b4~ dnorm(0, 0.01)
  b5~ dnorm(0, 0.01)
  b6~ dnorm(0, 0.01)
  alp ~ dunif(0.1, 10)
  k~dbeta(115,173) 
  # change this according to data size k~(size/3,size/2)
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBL_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                   & Facility_Type == '0_RI' 
                   & AADT > 0 
                   & !is.na(AADT)
                   # & !is.na(SHD_WID)
                   # & !is.na(SURF_WID)
                   # & !is.na(OptSpd)
                   & !is.na(SpdVarr1)
                   & !is.na(SpdVarr2)
                   # & !is.na(SpdVarr3)
                   # & !is.na(SpdVarr4)
                   & !is.na(SpdAvg)
                   & !is.na(NO_LANES)
                   & !is.na(MEDWID),]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES,
                 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_RI_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
system.time(my.jags <-jags.model('OH_RI_model_NBL_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))


## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4','b5','b6',
                                               'alp', 'k','mu'),
                                    n.iter = 25000, thin = 10))
a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:9),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:9)]#

save.image(paste('MCMC_OH_RI_NBL_25000.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
# rm(list = ls()[!ls() %in% c('myX', 'myY')])
# 
# ## Build Model
# system.time(my.jags <-jags.model('NBL_RP_OH_bug.R',
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3))
# 
# ## Run MCMC
# system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
#                                    'mub2', 'mub3', 
#                                    'mub4', 'mub5', 'mub6', 
#                                    'mub7', 'mub8', 'mub9', 
#                                    'sigma1', 'sigma2',
#                                    'sigma3', 'sigma4',
#                                    'sigma5', 'sigma6',
#                                    'sigma7', 'sigma8',
#                                    'sigma9', 
#                                    'alp', 'k'),
#                         n.iter = 1000, thin = 5))

## Results
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:9)]#
save.image(paste('MCMC_OH_RI_NBL_25000_yearly.r', sep = ''))

## The End

Rural Interstate(RP-NBL)

model
{
  for(i in 1:346)
  # for(i in 1:81)
  # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1[i]*X[i, 3] + b2[i]*X[i, 4] + 
                   b3[i]*X[i, 5] + b4[i]*X[i, 6] + 
                   b5[i]*X[i, 7] + b6[i]*X[i, 8])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    b1[i]~dnorm(mub1,tau1)
    b2[i]~dnorm(mub2,tau2)
    b3[i]~dnorm(mub3,tau3)
    b4[i]~dnorm(mub4,tau4)
    b5[i]~dnorm(mub5,tau5)
    b6[i]~dnorm(mub6,tau6)
    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  b0~ dnorm(0, 0.01)
  mub1~ dnorm(0, 0.01)
  mub2~ dnorm(0, 0.01)
  mub3~ dnorm(0, 0.01)
  mub4~ dnorm(0, 0.01)
  mub5~ dnorm(0, 0.01)
  mub6~ dnorm(0, 0.01)
  alp ~ dunif(0.1, 10)
  
  tau1 ~ dgamma(0.01,0.01)
  tau2 ~ dgamma(0.01,0.01)
  tau3 ~ dgamma(0.01,0.01)
  tau4 ~ dgamma(0.01,0.01)
  tau5 ~ dgamma(0.01,0.01)
  tau6 ~ dgamma(0.01,0.01)
  
  sigma1<-pow(tau1,-0.5)
  sigma2<-pow(tau2,-0.5)
  sigma3<-pow(tau3,-0.5)
  sigma4<-pow(tau4,-0.5)
  sigma5<-pow(tau5,-0.5)
  sigma6<-pow(tau6,-0.5)
  
  k~dbeta(115,173)
  ## k~dbeta(k1,k2) k1 ~= N/3; k2 ~=N/2
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBLRP_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                   & Facility_Type == '0_RI' 
                   & AADT > 0 
                   & !is.na(AADT)
                   # & !is.na(SHD_WID)
                   # & !is.na(SURF_WID)
                   # & !is.na(OptSpd)
                   & !is.na(SpdVarr1)
                   & !is.na(SpdVarr2)
                   # & !is.na(SpdVarr3)
                   # & !is.na(SpdVarr4)
                   & !is.na(SpdAvg)
                   & !is.na(NO_LANES)
                   & !is.na(MEDWID),]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES,
                 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_RI_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
# my.jags <-jags.model('NBL_OH_bug.R', 
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3)
# 
# ## Run MCMC
# my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4',
#                                    'alp', 'k'),
#                         n.iter = 25000, thin = 10)
# 
# ## Results
# mm <- as.matrix(my.sims) 
# mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
# colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
# mcmc.para
# 
# save.image(paste('MCMC_OH_RI_NBL_25000', round(rnorm(1), 3), '.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
rm(list = ls()[!ls() %in% c('myX', 'myY')])

## Build Model
system.time(my.jags <-jags.model('OH_RI_model_NBLRP_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))

## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
                                               'mub2', 'mub3', 
                                               'mub4', 'mub5', 'mub6', 
                                               'sigma1', 'sigma2',
                                               'sigma3', 'sigma4',
                                               'sigma5', 'sigma6',
                                               'alp', 'k','mu'),#
                                    n.iter = 25000, thin = 5))

a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:3,(nrow(a)-11):nrow(a)),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:3,(nrow(a)-11):nrow(a))]#

save.image(paste('MCMC_OH_RI_NBLRP_25000_yearly.r', sep = ''))

Rural Multilane Divided(NB)

library(data.table)
library(MASS)
rm(list=ls())
setwd("//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NB_OH")

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                    & Facility_Type == '3_RMD' 
                    & AADT > 0 
                    & !is.na(AADT)
                    # & !is.na(SHD_WID)
                    # & !is.na(SURF_WID)
                    # & !is.na(OptSpd)
                    & !is.na(SpdVarr1)
                    & !is.na(SpdVarr2)
                    # & !is.na(SpdVarr3)
                    # & !is.na(SpdVarr4)
                    & !is.na(SpdAvg)
                    & !is.na(NO_LANES)
                    & !is.na(MEDWID)
                    ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES,
                 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]




## First, develop a full model, including all the variables

nb_Model_Full <- glm.nb(myY ~ . - LogL + offset(LogL) , data = cbind(myY, myX[, -2]))
summary(nb_Model_Full)

# calculate DIC
Get_NB_DIC <- function(inNBModel, inY) {
  
  # for input NB model, get DIC
  inNBModel <- nb_Model_Full
  myMu <- exp(predict(inNBModel))
  myTheta <- summary(inNBModel$theta)
  mylogLik <- dnbinom(myY, size = myTheta, mu = myMu, log = T)
  myLlog <- sum(mylogLik)
  myDIC <- -2 * myLlog
  myDIC
}

myDIC <- Get_NB_DIC(nb_Model_Full, myY)

myDIC

# calculate MAD
y_hat = nb_Model_Full$fitted.values

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

save.image(paste('Results_OH_RMD_NB_yearly.r', sep = ''))

Rural Multilane Divided(NBL)

model
{
  for(i in 1:428) # change this according to data size
    # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1*X[i, 3] + b2*X[i, 4] + 
                   b3*X[i, 5] + b4*X[i, 6] + 
                   b5*X[i, 7] + b6*X[i, 8])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  
  b0~ dnorm(0, 0.01)
  b1~ dnorm(0, 0.01)
  b2~ dnorm(0, 0.01)
  b3~ dnorm(0, 0.01)
  b4~ dnorm(0, 0.01)
  b5~ dnorm(0, 0.01)
  b6~ dnorm(0, 0.01)
  alp ~ dunif(0.1, 10)
  k~dbeta(142,214) 
  # change this according to data size k~(size/3,size/2)
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBL_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                   & Facility_Type == '3_RMD' 
                   & AADT > 0 
                   & !is.na(AADT)
                   # & !is.na(SHD_WID)
                   # & !is.na(SURF_WID)
                   # & !is.na(OptSpd)
                   & !is.na(SpdVarr1)
                   & !is.na(SpdVarr2)
                   # & !is.na(SpdVarr3)
                   # & !is.na(SpdVarr4)
                   & !is.na(SpdAvg)
                   & !is.na(NO_LANES)
                   & !is.na(MEDWID),]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES,
                 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_RMD_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
system.time(my.jags <-jags.model('OH_RMD_model_NBL_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))


## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4','b5','b6',
                                               'alp', 'k','mu'),
                                    n.iter = 25000, thin = 10))
a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:9),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:9)]#

save.image(paste('MCMC_OH_RMD_NBL_25000.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
# rm(list = ls()[!ls() %in% c('myX', 'myY')])
# 
# ## Build Model
# system.time(my.jags <-jags.model('NBL_RP_OH_bug.R',
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3))
# 
# ## Run MCMC
# system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
#                                    'mub2', 'mub3', 
#                                    'mub4', 'mub5', 'mub6', 
#                                    'mub7', 'mub8', 'mub9', 
#                                    'sigma1', 'sigma2',
#                                    'sigma3', 'sigma4',
#                                    'sigma5', 'sigma6',
#                                    'sigma7', 'sigma8',
#                                    'sigma9', 
#                                    'alp', 'k'),
#                         n.iter = 1000, thin = 5))

## Results
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:9)]#
save.image(paste('MCMC_OH_RMD_NBL_25000_yearly.r', sep = ''))

## The End

Rural Multilane Divided(RP-NBL)

model
{
  for(i in 1:428)
  # for(i in 1:81)
  # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1[i]*X[i, 3] + b2[i]*X[i, 4] + 
                   b3[i]*X[i, 5] + b4[i]*X[i, 6] + 
                   b5[i]*X[i, 7] + b6[i]*X[i, 8])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    b1[i]~dnorm(mub1,tau1)
    b2[i]~dnorm(mub2,tau2)
    b3[i]~dnorm(mub3,tau3)
    b4[i]~dnorm(mub4,tau4)
    b5[i]~dnorm(mub5,tau5)
    b6[i]~dnorm(mub6,tau6)
    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  b0~ dnorm(0, 0.01)
  mub1~ dnorm(0, 0.01)
  mub2~ dnorm(0, 0.01)
  mub3~ dnorm(0, 0.01)
  mub4~ dnorm(0, 0.01)
  mub5~ dnorm(0, 0.01)
  mub6~ dnorm(0, 0.01)
  alp ~ dunif(0.1, 10)
  
  tau1 ~ dgamma(0.01,0.01)
  tau2 ~ dgamma(0.01,0.01)
  tau3 ~ dgamma(0.01,0.01)
  tau4 ~ dgamma(0.01,0.01)
  tau5 ~ dgamma(0.01,0.01)
  tau6 ~ dgamma(0.01,0.01)
  
  sigma1<-pow(tau1,-0.5)
  sigma2<-pow(tau2,-0.5)
  sigma3<-pow(tau3,-0.5)
  sigma4<-pow(tau4,-0.5)
  sigma5<-pow(tau5,-0.5)
  sigma6<-pow(tau6,-0.5)
  
  k~dbeta(143,214)
  ## k~dbeta(k1,k2) k1 ~= N/3; k2 ~=N/2
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBLRP_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                   & Facility_Type == '3_RMD' 
                   & AADT > 0 
                   & !is.na(AADT)
                   # & !is.na(SHD_WID)
                   # & !is.na(SURF_WID)
                   # & !is.na(OptSpd)
                   & !is.na(SpdVarr1)
                   & !is.na(SpdVarr2)
                   # & !is.na(SpdVarr3)
                   # & !is.na(SpdVarr4)
                   & !is.na(SpdAvg)
                   & !is.na(NO_LANES)
                   & !is.na(MEDWID),]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES,
                 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_RMD_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
# my.jags <-jags.model('NBL_OH_bug.R', 
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3)
# 
# ## Run MCMC
# my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4',
#                                    'alp', 'k'),
#                         n.iter = 25000, thin = 10)
# 
# ## Results
# mm <- as.matrix(my.sims) 
# mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
# colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
# mcmc.para
# 
# save.image(paste('MCMC_OH_RMD_NBL_25000', round(rnorm(1), 3), '.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
rm(list = ls()[!ls() %in% c('myX', 'myY')])

## Build Model
system.time(my.jags <-jags.model('OH_RMD_model_NBLRP_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))

## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
                                               'mub2', 'mub3', 
                                               'mub4', 'mub5', 'mub6', 
                                               'sigma1', 'sigma2',
                                               'sigma3', 'sigma4',
                                               'sigma5', 'sigma6',
                                               'alp', 'k','mu'),#
                                    n.iter = 25000, thin = 5))

a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:3,(nrow(a)-11):nrow(a)),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:3,(nrow(a)-11):nrow(a))]#

save.image(paste('MCMC_OH_RMD_NBLRP_25000_yearly.r', sep = ''))

Rural Multilane Undivided(NB)

library(data.table)
library(MASS)
rm(list=ls())
setwd("//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NB_OH")

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                    & Facility_Type == '2_RMU' 
                    & AADT > 0 
                    & !is.na(AADT)
                    # & !is.na(SHD_WID)
                    # & !is.na(SURF_WID)
                    # & !is.na(OptSpd)
                    & !is.na(SpdVarr1)
                    & !is.na(SpdVarr2)
                    # & !is.na(SpdVarr3)
                    # & !is.na(SpdVarr4)
                    & !is.na(SpdAvg)
                    & !is.na(NO_LANES)
                    # & !is.na(MEDWID)
                    ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES
                 # 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]




## First, develop a full model, including all the variables

nb_Model_Full <- glm.nb(myY ~ . - LogL + offset(LogL) , data = cbind(myY, myX[, -2]))
summary(nb_Model_Full)

# calculate DIC
Get_NB_DIC <- function(inNBModel, inY) {
  
  # for input NB model, get DIC
  inNBModel <- nb_Model_Full
  myMu <- exp(predict(inNBModel))
  myTheta <- summary(inNBModel$theta)
  mylogLik <- dnbinom(myY, size = myTheta, mu = myMu, log = T)
  myLlog <- sum(mylogLik)
  myDIC <- -2 * myLlog
  myDIC
}

myDIC <- Get_NB_DIC(nb_Model_Full, myY)

myDIC

# calculate MAD
y_hat = nb_Model_Full$fitted.values

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

save.image(paste('Results_OH_RMU_NB_yearly.r', sep = ''))

Rural Multilane Undivided(NBL)

model
{
  for(i in 1:67) # change this according to data size
    # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1*X[i, 3] + b2*X[i, 4] + 
                   b3*X[i, 5] + b4*X[i, 6] + 
                   b5*X[i, 7])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  
  b0~ dnorm(0, 0.01)
  b1~ dnorm(0, 0.01)
  b2~ dnorm(0, 0.01)
  b3~ dnorm(0, 0.01)
  b4~ dnorm(0, 0.01)
  b5~ dnorm(0, 0.01)
  alp ~ dunif(0.1, 10)
  k~dbeta(22,33) 
  # change this according to data size k~(size/3,size/2)
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBL_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                   & Facility_Type == '2_RMU' 
                   & AADT > 0 
                   & !is.na(AADT)
                   # & !is.na(SHD_WID)
                   # & !is.na(SURF_WID)
                   # & !is.na(OptSpd)
                   & !is.na(SpdVarr1)
                   & !is.na(SpdVarr2)
                   # & !is.na(SpdVarr3)
                   # & !is.na(SpdVarr4)
                   & !is.na(SpdAvg)
                   & !is.na(NO_LANES)
                   # & !is.na(MEDWID)
                   ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES
                 # 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_RMU_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
system.time(my.jags <-jags.model('OH_RMU_model_NBL_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))


## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4','b5',
                                               'alp', 'k','mu'),
                                    n.iter = 25000, thin = 10))
a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:8),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:8)]#

save.image(paste('MCMC_OH_RMU_NBL_25000.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
# rm(list = ls()[!ls() %in% c('myX', 'myY')])
# 
# ## Build Model
# system.time(my.jags <-jags.model('NBL_RP_OH_bug.R',
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3))
# 
# ## Run MCMC
# system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
#                                    'mub2', 'mub3', 
#                                    'mub4', 'mub5', 'mub6', 
#                                    'mub7', 'mub8', 'mub9', 
#                                    'sigma1', 'sigma2',
#                                    'sigma3', 'sigma4',
#                                    'sigma5', 'sigma6',
#                                    'sigma7', 'sigma8',
#                                    'sigma9', 
#                                    'alp', 'k'),
#                         n.iter = 1000, thin = 5))

## Results
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:8)]#
save.image(paste('MCMC_OH_RMU_NBL_25000_yearly.r', sep = ''))

## The End

Rural Multilane Undivided(RP-NBL)

model
{
  for(i in 1:67)
  # for(i in 1:81)
  # for(i in 1:1000)
  {
    YY[i] ~ dnegbin(p[i],r)
    p[i] <- r/(r+a[i]*mu[i])
    mu[i] <- exp(X[i, 1] + b0 + 
                   b1[i]*X[i, 3] + b2[i]*X[i, 4] + 
                   b3[i]*X[i, 5] + b4[i]*X[i, 6] + 
                   b5[i]*X[i, 7])
    
    a[i] ~ dgamma(f[i],t)
    f[i]<-1+z[i] 
    z[i] ~ dbern(k)  
    
    b1[i]~dnorm(mub1,tau1)
    b2[i]~dnorm(mub2,tau2)
    b3[i]~dnorm(mub3,tau3)
    b4[i]~dnorm(mub4,tau4)
    b5[i]~dnorm(mub5,tau5)

    mn[i]<-mu[i]*M1
  }
  
  r<-1/alp
  t<-(1-k)/k
  M1<-(t+2)/(t*(t+1))
  M2<-2*(t+3)/(t*t*(t+1))
  
  b0~ dnorm(0, 0.01)
  mub1~ dnorm(0, 0.01)
  mub2~ dnorm(0, 0.01)
  mub3~ dnorm(0, 0.01)
  mub4~ dnorm(0, 0.01)
  mub5~ dnorm(0, 0.01)

  alp ~ dunif(0.1, 10)
  
  tau1 ~ dgamma(0.01,0.01)
  tau2 ~ dgamma(0.01,0.01)
  tau3 ~ dgamma(0.01,0.01)
  tau4 ~ dgamma(0.01,0.01)
  tau5 ~ dgamma(0.01,0.01)

  
  sigma1<-pow(tau1,-0.5)
  sigma2<-pow(tau2,-0.5)
  sigma3<-pow(tau3,-0.5)
  sigma4<-pow(tau4,-0.5)
  sigma5<-pow(tau5,-0.5)

  
  k~dbeta(22,33)
  ## k~dbeta(k1,k2) k1 ~= N/3; k2 ~=N/2
  # k~dbeta(333,500) ## for 1000 sample
}


## OH NB, NBL NBL-RP Models
setwd('//ama-mobility4/d/ITSdata/DAS-Rural Safety/FHWA Rural Safety (Subasish Das)/Task 4/Task 5/Models/NBLRP_OH')
rm(list = ls())
library(data.table)
library(MASS)
library(rjags)
library(coda)

# 01. Load Data -----------------------------------------------------------
dat_yearly <- get(load(url('http://people.tamu.edu/~cma16/w3i4b3j1tam/TMC_Yearly.rData')))
## This is whole data
table(dat_yearly$Facility_Type)

myDat <- dat_yearly[myState == 'OH' 
                   & Facility_Type == '2_RMU' 
                   & AADT > 0 
                   & !is.na(AADT)
                   # & !is.na(SHD_WID)
                   # & !is.na(SURF_WID)
                   # & !is.na(OptSpd)
                   & !is.na(SpdVarr1)
                   & !is.na(SpdVarr2)
                   # & !is.na(SpdVarr3)
                   # & !is.na(SpdVarr4)
                   & !is.na(SpdAvg)
                   & !is.na(NO_LANES)
                   # & !is.na(MEDWID)
                   ,]
# & !is.na(SpdFF) 

myX <- myDat[, .('LogL' = log(DISTANCE),
                 'Inter' = 1,
                 'logAADT' = log(AADT), 
                 # 'LaneWidth' = SURF_WID/NO_LANES,
                 # 'Shdwidth' = SHD_WID,
                 # 'Surfwidth' = SURF_WID,
                 # 'OptSpd' = OptSpd,
                 'SpdVarr1' = SpdVarr1,
                 'SpdVarr2' = SpdVarr2,
                 # 'SpdVarr3' = SpdVarr3,
                 # 'SpdVarr4' = SpdVarr4,
                 'SpdAvg' = SpdAvg,
                 # 'SpdFF' = SpdFF
                 'Nolane' = NO_LANES
                 # 'Medwidth'= MEDWID
                 # 'Speedlim' = SPD_LIMT,
)]
nn_comp <- complete.cases(myX)
myX <- myX[nn_comp, ]
myY <- myDat$Total[nn_comp]
# attach(myX)

# 01. NB Model ------------------------------------------------------------
# myNB <- glm.nb(myY ~ offset(LogL) + logAADT + LaneWidth
#                  + SpdAvg + SpdAvgSD)
# 
# summary(myNB)
# 
# save(myNB, file='OH_RMU_NB.rData')

# 02. NBL Model -----------------------------------------------------------

## Change Line 3 of NBL_bug.r first
## i.e.,   for(i in 1:10000) ---> for(i in 1:Actual Data Size)

## Change Line 28 of NBL_bug.r first
## i.e.,   k~dbeta(xxx,yyy) ---> k~dbeta(Actual Data Size / 3, Actual Data Size / 2)

## Save the NBL_bug.r file

## Build Model
# my.jags <-jags.model('NBL_OH_bug.R', 
#                      data = list(YY = myY, X = myX),
#                      n.chains = 3)
# 
# ## Run MCMC
# my.sims <- coda.samples(my.jags, c('b0', 'b1', 'b2', 'b3', 'b4',
#                                    'alp', 'k'),
#                         n.iter = 25000, thin = 10)
# 
# ## Results
# mm <- as.matrix(my.sims) 
# mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
# colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
# mcmc.para
# 
# save.image(paste('MCMC_OH_RMU_NBL_25000', round(rnorm(1), 3), '.r', sep = ''))


# 04. NBL_RP Model --------------------------------------------------------
rm(list = ls()[!ls() %in% c('myX', 'myY')])

## Build Model
system.time(my.jags <-jags.model('OH_RMU_model_NBLRP_yearly.R',
                                 data = list(YY = myY, X = myX),
                                 n.chains = 3))

## Run MCMC
system.time(my.sims <- coda.samples(my.jags, c('b0', 'mub1', 
                                               'mub2', 'mub3', 
                                               'mub4', 'mub5', 
                                               'sigma1', 'sigma2',
                                               'sigma3', 'sigma4',
                                               'sigma5', 
                                               'alp', 'k','mu'),#
                                    n.iter = 25000, thin = 5))

a <- as.data.frame(summary(my.sims)[[1]])#

# calculate MAD
# exclude the top rows and only keep mu (y_bar)
y_hat <- a[-c(1:3,(nrow(a)-9):nrow(a)),1]

MAD <- sum(abs(myY - y_hat))/length(myY)

MAD

# calculate DIC
DIC <- dic.samples(model = my.jags,n.iter = 25000, thin = 5)#

DIC

## Results
mm <- as.matrix(my.sims)
mcmc.para <- as.data.table(t(rbind(colnames(mm), apply(mm, 2, mean), apply(mm, 2, sd))))
colnames(mcmc.para) <- c('Parameter', 'MeanValue', 'sd')
mcmc.para
output_para <- mcmc.para[c(1:3,(nrow(a)-9):nrow(a))]#

save.image(paste('MCMC_OH_RMU_NBLRP_25000_yearly.r', sep = ''))

Subasish Das

2019-01-07