###############################################
## Author: Joshua M. Tebbs
## Date: 11 December 2011
## Update: 30 November 2014
## STAT 509 course notes: R Code
## Chapter 1
###############################################
 
# Example 1.1
# Page 3
# MATH 141 example
# Simulate data
x1 = runif(50,400,800) # SAT MATH
x2 = rnorm(50,3,0.3) # HS GPA
y = rnorm(50,75,10) # MATH 141 score
 
# The data in Example 1.1 are not real (I simulated them).
# You will get a different picture (than in the notes) if you run the code below.
 
# Produce 3d scatterplot (point cloud)
# Need to install (then load) scatterplot3d package in R
# Packages --> install; Packages --> load
 
scatterplot3d(x1,x2,y,
     highlight.3d=FALSE,axis=TRUE,col.grid="lightblue",
     xlab="SAT MATH",ylab="HS GPA",zlab="MATH 141 SCORE",main="",
     box=FALSE,pch=20)


###############################################
## Date: 11 December 2011
## Update: 30 November 2014
## STAT 509 course notes: R Code
## Chapter 2
###############################################
 
# Example 2.2
# Page 8-9
# 2002 Oakland A's example
# Simulate each game during the season
game.outcome = rbinom(162,1,0.571)
# Create plot of relative frequencies
# Initialise
winning.percentage = game.outcome*0
for (i in 1:162){
     winning.percentage[i] = sum(game.outcome[1:i])/i
     }
plot(winning.percentage,ylab="Winning Percentage",xlab="Game",type="o",pch=20)
abline(h=0.571)
 
# Simulate one 20-game stretch
games = rbinom(20,1,0.571)
games
sum(games)
 
# Simulate 1000 20-game stretches
games = rbinom(1000,20,0.571)
# Number of times (out of 1000) that 20 straight games are won
length(games[games>19])


###############################################
## Date: 11 December 2011
## Update: 30 November 2014
## STAT 509 course notes: R Code
## Chapter 3
###############################################
 
# Example 3.1
# Page 19-20
# Discrete PMF and CDF
y = c(0,1,2,3,4,5,6)
prob = c(0.10,0.15,0.20,0.25,0.20,0.06,0.04)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.26))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
# Example 3.2
# Page 22-23
# Discrete PMF and CDF
y = c(0,1,2,3)
prob = c(0.20,0.30,0.30,0.20)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.325),
     xaxp=c(0, 3, 3))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
# Example 3.3
# Page 26-27
# Binomial PMF and CDF
y = c(0,1,2,3,4)
prob = dbinom(y,4,0.4)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.375))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
# Example 3.4
# Page 27-28
# Binomial PMF and CDF
y = seq(0,30,1)
prob = dbinom(y,30,0.1)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.25))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
# Example 3.5
# Page 30-31
# Geometric PMF and CDF
y = seq(1,25,1)
prob = dgeom(y-1,0.25)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.25))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
# Example 3.6
# Page 32-33
# Negative binomial PMF and CDF
y = seq(3,70,1)
prob = dnbinom(y-3,3,0.15)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.05))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
# Example 3.7
# Page 35-36
# Hypergeometric PMF and CDF
y = seq(0,5,1)
prob = dhyper(y,10,100-10,5)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.6))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
# Example 3.8
# Page 37-38
# Poisson PMF and CDF
y = seq(0,10,1)
prob = dpois(y,2.5)
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.275))
abline(h=0)
# Plot CDF
cdf = c(0,cumsum(prob))
cdf.plot = stepfun(y,cdf,f=0)
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
 
 
###############################################
## Date: 11 December 2011
## Update: 30 November 2014
## STAT 509 course notes: R Code
## Chapter 4
###############################################
 
# Example 4.1
# Page 40-41
# Continuous PDF and CDF
# Plot PDF
y = seq(0,1,0.01)
pdf = 3*y^2
plot(y,pdf,type="l",xlab="y",ylab="PDF",ylim=c(0,3))
abline(h=0)
abline(v=1,lty=2)
# Plot CDF
y0 = seq(-0.25,1.25,0.01)
y = seq(0,1,0.01)
cdf = c(rep(0,25),y^3,rep(1,25))
plot(y0,cdf,type="l",xlab="y",ylab="CDF",ylim=c(0,1))
abline(h=0)
 
 
# Example 4.2
# Page 43-44
# Continuous PDF and CDF
# Plot PDF
y = seq(12.5,13,0.01)
pdf = 20*exp(-20*(y-12.5))
plot(y,pdf,type="l",xlab="y",ylab="PDF",xlim=c(12.5,13))
abline(h=0)
abline(v=12.5,lty=2)
# Plot CDF
y = seq(12.5,13,0.01)
cdf = 1-exp(-20*(y-12.5))
plot(y,cdf,type="l",xlab="y",ylab="CDF",xlim=c(12.4,13),ylim=c(0,1))
abline(h=0)
 
# Calculate E(Y)
integrand.1 <- function(y){y*20*exp(-20*(y-12.5))}
integrate(integrand.1,lower=12.5,upper=Inf)
# Calculate var(Y)
integrand.2 <- function(y){(y-12.55)^2*20*exp(-20*(y-12.5))}
integrate(integrand.2,lower=12.5,upper=Inf)
 
 
# Figure 4.3
# Page 45
# Exponential PDFs
y = seq(0,8,0.01)
# Plot exponential pdf with lambda = 2
plot(y,dexp(y,2),type="l",lty=1,xlab="y",ylab="PDF")
# Add other pdfs
lines(y,dexp(y,1),lty=4)
lines(y,dexp(y,1/2),lty=8)
abline(h=0)
abline(v=0,lty=2)
# Add legend
legend(3,1,lty = c(1,4,8),
     c(
     expression(paste(lambda, "=2")),
     expression(paste(lambda, "=1")),
     expression(paste(lambda, "=1/2"))
     ))
 
 
# Example 4.3
# Page 46-47
# Exponential PDF and CDF
# Plot PDF
y = seq(0,15,0.01)
pdf = dexp(y,0.4)
plot(y,pdf,type="l",xlab="y",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
abline(v=0,lty=2)
# Plot CDF
cdf = pexp(y,0.4)
plot(y,cdf,type="l",xlab="y",ylab="CDF",ylim=c(0,1))
abline(h=0)
 
 
# Figure 4.5
# Page 50
# Gamma PDFs
y = seq(0,25,0.01)
# Plot gamma pdf with alpha = 1.5 and lambda = 0.6
plot(y,dgamma(y,1.5,0.6),type="l",lty=1,xlab="y",ylab="PDF")
# Add other pdfs
lines(y,dgamma(y,2,0.5),lty=4)
lines(y,dgamma(y,2.5,0.4),lty=8)
abline(h=0)
# Add legend
legend(10,0.20,lty = c(1,4,8),
     c(
     expression(paste(alpha, "=1.5, ", lambda, "=0.6")),
     expression(paste(alpha, "=2.0, ", lambda, "=0.5")),
     expression(paste(alpha, "=2.5, ", lambda, "=0.4"))
     ))
 
 
# Example 4.5
# Page 50-51
# Gamma PDF and CDF
# Plot PDF
y = seq(0,80,0.01)
pdf = dgamma(y,4,1/6)
plot(y,pdf,type="l",xlab="y",ylab="PDF",ylim=c(0,0.04))
abline(h=0)
# Plot CDF
cdf = pgamma(y,4,1/6)
plot(y,cdf,type="l",xlab="y",ylab="CDF",ylim=c(0,1))
abline(h=0)
 
 
# Figure 4.7
# Page 53
# Normal PDFs
y = seq(-10,10,0.01)
# Plot normal pdf with mu = 0 and sigma = 1 (standard normal pdf)
plot(y,dnorm(y,0,1),type="l",lty=1,xlab="y",ylab="PDF")
# Add other pdfs
lines(y,dnorm(y,-2,2),lty=4)
lines(y,dnorm(y,1,3),lty=8)
abline(h=0)
# Add legend
legend(3.5,0.30,lty = c(1,4,8),
     c(
     expression(paste(mu, "=0, ", sigma, "=1")),
     expression(paste(mu, "=-2, ", sigma, "=2")),
     expression(paste(mu, "=1, ", sigma, "=3"))
     ))
 
 
# Example 4.6
# Page 53-54
# Normal PDF and CDF
# Plot PDF
y = seq(0,3,0.01)
pdf = dnorm(y,1.5,sqrt(0.16))
plot(y,pdf,type="l",xlab="y",ylab="PDF",ylim=c(0,1))
abline(h=0)
# Plot CDF
cdf = pnorm(y,1.5,sqrt(0.16))
plot(y,cdf,type="l",xlab="y",ylab="CDF",ylim=c(0,1))
abline(h=0)


###############################################
## Date: 11 December 2011
## Update: 30 November 2014
## STAT 509 course notes: R Code
## Chapter 5
###############################################
 
# Figure 5.1
# Page 57
# Weibull PDFs
y = seq(0,25,0.01)
# Plot Weibull pdf with beta = 2 and eta = 5
plot(y,dweibull(y,2,5),type="l",lty=1,xlab="y",ylab="PDF")
# Add other pdfs
lines(y,dweibull(y,2,10),lty=4)
lines(y,dweibull(y,3,10),lty=8)
abline(h=0)
# Add legend
legend(15,0.15,lty = c(1,4,8),
     c(
     expression(paste(beta, "=2, ", eta, "=5")),
     expression(paste(beta, "=2, ", eta, "=10")),
     expression(paste(beta, "=3, ", eta, "=10"))
     ))
 
 
# Example 5.1
# Page 58-59
# Weibull PDF and CDF
# Plot PDF
t = seq(0,30,0.01)
pdf = dweibull(t,2,10)
plot(t,pdf,type="l",xlab="t",ylab="PDF",ylim=c(0,0.09))
abline(h=0)
# Plot CDF
cdf = pweibull(t,2,10)
plot(t,cdf,type="l",xlab="t",ylab="CDF",ylim=c(0,1))
abline(h=0)
 
 
# Example 5.2
# Weibull hazard functions
# Page 62
t = seq(0,4,0.01)
haz.1 = 3*t^2
haz.2 = 1.5*t^(0.5)
haz.3 = 1+t*0
haz.4 = 0.5*t^(-0.5)
# Make 2 by 2 figure
par(mfrow=c(2,2))
plot(t,haz.1,type='l',xlab="t",ylab="HAZARD")
abline(h=0,lty=2)
plot(t,haz.2,type='l',xlab="t",ylab="HAZARD")
abline(h=0,lty=2)
plot(t,haz.3,type='l',xlab="t",ylab="HAZARD")
abline(h=0,lty=2)
plot(t,haz.4,type='l',xlab="t",ylab="HAZARD",ylim=c(0,5))
abline(h=0,lty=2)
 
 
# Example 5.3
# Pages 63-67
# Need to install (then load) MASS package in R
# Enter data
cart.data = c(3.9,4.2,5.4,6.5,7.0,8.8,9.2,11.4,14.3,15.1,15.3,15.5,17.9,18.0,19.0,19.0,23.9,24.8,26.0,34.2)
# Fit Weibull model
fitdistr(cart.data,densfun="weibull")
# Maximum likelihood estimates
beta.hat = 1.99
eta.hat = 16.94
 
# Page 65
# Plot PDF, CDF, survivor, and hazard
# I constructed each function separately
# Use estimates beta.hat, eta.hat above
t = seq(0,50,0.01)
# Plot PDF
pdf = dweibull(t,beta.hat,eta.hat)
plot(t,pdf,type="l",xlab="t",ylab="PDF",ylim=c(0,0.05))
abline(h=0)
# Plot CDF
cdf = pweibull(t,beta.hat,eta.hat)
plot(t,cdf,type="l",xlab="t",ylab="CDF",ylim=c(0,1))
abline(h=0)
# Plot survivor function
survivor = 1-pweibull(t,beta.hat,eta.hat)
plot(t,survivor,type="l",xlab="t",ylab="SURVIVOR",ylim=c(0,1))
abline(h=0)
abline(v=0,lty=2)
# Plot hazard function
hazard = pdf/survivor
plot(t,hazard,type="l",xlab="t",ylab="HAZARD",ylim=c(0,0.35))
abline(h=0)
 
# Page 67
# Weibull qqplot
# Need to install (then load) car package in R
qqPlot(cart.data,distribution="weibull",shape=beta.hat,scale=eta.hat,
     xlab="Weibull quantiles",ylab="Cart data",pch=16)
 
 
###############################################
## Date: 11 December 2011
## Update: 1 December 2014
## STAT 509 course notes: R Code
## Chapter 6
###############################################
 
# Example 6.1
# Battery lifetime data
# Page 69-70
battery = c(4285,2066,2584,1009,318,1429,981,1402,1137,414,
     564,604,14,4152,737,852,1560,1786,520,396,
     1278,209,349,478,3032,1461,701,1406,261,83,
     205,602,3770,726,3894,2662,497,35,2778,1379,
     3920,1379,99,510,582,308,3367,99,373,454)
# Page 70
# Create histogram
hist(battery,xlab="Lifetime (in hours)",ylab="Count",main="")
# Create boxplot
boxplot(battery,xlab="",ylab="Lifetime (in hours)",col="grey")
# Calculate summary statistics
# Page 72
mean(battery) ## sample mean
var(battery) ## sample variance
sd(battery) ## sample standard deviation
 
 
# Example 6.2
# Brake time example
# Page 75-76
y = seq(0,3,0.01)
# Plot population distribution
plot(y,dnorm(y,1.5,sqrt(0.16)),type="l",lty=1,xlab="Brake times (sec)",ylab="PDF",ylim=c(0,5))
# Add sampling distribution when n=5
lines(y,dnorm(y,1.5,sqrt(0.16/5)),lty=4)
# Add sampling distribution when n=25
lines(y,dnorm(y,1.5,sqrt(0.16/25)),lty=8)
abline(h=0)
# Add legend
legend(0,5,lty = c(1,4,8),
     c(
     expression(paste("Population distribution")),
     expression(paste("Sample mean, n=5")),
     expression(paste("Sample mean, n=25"))
     ))
 
 
# Example 6.3
# Rat example
# Page 77-78
y = seq(0,20,0.01)
# Plot population distribution
plot(y,dexp(y,1/5),type="l",lty=1,xlab="Time to death (in days)",ylab="PDF",ylim=c(0,0.4))
# Add sampling distribution when n=5
lines(y,dgamma(y,5,1),lty=4)
# Add sampling distribution when n=5
lines(y,dgamma(y,25,5),lty=8)
abline(h=0)
abline(v=0,lty=2)
# Add legend
legend(10,0.3,lty = c(1,4,8),
     c(
     expression(paste("Population distribution")),
     expression(paste("Sample mean, n=5")),
     expression(paste("Sample mean, n=25"))
     ))
 
 
# Figure 6.4
# Page 80
# N(0,1) and t PDFs
y = seq(-5,5,0.01)
# Plot N(0,1) pdf
plot(y,dnorm(y,0,1),type="l",lty=1,xlab="",ylab="PDF",ylim=c(0,0.4))
# Add other pdfs
lines(y,dt(y,2),lty=4)
lines(y,dt(y,10),lty=8)
abline(h=0)
# Add legend
legend(2,0.35,lty = c(1,4,8),
     c(
     expression(paste("N(0,1)")),
     expression(paste("t(2)")),
     expression(paste("t(10)"))
     ))
 
 
# Example 6.5
# Page 81-84
# Pipe diameter data
pipes = c(1.296,1.320,1.311,1.298,1.315,1.305,1.278,1.294,1.311,1.290,1.284,1.287,1.289,1.292,1.301,
     1.298,1.287,1.302,1.304,1.301,1.313,1.315,1.306,1.289,1.291)
# Calculate summary statistics
mean(pipes) ## sample mean
sd(pipes) ## sample standard deviation
 
# Page 82
# Plot t(24) pdf
t = seq(-5,5,0.001)
pdf = dt(t,24)
plot(t,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
# Add points
points(x=4.096,y=0,pch=4,cex=1.5)
 
# Page 84
# Create normal qqplot
# Need to install (then load) car package in R
qqPlot(pipes,distribution="norm",xlab="N(0,1) quantiles",ylab="Pipe data",pch=16)
 

###############################################
## Date: 11 December 2011
## Update: 4 December 2014
## STAT 509 course notes: R Code
## Chapter 7
###############################################
 
# Figure 7.1
# Page 87
# t pdf with quantiles and shading
x = seq(-4,4,0.001)
pdf = dt(x,10)
plot(x,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
x = seq(-4,qt(0.025,10),0.001)
y = dt(x,10)
polygon(c(-4,x,qt(0.025,10)),c(0,y,0),col="grey")
# Add points
points(x=qt(0.025,10),y=0,pch=19,cex=1.5)
x = seq(qt(0.975,10),4,0.001)
y = dt(x,10)
polygon(c(qt(0.975,10),x,4),c(0,y,0),col="grey")
# Add points
points(x=qt(0.975,10),y=0,pch=19,cex=1.5)
# Add text
text(-0.025,0.1,expression(1-alpha))
text(-3.5,0.05,expression(alpha/2))
text(3.5,0.05,expression(alpha/2))
 
 
# Example 7.1
# Cadmium data
# Page 89-90
cadmium = c(0.044,0.030,0.052,0.044,0.046,0.020,0.066,
     0.052,0.049,0.030,0.040,0.045,0.039,0.039,
     0.039,0.057,0.050,0.056,0.061,0.042,0.055,
     0.037,0.062,0.062,0.070,0.061,0.061,0.058,
     0.053,0.060,0.047,0.051,0.054,0.042,0.051)
# Calculate summary statistics
mean(cadmium) ## sample mean
sd(cadmium) ## sample standard deviation
 
# Calculate confidence interval directly
# Page 91
t.test(cadmium,conf.level=0.99)$conf.int
 
# Create normal qqplot
# Page 92
# Need to install (then load) car package in R
qqPlot(cadmium,distribution="norm",xlab="N(0,1) quantiles",ylab="Cadmium data",pch=16)
 
 
# Figure 7.3
# Page 94
# Chi-square PDFs
y = seq(0,45,0.01)
# Plot chi-square pdf with 5 df
plot(y,dchisq(y,5),type="l",lty=1,xlab=expression(chi^2),ylab="PDF",ylim=c(0,0.16))
# Add other pdfs
lines(y,dchisq(y,10),lty=4)
lines(y,dchisq(y,20),lty=8)
abline(h=0)
# Add legend
legend(25,0.125,lty = c(1,4,8),
     c(
     expression(paste(nu, "=5")),
     expression(paste(nu, "=10")),
     expression(paste(nu, "=20"))
     ))
 
 
# Figure 7.4
# Page 95
# chi-square pdf with quantiles and shading
x = seq(0,30,0.001)
pdf = dchisq(x,10)
plot(x,pdf,type="l",lty=1,xlab=expression(chi^2),ylab="PDF",ylim=c(0,0.1))
abline(h=0)
x = seq(0,qchisq(0.025,10),0.001)
y = dchisq(x,10)
polygon(c(0,x,qchisq(0.025,10)),c(0,y,0),col="grey")
# Add points
points(x=qchisq(0.025,10),y=0,pch=19,cex=1.5)
x = seq(qchisq(0.975,10),30,0.001)
y = dchisq(x,10)
polygon(c(qchisq(0.975,10),x,30),c(0,y,0),col="grey")
# Add points
points(x=qchisq(0.975,10),y=0,pch=19,cex=1.5)
# Add text
text(9.8,0.02,expression(1-alpha))
text(0.5,0.01,expression(alpha/2))
text(25,0.0075,expression(alpha/2))
 
 
# Example 7.2
# IKEA diameter data
# Page 97
diameters = c(1.206,1.190,1.200,1.195,1.201,1.200,1.198,1.196,1.195,
     1.202,1.203,1.210,1.206,1.193,1.207,1.201,1.199,1.200,1.199,
     1.204,1.194,1.203,1.194,1.199,1.203,1.200,1.197,1.208,1.199,
     1.205,1.199,1.204,1.202,1.196,1.211,1.204)
 
# R does not have an internal function to calculate CI for population variance
# I wrote this function to do it
var.interval = function(data,conf.level=0.95){
     df = length(data)-1
     chi.lower = qchisq((1-conf.level)/2,df)
     chi.upper = qchisq((1+conf.level)/2,df)
     s2 = var(data)
     c(df*s2/chi.upper,df*s2/chi.lower)
     }
 
# CI for population variance
var.interval(diameters)
# CI for population standard deviation
sd.interval = sqrt(var.interval(diameters))
sd.interval
 
# Create normal qqplot
# Page 99
# Need to install (then load) car package in R
qqPlot(diameters,distribution="norm",xlab="N(0,1) quantiles",ylab="Diameter data",pch=16)
 
 
# Figure 7.6
# Page 102
# N(0,1) pdf with quantiles and shading
x = seq(-4,4,0.001)
pdf = dnorm(x,0,1)
plot(x,pdf,type="l",lty=1,xlab="z",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
x = seq(-4,qnorm(0.025,0,1),0.001)
y = dnorm(x,0,1)
polygon(c(-4,x,qnorm(0.025,0,1)),c(0,y,0),col="grey")
# Add points
points(x=qnorm(0.025,0,1),y=0,pch=19,cex=1.5)
x = seq(qnorm(0.975,0,1),4,0.001)
y = dnorm(x,0,1)
polygon(c(qnorm(0.975,0,1),x,4),c(0,y,0),col="grey")
# Add points
points(x=qnorm(0.975,0,1),y=0,pch=19,cex=1.5)
# Add text
text(-0.025,0.1,expression(1-alpha))
text(-2.8,0.04,expression(alpha/2))
text(2.8,0.04,expression(alpha/2))
 
 
###############################################
## Date: 11 December 2011
## Update: 7 December 2014
## STAT 509 course notes: R Code
## Chapter 8
###############################################
 
# Figure 8.1
# Page 111
y = seq(35,75,0.01)
plot(y,dnorm(y,50,3),type="l",lty=1,xlab="y",ylab="PDF",ylim=c(0,0.15))
lines(y,dnorm(y,60,3),lty=4)
abline(h=0)
# Add legend
legend(63,0.15,lty = c(1,4),
     c(
     expression(paste("Population 1")),
     expression(paste("Population 2"))
     ))
 
 
# Figure 8.2
# Page 112
# t pdf with quantiles and shading
x = seq(-4,4,0.001)
pdf = dt(x,10)
plot(x,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
x = seq(-4,qt(0.025,10),0.001)
y = dt(x,10)
polygon(c(-4,x,qt(0.025,10)),c(0,y,0),col="grey")
# Add points
points(x=qt(0.025,10),y=0,pch=19,cex=1.5)
x = seq(qt(0.975,10),4,0.001)
y = dt(x,10)
polygon(c(qt(0.975,10),x,4),c(0,y,0),col="grey")
# Add points
points(x=qt(0.975,10),y=0,pch=19,cex=1.5)
# Add text
text(-0.025,0.1,expression(1-alpha))
text(-3.5,0.05,expression(alpha/2))
text(3.5,0.05,expression(alpha/2))
 
 
# Example 8.1
# Fish weight data
# Page 113-116
loc.1 = c(21.9,18.5,12.3,16.7,21.0,15.1,18.2,23.0,36.8,26.6)
loc.2 = c(21.0,19.6,14.4,16.9,23.4,14.6,10.4,16.5)
# Create side by side boxplots
boxplot(loc.1,loc.2,xlab="",names=c("Location 1","Location 2"),ylab="Weight (in ounces)",ylim=c(0,40),col="grey")
# Calculate confidence interval
t.test(loc.1,loc.2,conf.level=0.90,var.equal=TRUE)$conf.int
 
# Page 116
# Create normal qqplots for each sample
# Need to install (then load) car package in R
qqPlot(loc.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 1",pch=16)
qqPlot(loc.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 2",pch=16)
 
 
# Example 8.2
# White paper data
# Page 117-119
plant.1 = c(3.01,2.58,3.04,1.75,2.87,2.57,2.51,2.93,2.85,3.09,
         1.43,3.36,3.18,2.74,2.25,1.95,3.68,2.29,1.86,2.63,
         2.83,2.04,2.23,1.92,3.02)
plant.2 = c(3.99,2.08,3.66,1.53,4.27,4.31,2.62,4.52,3.80,5.30,
         3.41,0.82,3.03,1.95,6.45,1.86,1.87,3.98,2.74,4.81)
# Create side by side boxplots
boxplot(plant.1,plant.2,xlab="",names=c("Plant 1","Plant 2"),ylab="Weight (in 100s lb)",ylim=c(0,8),col="grey")
# Calculate confidence interval
t.test(plant.1,plant.2,conf.level=0.95,var.equal=FALSE)$conf.int
 
# Page 119
# Create normal qqplots for each sample
# Need to install (then load) car package in R
qqPlot(plant.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 1",pch=16)
qqPlot(plant.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 2",pch=16)
 
 
# Example 8.3
# Ergonomics data: Matched pairs analysis
# Page 119-124
before = c(81.3,87.2,86.1,82.2,90.8,86.9,96.5,73.0,84.2,74.5,72.0,73.8,74.2,
     74.9,75.8,72.6,80.8,66.5,72.2,56.5,82.4,88.8,80.0,91.1,97.5,70.0)
after = c(78.9,91.4,78.3,78.3,84.4,67.4,92.8,69.9,63.8,69.7,68.4,71.8,58.3,
     58.3,62.5,70.2,58.7,66.6,60.7,65.0,73.7,80.4,78.8,81.8,91.6,74.2)
# Create data differences
diff = before-after
# Calculate confidence interval
t.test(diff,conf.level=0.95)$conf.int
 
# Page 124
# Create normal qqplots for each sample
# Need to install (then load) car package in R
qqPlot(diff,distribution="norm",xlab="N(0,1) quantiles",ylab="Data differences",pch=16)
 
 
# Figure 8.8
# Page 125
# F PDFs
y = seq(0,8,0.01)
plot(y,df(y,5,10),type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.85))
# Add other F pdfs
lines(y,df(y,5,20),lty=4)
lines(y,df(y,10,20),lty=8)
abline(h=0)
# Add legend
legend(4,0.5,lty = c(1,4,8),
     c(
     expression(paste(nu[1], "=5, ", nu[2], "=10")),
     expression(paste(nu[1], "=5, ", nu[2], "=20")),
     expression(paste(nu[1], "=10, ", nu[2], "=20"))
     ))
 
 
# Figure 8.9
# Page 127
# F pdf with quantiles and shading
x = seq(0,6,0.001)
pdf = df(x,10,10)
plot(x,pdf,type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.8))
abline(h=0)
x = seq(0,qf(0.025,10,10),0.001)
y = df(x,10,10)
polygon(c(0,x,qf(0.025,10,10)),c(0,y,0),col="grey")
# Add points
points(x=qf(0.025,10,10),y=0,pch=19,cex=1.5)
x = seq(qf(0.975,10,10),6,0.001)
y = df(x,10,10)
polygon(c(qf(0.975,10,10),x,6),c(0,y,0),col="grey")
# Add points
points(x=qf(0.975,10,10),y=0,pch=19,cex=1.5)
# Add text
text(1.2,0.15,expression(1-alpha))
text(0.0,0.2,expression(alpha/2))
text(4.5,0.05,expression(alpha/2))
 
 
# Example 8.4
# Paint fill volume data
# Page 128-130
process.1 = c(127.75,127.87,127.86,127.92,128.03,127.94,127.91,128.10,128.01,128.11,127.79,127.93,
           127.89,127.96,127.80,127.94,128.02,127.82,128.11,127.92,127.74,127.78,127.85,127.96)
process.2 = c(127.90,127.90,127.74,127.93,127.62,127.76,127.63,127.93,127.86,127.73,127.82,127.84,
           128.06,127.88,127.85,127.60,128.02,128.05,127.95,127.89,127.82,127.92,127.71,127.78)
# Create side by side boxplots
boxplot(process.1,process.2,xlab="",names=c("Process 1","Process 2"),
     ylab="Fluid ounces",ylim=c(127.5,128.25),col="grey")
 
# R does not have an internal function to calculate CI for the ratio of population variances
# I wrote this function to do it
ratio.var.interval = function(data.1,data.2,conf.level=0.95){
     df.1 = length(data.1)-1
     df.2 = length(data.2)-1
     F.lower = qf((1-conf.level)/2,df.1,df.2)
     F.upper = qf((1+conf.level)/2,df.1,df.2)
     s2.1 = var(data.1)
     s2.2 = var(data.2)
     c((s2.2/s2.1)*F.lower,(s2.2/s2.1)*F.upper)
     }
 
# CI for ratio of population variances
ratio.var.interval(process.1,process.2)
 
# Page 130
# Create normal qqplots for each sample
# Need to install (then load) car package in R
qqPlot(process.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 1",pch=16)
qqPlot(process.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 2",pch=16)
 
 
# Example 8.5
# Irish HBV data
# Page 133
# R does not have an internal function to calculate CI for the difference of two population proportions
# I wrote this function to do it
proportion.diff.interval = function(y.1,n.1,y.2,n.2,conf.level=0.95){
     z.upper = qnorm((1+conf.level)/2)
     var.1 = (y.1/n.1)*(1-y.1/n.1)/n.1
     var.2 = (y.2/n.2)*(1-y.2/n.2)/n.2
     se = sqrt(var.1+var.2)
     moe = z.upper*se
     c((y.1/n.1-y.2/n.2)-moe,(y.1/n.1-y.2/n.2)+moe)
     }
# CI for difference of two population proportions
proportion.diff.interval(18,82,28,555)
 
 
###############################################
## Date: 11 December 2011
## Update: 7 December 2014
## STAT 509 course notes: R Code
## Chapter 9
###############################################
 
# Example 9.1
# Mortar strength data
# Page 136-137
OCM = c(51.45,42.96,41.11,48.06,38.27,38.88,42.74,49.62)
PIM = c(64.97,64.21,57.39,52.79,64.87,53.27,51.24,55.87,61.76,67.15)
RM = c(48.95,62.41,52.11,60.45,58.07,52.16,61.71,61.06,57.63,56.80)
PCM = c(35.28,38.59,48.64,50.99,51.52,52.85,46.75,48.31)
# Create side-by-side boxplots
boxplot(OCM,PIM,RM,PCM,
 xlab="",names=c("OCM","PIM","RM","PCM"),ylab="Strength (MPa)",col="grey")
 
# Perform F test/Create ANOVA table
# Page 143
# Create response with all data
strength = c(OCM,PIM,RM,PCM)
# Create a treatment indicator variable
mortar.type = c(
     rep("OCM",length(OCM)),
     rep("PIM",length(PIM)),
     rep("RM",length(RM)),
     rep("PCM",length(PCM))
     )
mortar.type = factor(mortar.type)
# Analysis of variance
anova(lm(strength ~ mortar.type))
 
# Plot F(3,32) pdf
# Page 143
f = seq(0,18,0.01)
plot(f,df(f,3,32),type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.75))
abline(h=0)
# Add points
points(x=16.848,y=0,pch=4,cex=1.5)
 
# Follow-up analysis
# Page 150
# Tukey confidence intervals
TukeyHSD(aov(lm(strength ~ mortar.type)),conf.level=0.95)
 

 ###############################################
## Author: Joshua M. Tebbs
## Date: 11 December 2011
## Update: 11 December 2014
## STAT 509 course notes: R Code
## Chapter 10
###############################################
 
# Example 10.1.
# Sewage sludge data
# Page 154-155
filtration.rate=c(125.3,98.2,201.4,147.3,145.9,124.7,112.2,120.2,161.2,178.9,
            159.5,145.8,75.1,151.4,144.2,125.0,198.8,132.5,159.6,110.7)
moisture=c(77.9,76.8,81.5,79.8,78.2,78.3,77.5,77.0,80.1,80.2,
         79.9,79.0,76.7,78.2,79.5,78.1,81.5,77.0,79.0,78.6)
# Construct scatterplot
plot(filtration.rate,moisture,xlab="Filtration rate (kg-DS/m/hr)",
      ylab="Moisture (Percentage)",pch=16)
 
# Page 156-157
# Fit the model
fit = lm(moisture~filtration.rate)
fit
# Page 157
# Construct scatterplot (with least squares line superimposed)
plot(filtration.rate,moisture,xlab="Filtration rate (kg-DS/m/hr)",
      ylab="Moisture (Percentage)",pch=16)
abline(fit)
 
# Page 160-162
# Calculate fitted values and residuals
fitted.values = predict(fit)
residuals = moisture-fitted.values
# Show residuals sum to 0
sum(residuals)
# Calculate MSres
MSres = sum(residuals^2)/(length(moisture)-2)
MSres
# Calculate residual standard error
resid.std.error = sqrt(MSres)
resid.std.error
 
# Page 163
# Confidence interval for beta0 and beta1
fit = lm(moisture~filtration.rate)
confint(fit,level=0.95)
 
# Page 164-165
# Regression output for inference
fit = lm(moisture~filtration.rate)
summary(fit)
 
# Page 165
# Plot t(18) pdf
t = seq(-9,9,0.01)
pdf = dt(t,18)
plot(t,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
points(x=8.484,y=0,pch=4,cex=1.5)
 
# Page 168
# Calculate confidence/prediction intervals
fit = lm(moisture~filtration.rate)
predict(fit,data.frame(filtration.rate=150),level=0.95,interval="confidence")
predict(fit,data.frame(filtration.rate=150),level=0.95,interval="prediction")
 
# Page 169
# Confidence/Prediction bands
fit = lm(moisture~filtration.rate)
x.not = seq(80,200,1)
conf = predict(fit,data.frame(filtration.rate=x.not),level=0.95,interval="confidence")
pred = predict(fit,data.frame(filtration.rate=x.not),level=0.95,interval="prediction")
plot(filtration.rate,moisture,xlab = "Filtration rate (kg-DS/m/hr)",
      ylab = "Moisture (Percentage)",pch=16,
      xlim=c(min(filtration.rate),max(filtration.rate)),
      ylim=c(min(moisture),max(moisture)))
abline(fit)
par(new=T)
plot(x.not,conf[,2],type="l",lty=2,xlab="",ylab="",
      xlim=c(min(filtration.rate),max(filtration.rate)),
      ylim=c(min(moisture),max(moisture)))
par(new=T)
plot(x.not,conf[,3],type="l",lty=2,xlab="",ylab="",
      xlim=c(min(filtration.rate),max(filtration.rate)),
      ylim=c(min(moisture),max(moisture)))
par(new=T)
plot(x.not,pred[,2],type="l",lty=4,xlab="",ylab="",
      xlim=c(min(filtration.rate),max(filtration.rate)),
      ylim=c(min(moisture),max(moisture)))
par(new=T)
plot(x.not,pred[,3],type="l",lty=4,xlab="",ylab="",
      xlim=c(min(filtration.rate),max(filtration.rate)),
      ylim=c(min(moisture),max(moisture)))
# Create legend
# You MUST "click" where you want it to go (on figure)
names = c("95% confidence","95% prediction")
legend(locator(1),legend=names,lty=c(2,4),bty="n")
 
 
 ###############################################
## Date: 11 December 2011
## Update: 14 December 2014
## STAT 509 course notes: R Code
## Chapter 11
###############################################
 
# Example 11.1
# Cheese data
# Page 170-171
 
## Find data web site
## Cut and paste the data set into a Notepad file
## Save this file (e.g., cheese.txt) to a directory that you know
## Use R to import the data from a directory on your computer
cheese = read.table(
     "C:\\Users\\Tebbs\\Documents\\texfiles\\Classes\\USC\\stat509\\s15\\data\\cheese.txt",header=TRUE)
 
# Define variables
taste<-cheese[,1]
acetic<-cheese[,2]
h2s<-cheese[,3]
lactic<-cheese[,4]
 
# Page 174
# Fit the model
fit = lm(taste~acetic+h2s+lactic)
fit
 
# Page 175
# Calculate fitted values and residuals
fit = lm(taste~acetic+h2s+lactic)
fitted.values = predict(fit)
residuals = taste-fitted.values
 
# Figure 11.1
# Page 179
# F(3,26) pdf with F statistic
f = seq(0,20,0.01)
pdf = df(f,3,26)
plot(f,pdf,type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.7))
abline(h=0)
# Add points
points(x=16.22,y=0,pch=4,cex=1.5)
 
# Page 181
# R's ANOVA with sequential SS
fit = lm(taste~acetic+h2s+lactic)
anova(fit)
 
# Page 182
# Different ordering
fit.2 = lm(taste~h2s+lactic+acetic)
anova(fit.2)
 
# Page 184
# Confidence intervals for regression parameters
fit = lm(taste~acetic+h2s+lactic)
confint(fit,level=0.95)
 
# Page 185
# Confidence/Prediction intervals
fit = lm(taste~acetic+h2s+lactic)
predict(fit,data.frame(acetic=5.5,h2s=6.0,lactic=1.4),level=0.95,interval="confidence")
predict(fit,data.frame(acetic=5.5,h2s=6.0,lactic=1.4),level=0.95,interval="prediction")
 
# Page 187
# Create normal qqplots for residuals
# Need to install (then load) car package in R
fit = lm(taste~acetic+h2s+lactic)
qqPlot(residuals(fit),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16)
 
 
#####################
# Example 11.2
#####################
 
# Electricity data
# Page 189
electricity = read.table(
     "C:\\Users\\Tebbs\\Documents\\texfiles\\Classes\\USC\\stat509\\s15\\data\\electricity.txt",header=TRUE)
 
# Define variables
monthly.usage = electricity[,1]
peak.demand = electricity[,2]
 
# Fit the model
fit = lm(peak.demand ~ monthly.usage)
fit
 
# Page 190
# Plots were constructed separately
# Scatterplot
plot(monthly.usage,peak.demand,xlab = "Monthly Usage (kWh)",
      ylab = "Peak Demand (kWh)", pch=16)
abline(fit)
# Residual plot
plot(fitted(fit),residuals(fit),pch=16,
     xlab="Fitted values",ylab="Residuals")
abline(h=0)
 
# Page 191
# Fit the transformed model
# Square-root transformation
fit.2 = lm(sqrt(peak.demand) ~ monthly.usage)
fit.2
# Confidence intervals for regression parameters
confint(fit.2,level=0.95)
 
# Page 192
# Transformed model (square-root transformation)
# Plots were constructed separately
# Scatterplot
plot(monthly.usage,sqrt(peak.demand),xlab = "Monthly Usage (kWh)",
      ylab = "Peak Demand (kWh): Square root scale", pch=16)
abline(fit.2)
# Residual plot
plot(fitted(fit.2),residuals(fit.2),pch=16,
     xlab="Fitted values",ylab="Residuals")
abline(h=0)
 
# Create normal qqplots for residuals (not shown in notes)
# Need to install (then load) car package in R
fit.2 = lm(sqrt(peak.demand) ~ monthly.usage)
qqPlot(residuals(fit.2),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16)
 
 
#####################
# Example 11.3
#####################
 
# Windmill data
# Page 193
wind.velocity = c(5,6,3.4,2.7,10,9.7,9.55,3.05,8.15,6.2,2.9,6.35,4.6,5.8,7.4,3.6,7.85,8.8,7,5.45,9.1,10.2,
     4.1,3.95,2.45)
DC.output = c(1.582,1.822,1.057,0.5,2.236,2.386,2.294,0.558,2.166,1.866,0.653,1.93,1.562,1.737,2.088,1.137,
     2.179,2.112,1.8,1.501,2.303,2.31,1.194,1.144,0.123)
# Fit the model
fit = lm(DC.output ~ wind.velocity)
fit
anova(fit)
 
# Page 193
# Plots were constructed separately
# Scatterplot with simple linear regression model fit
plot(wind.velocity,DC.output,xlab = "Wind Velocity (mph)",
      ylab = "DC Output", pch=16)
abline(fit)
# Residual plot
plot(fitted(fit),residuals(fit),pch=16,
     xlab="Fitted values",ylab="Residuals")
abline(h=0)
 
# Page 194
# Fit quadratic model
wind.velocity.sq = wind.velocity^2
fit.2 = lm(DC.output ~ wind.velocity + wind.velocity.sq)
fit.2
 
# Page 195
# Plots were constructed separately
# Scatterplot with quadratic regression model fit
plot(wind.velocity,DC.output,xlab = "Wind Velocity (mph)",
      ylab = "DC Output", pch=16)
curve(expr = fit.2$coefficients[1] +
             fit.2$coefficients[2]*x +
             fit.2$coefficients[3]*x^2, col = "black", 
             lty = "solid", lwd = 1, add = TRUE)
# Residual plot
plot(fitted(fit.2),residuals(fit.2),pch=16,
     xlab="Fitted values",ylab="Residuals")
abline(h=0)
 
# Page 195
# Confidence intervals for regression parameters
fit.2 = lm(DC.output ~ wind.velocity + wind.velocity.sq)
confint(fit.2,level=0.95)
 
# Page 196
# Create normal qqplots for residuals
# Need to install (then load) car package in R
# Simple linear regression fit
qqPlot(residuals(fit),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16)
# Quadratic regression fit
qqPlot(residuals(fit.2),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16)
 

 ###############################################
## Date: 11 December 2011
## Update: 14 December 2014
## STAT 509 course notes: R Code
## Chapter 12
###############################################
 
# Example 12.2
# Corn yield data
# Page 199-200
# Enter the data
a1b1 = c(35, 26, 25, 33, 31)
a1b2 = c(39, 33, 41, 31, 36)
a2b1 = c(37, 27, 35, 27, 34)
a2b2 = c(49, 39, 39, 47, 46)
 
# Page 200
# One way ANOVA/Overall F test
# Create response with all data
yield = c(a1b1,a1b2,a2b1,a2b2)
# Create treatment indicator variable
treatment = c(
     rep("a1b1",length(a1b1)),
     rep("a1b2",length(a1b2)),
     rep("a2b1",length(a2b1)),
     rep("a2b2",length(a2b2))
     )
treatment = factor(treatment)
# One way analysis
anova(lm(yield ~ treatment))
 
# Page 201
# Construct side by side boxplots
boxplot(a1b1,a1b2,a2b1,a2b2,xlab="",names=c("a1b1","a1b2","a2b1","a2b2"),
     ylab="Yield (bushels/plot)",col="grey")
 
# Page 202
# Two way ANOVA with interaction
nitrogen = c(
     rep("a1",length(a1b1)),
     rep("a1",length(a1b2)),
     rep("a2",length(a2b1)),
     rep("a2",length(a2b1))
     )
phosphorus = c(
     rep("b1",length(a1b1)),
     rep("b2",length(a1b2)),
     rep("b1",length(a2b1)),
     rep("b2",length(a2b1))    
     )
nitrogen = factor(nitrogen)
phosphorus = factor(phosphorus)
fit = lm(yield ~ nitrogen + phosphorus + nitrogen*phosphorus)
anova(fit)
 
# Page 203
# Plot F(1,16) pdf
f = seq(0,8,0.01)
pdf = df(f,1,16)
plot(f,pdf,type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,1))
abline(h=0)
# Add points
points(x=2.25,y=0,pch=4,cex=1.5)
 
# Page 204
# Interaction plot
interaction.plot(nitrogen,phosphorus,yield,fun = mean,
     ylab="Mean yield (bushels/plot)",ylim=c(20,50))
 
# Page 206
# Two way ANOVA with no interaction
fit.2 = lm(yield ~ nitrogen + phosphorus)
anova(fit.2)
 
# Page 206
# Confidence intervals for difference of factor means
# MSE = mean squared residuals from no interaction model
MSE = 21.47
# Aggregate Factor A data
nitrogen.1 = c(a1b1,a1b2)
nitrogen.2 = c(a2b1,a2b2)
# Aggregate Factor B data
phosphorus.1 = c(a1b1,a2b1)
phosphorus.2 = c(a1b2,a2b2)
# Nitrogen CI
est = mean(nitrogen.1)-mean(nitrogen.2)
moe = qt(0.975,17)*sqrt(MSE*0.2)
c(est-moe,est+moe)
# Phospherus CI
est = mean(phosphorus.1)-mean(phosphorus.2)
moe = qt(0.975,17)*sqrt(MSE*0.2)
c(est-moe,est+moe)
 
# Page 207
# Individual factor side by side boxplots
# Plots were constructed separately
boxplot(nitrogen.1,nitrogen.2,
     xlab="",names=c("Nitrogen.1","Nitrogen.2"),ylab="Yield (bushels/plot)",ylim=c(20,50),col="grey")
boxplot(phosphorus.1,phosphorus.2,
     xlab="",names=c("Phosphorus.1","Phosphorus.2"),ylab="Yield (bushels/plot)",ylim=c(20,50),col="grey")
 
 
##############
# Example 12.3
##############
 
# Filtration rate data
# Page 208
# Enter the data
filtration = c(45,71,48,65,68,60,80,65,43,100,45,104,75,86,70,96)
A = c("a1","a2","a1","a2","a1","a2","a1","a2","a1","a2","a1","a2","a1","a2","a1","a2")
B = c("b1","b1","b2","b2","b1","b1","b2","b2","b1","b1","b2","b2","b1","b1","b2","b2")
C = c("c1","c1","c1","c1","c2","c2","c2","c2","c1","c1","c1","c1","c2","c2","c2","c2")
D = c("d1","d1","d1","d1","d1","d1","d1","d1","d2","d2","d2","d2","d2","d2","d2","d2")
A = factor(A)
B = factor(B)
C = factor(C)
D = factor(D)
 
# Page 210
# Fit full model
fit = lm(filtration ~ A*B*C*D)
anova(fit)
 
# Page 211
# Fit smaller model
fit = lm(filtration ~ A + C + D + A*C + A*D)
anova(fit)
 
# Page 211
# Interaction plots
# Plots were constructed separately
# AC Interaction plot
Factor.A = factor(A)
Factor.C = factor(C)
interaction.plot(Factor.A,Factor.C,filtration,fun = mean,
     ylab="Filtration rate (gal/hr)",ylim=c(40,100))
# AD Interaction plot
Factor.A = factor(A)
Factor.D = factor(D)
interaction.plot(Factor.A,Factor.D,filtration,fun = mean,
     ylab="Filtration rate (gal/hr)",ylim=c(40,100))
 
# Page 212
# Multiple linear regression model
filtration = c(45,71,48,65,68,60,80,65,43,100,45,104,75,86,70,96)
temp = c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1)
conc = c(-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1,1,1,1,1)
stir = c(-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1)
fit = lm(filtration~temp+conc+stir+temp:conc+temp:stir)
fit