setwd("C:/!!!Current Projects/!MLRM/Lectures/Introduction to Simulation/")

library(arm)

s=round(sqrt(.65* (1-.65)/100),4)
upper = .65 + 1.95996 * s
lower = .65 - 1.95996 * s
upper = round(upper,4)
lower = round(lower,4)
########################
simple.interval <- function(phat,N,conf)
{
z <- qnorm(1-(1-conf)/2)
dist <- z * sqrt(phat*(1-phat)/N)
lower = phat - dist
upper = phat + dist
return(list(lower=lower,upper=upper))
}
simple.interval(.65,100,.95)
#######################
wilson.interval <- function(phat,N,conf)
{
z <- qnorm(1 - (1-conf)/2)
theta <- z^2 /N
mult <- 1/(1+theta)
dist <- sqrt(phat*(1-phat)*theta + theta^2 / 4)
upper = mult*(phat + theta/2 + dist)
lower = mult*(phat + theta/2 - dist)
return(list(lower=lower,upper=upper))
}
wilson.interval(.65,100,.95)
###################
bootstrap.interval <- function(phat,N,conf,reps)
{
  lower.p <- (1-conf)/2
  upper.p <- 1 - lower.p
  lower <- rep(NA,length(phat))
  upper <- rep(NA,length(phat))
  for(i in 1:length(phat))
  {
  x <- rbinom(reps,N,phat[i])
  lower[i] <- quantile(x,lower.p,names=F)/N
  upper[i] <- quantile(x,upper.p,names=F)/N
  }
  return(list(lower=lower,upper=upper))
}
bootstrap.interval(.95,30,.95,1000)
##################################
simple.interval(.95,30,.95)
bootstrap.interval(.95,30,.95,1000)
wilson.interval(.95,30,.95)
################################
actual.coverage.probability <- function(N,p,conf)
{
x <- 0:N
phat <- x/N
probs <- dbinom(x,N,p)
wilson <- wilson.interval(phat,N,conf)
simple <- simple.interval(phat,N,conf)
bootstrap <- bootstrap.interval(phat,N,conf,1000)
s<-0
w<-0
b<-0
results <- new.env()
for(i in 1:N+1) if((simple$lower[i] < p)&(simple$upper[i] >p)) s<-s+probs[i]
for(i in 1:N+1) if((wilson$lower[i] < p)&(wilson$upper[i] >p)) w<-w+probs[i]
for(i in 1:N+1) if((bootstrap$lower[i] < p)&(bootstrap$upper[i] >p)) b<-b+probs[i]
return(list(simple.coverage=s,wilson.coverage=w,bootstrap.coverage=b))
}
actual.coverage.probability(30,.95,.95)
#######################################


estimate.coverage.probability<-function(N,p,conf,reps,seed.value=12345)
{
## Set seed, create empty matrices to hold results
set.seed(seed.value)
results <- new.env()
coverage.wilson<-0
coverage.simple<-0
coverage.bootstrap<-0
## Loop through the Monte Carlo replications
for(i in 1:reps)
  {
## create the simulated proportion
   phat <- rbinom(1,N,p)/N
## calculate the intervals
   wilson <- wilson.interval(phat,N,conf)
   simple <- simple.interval(phat,N,conf)
   bootstrap <- bootstrap.interval(phat,N,conf,1000)
## test for coverage, and update the count if successful
   if((simple$lower < p)&(simple$upper >p))
      coverage.simple <- coverage.simple + 1
   if((wilson$lower < p)&(wilson$upper >p))
      coverage.wilson <- coverage.wilson + 1
   if((bootstrap$lower < p)&(bootstrap$upper >p))
      coverage.bootstrap <- coverage.bootstrap + 1

  }
## convert results from count to probability
results$simple <- coverage.simple/reps
results$wilson <- coverage.wilson/reps
results$bootstrap <-coverage.bootstrap/reps
## return as a named list
return(as.list(results))
}
estimate.coverage.probability(30,.95,.95,10000)
#################################################
## set up empty vectors to hold 50 cases
p <- matrix(NA,50,1)
wilson <- matrix(NA,50,1)
simple <- matrix(NA,50,1)
bootstrap <- matrix(NA,50,1)
## step from .50 to .99, saving results as we go
for (i in 1:50)
{
 p[i]<-(49+i)/100
 res <- actual.coverage.probability(30,p[i],.95)
 wilson[i] <- res$wilson.coverage
 simple[i] <- res$simple.coverage
 bootstrap[i] <- res$bootstrap.coverage
}
#################################
plot(p,wilson,type="l",col="blue",
  ylim=c(.1,.99),xlab="Population Proportion p",
  ylab="Actual Coverage Probability",main="Confidence Interval Performance (N = 30)")
lines(p,simple,col="green")
lines(p,bootstrap,col="orange")
abline(.95,0,lty=2,col="red")
legend(.6,.6,c("Wilson Interval","Simple Interval","Bootstrap Interval"),
   col=c("blue","green","orange"),lty=c(1,1,1))
##################################
wells <-  read.table("wells.dat", header = TRUE )
attach(wells)
dist100 <- dist
fit.1 <- glm(switch ~ dist, family = binomial (link = "logit"))
display (fit.1 , digits = 4)


sim.1 <- sim(fit.1,n.sims=1000)

plot(sim.1$coef[,1],sim.1$coef[,2],xlab=expression(beta[0]),
  ylab=expression(beta[1]))

plot(dist,switch,pch=".")
for(s in 1:20)
{
 curve(invlogit(sim.1$coef[s,1] + sim.1$coef[s,2]*x),
   col="gray",add=TRUE)
}
curve(invlogit(fit.1$coef[1] + fit.1$coef[2]*x),col="red",add=TRUE)


n.sims<-1000
X.tilde <- matrix(c(1,1,1,1,1,1,1,1,1,1,120,45,109,54,33,254,81,190,101,65),10,2)
n.tilde <- nrow(X.tilde)
y.tilde <- array(NA,c(n.sims,n.tilde))
for (s in 1:n.sims){
 p.tilde <- invlogit(X.tilde %*% sim.1$coef[s,])
 y.tilde[s,] <- rbinom(n.tilde,1,p.tilde)
 }


y.tilde[1:20,]

#############################################

y <- scan ("lightspeed.dat", skip=4)
# fit the normal model 
#(i.e., regression with no predictors)
lm.light <- lm (y ~ 1)
display (lm.light)
n <- length (y)

n.sims <- 1000
sim.light <- sim (lm.light, n.sims)

y.rep <- array (NA, c(n.sims, n))
for (s in 1:n.sims){
  y.rep[s,] <- rnorm (1, sim.light$coef[s], sim.light$sigma[s])
}

test <- function (y){
  min (y)
}

test.rep <- rep (NA, n.sims)
for (s in 1:n.sims){
  test.rep[s] <- test (y.rep[s,])
}

# plot the histogram of test statistics of replications and of actual data

hist (test.rep, xlim=range (test(y), test.rep))
lines (rep (test(y), 2), c(0,n),col="red") 


# plot the data
hist (y,breaks=40)