setwd("C:/!!!Current Projects/!MLRM/Lectures/Introduction to Simulation/") library(arm) s=round(sqrt(.65* (1-.65)/100),4) upper = .65 + 1.95996 * s lower = .65 - 1.95996 * s upper = round(upper,4) lower = round(lower,4) ######################## simple.interval <- function(phat,N,conf) { z <- qnorm(1-(1-conf)/2) dist <- z * sqrt(phat*(1-phat)/N) lower = phat - dist upper = phat + dist return(list(lower=lower,upper=upper)) } simple.interval(.65,100,.95) ####################### wilson.interval <- function(phat,N,conf) { z <- qnorm(1 - (1-conf)/2) theta <- z^2 /N mult <- 1/(1+theta) dist <- sqrt(phat*(1-phat)*theta + theta^2 / 4) upper = mult*(phat + theta/2 + dist) lower = mult*(phat + theta/2 - dist) return(list(lower=lower,upper=upper)) } wilson.interval(.65,100,.95) ################### bootstrap.interval <- function(phat,N,conf,reps) { lower.p <- (1-conf)/2 upper.p <- 1 - lower.p lower <- rep(NA,length(phat)) upper <- rep(NA,length(phat)) for(i in 1:length(phat)) { x <- rbinom(reps,N,phat[i]) lower[i] <- quantile(x,lower.p,names=F)/N upper[i] <- quantile(x,upper.p,names=F)/N } return(list(lower=lower,upper=upper)) } bootstrap.interval(.95,30,.95,1000) ################################## simple.interval(.95,30,.95) bootstrap.interval(.95,30,.95,1000) wilson.interval(.95,30,.95) ################################ actual.coverage.probability <- function(N,p,conf) { x <- 0:N phat <- x/N probs <- dbinom(x,N,p) wilson <- wilson.interval(phat,N,conf) simple <- simple.interval(phat,N,conf) bootstrap <- bootstrap.interval(phat,N,conf,1000) s<-0 w<-0 b<-0 results <- new.env() for(i in 1:N+1) if((simple$lower[i] < p)&(simple$upper[i] >p)) s<-s+probs[i] for(i in 1:N+1) if((wilson$lower[i] < p)&(wilson$upper[i] >p)) w<-w+probs[i] for(i in 1:N+1) if((bootstrap$lower[i] < p)&(bootstrap$upper[i] >p)) b<-b+probs[i] return(list(simple.coverage=s,wilson.coverage=w,bootstrap.coverage=b)) } actual.coverage.probability(30,.95,.95) ####################################### estimate.coverage.probability<-function(N,p,conf,reps,seed.value=12345) { ## Set seed, create empty matrices to hold results set.seed(seed.value) results <- new.env() coverage.wilson<-0 coverage.simple<-0 coverage.bootstrap<-0 ## Loop through the Monte Carlo replications for(i in 1:reps) { ## create the simulated proportion phat <- rbinom(1,N,p)/N ## calculate the intervals wilson <- wilson.interval(phat,N,conf) simple <- simple.interval(phat,N,conf) bootstrap <- bootstrap.interval(phat,N,conf,1000) ## test for coverage, and update the count if successful if((simple$lower < p)&(simple$upper >p)) coverage.simple <- coverage.simple + 1 if((wilson$lower < p)&(wilson$upper >p)) coverage.wilson <- coverage.wilson + 1 if((bootstrap$lower < p)&(bootstrap$upper >p)) coverage.bootstrap <- coverage.bootstrap + 1 } ## convert results from count to probability results$simple <- coverage.simple/reps results$wilson <- coverage.wilson/reps results$bootstrap <-coverage.bootstrap/reps ## return as a named list return(as.list(results)) } estimate.coverage.probability(30,.95,.95,10000) ################################################# ## set up empty vectors to hold 50 cases p <- matrix(NA,50,1) wilson <- matrix(NA,50,1) simple <- matrix(NA,50,1) bootstrap <- matrix(NA,50,1) ## step from .50 to .99, saving results as we go for (i in 1:50) { p[i]<-(49+i)/100 res <- actual.coverage.probability(30,p[i],.95) wilson[i] <- res$wilson.coverage simple[i] <- res$simple.coverage bootstrap[i] <- res$bootstrap.coverage } ################################# plot(p,wilson,type="l",col="blue", ylim=c(.1,.99),xlab="Population Proportion p", ylab="Actual Coverage Probability",main="Confidence Interval Performance (N = 30)") lines(p,simple,col="green") lines(p,bootstrap,col="orange") abline(.95,0,lty=2,col="red") legend(.6,.6,c("Wilson Interval","Simple Interval","Bootstrap Interval"), col=c("blue","green","orange"),lty=c(1,1,1)) ################################## wells <- read.table("wells.dat", header = TRUE ) attach(wells) dist100 <- dist fit.1 <- glm(switch ~ dist, family = binomial (link = "logit")) display (fit.1 , digits = 4) sim.1 <- sim(fit.1,n.sims=1000) plot(sim.1$coef[,1],sim.1$coef[,2],xlab=expression(beta[0]), ylab=expression(beta[1])) plot(dist,switch,pch=".") for(s in 1:20) { curve(invlogit(sim.1$coef[s,1] + sim.1$coef[s,2]*x), col="gray",add=TRUE) } curve(invlogit(fit.1$coef[1] + fit.1$coef[2]*x),col="red",add=TRUE) n.sims<-1000 X.tilde <- matrix(c(1,1,1,1,1,1,1,1,1,1,120,45,109,54,33,254,81,190,101,65),10,2) n.tilde <- nrow(X.tilde) y.tilde <- array(NA,c(n.sims,n.tilde)) for (s in 1:n.sims){ p.tilde <- invlogit(X.tilde %*% sim.1$coef[s,]) y.tilde[s,] <- rbinom(n.tilde,1,p.tilde) } y.tilde[1:20,] ############################################# y <- scan ("lightspeed.dat", skip=4) # fit the normal model #(i.e., regression with no predictors) lm.light <- lm (y ~ 1) display (lm.light) n <- length (y) n.sims <- 1000 sim.light <- sim (lm.light, n.sims) y.rep <- array (NA, c(n.sims, n)) for (s in 1:n.sims){ y.rep[s,] <- rnorm (1, sim.light$coef[s], sim.light$sigma[s]) } test <- function (y){ min (y) } test.rep <- rep (NA, n.sims) for (s in 1:n.sims){ test.rep[s] <- test (y.rep[s,]) } # plot the histogram of test statistics of replications and of actual data hist (test.rep, xlim=range (test(y), test.rep)) lines (rep (test(y), 2), c(0,n),col="red") # plot the data hist (y,breaks=40)