### LLN (BMI) library(MASS) data(Pima.tr) m1 = mean(Pima.tr$bmi) sd = sd(Pima.tr$bmi) m1= 32.3 sd = 6.1 sample.size = c(1000,c(1:10)*10000) barx = c() for(n in sample.size){ x = rnorm(n,mean=m1,sd=sd) barx = c(barx,mean(x)) } plot(sample.size ,barx,type="l",ylim=c(m1-0.05*sd,m1+0.05*sd),lwd=2,ylab="BMI") abline(h=m1,col="red",lwd=2) ### CLT x = seq(0.0001,30,by=0.01) y = dgamma(x,shape=1.5,scale=5) plot(x,y,type="l",col="red",lwd=2) legend("topright",legend="Gamma(1.5,5)",bty="n") n = 100 x = c() for(i in 1:1000){ x[i] = mean(rgamma(n,shape=1.5,scale=5)) } prb = (1:length(x))/length(x) q.n = qnorm(prb,mean=1.5*5,sd=sqrt(1.5*5^2)/sqrt(n)) plot(sort(x),q.n,xlab="Observed Quantile",ylab="Theoretical Quantile",main=paste("QQ-plot for the mean (","n=",n,")",sep="")) abline(a=0,b=1,col="red",lwd=2) #### confidence interval for mean mu = 32.3 sd = 6.13 n = 20 m.max = 20 bar.x = c() for(i in 1:m.max){ x = rnorm(n,mean=mu,sd=sd) bar.x = c(bar.x,mean(x)) } plot.new() plot.window(xlim=c(min(bar.x)-2.5*sd/sqrt(n),max(bar.x)+2.5*sd/sqrt(n)),ylim=c(-1,m.max+1)) axis(side=1) axis(side=2) abline(v=mu,lwd=2,col="red") m.max.tmp = 20 for(k in 1:m.max.tmp){ points(bar.x[k],k,pch=19) lines(c(bar.x[k]-2*sd/sqrt(n),bar.x[k]+2*sd/sqrt(n)),c(k,k)) lines(c(bar.x[k]-2*sd/sqrt(n),bar.x[k]-2*sd/sqrt(n)),c(k-0.3,k+0.3)) lines(c(bar.x[k]+2*sd/sqrt(n),bar.x[k]+2*sd/sqrt(n)),c(k-0.3,k+0.3)) } curve(dnorm,xlim=c(-4,4),ylab="Density",lwd=1) lines(c(2,2),c(0,dnorm(2)),lwd=1) abline(h=0) col=rainbow(4) par(lwd=2) curve(dnorm,xlim=c(-4,4),ylab="Density",lwd=2,col=col[1]) df = c(2,10,20) for(k in c(1:3)) curve(dt(x,df=df[k]),add=TRUE,col=col[k+1]) legend("topright",legend=c("Normal","t:df=2","t:df=10","t:df=20"),lty=1,col=col) #### TERT x = 50 n = 70 p = 50/70 SE = sqrt(p*(1-p)/n) CI = c(p-1.96*SE,p+1.96*SE) curve(dnorm,xlim=c(-4,4),ylab="Density",lwd=1) lines(c(1.96,1.96),c(0,dnorm(1.96)),lwd=1) lines(c(-1.96,-1.96),c(0,dnorm(-1.96)),lwd=1) abline(h=0) ### t-test women.bmi = 26.5 ## usa mean BMI for women library(MASS) data(Pima.tr) shapiro.test(Pima.tr$bmi) mean(Pima.tr$bmi) sd(Pima.tr$bmi) n=200 c.cri = qt(1-0.05/2,df=n-1) t = (mean(Pima.tr$bmi)-women.bmi)/(sd(Pima.tr$bmi)/sqrt(n)) pvalue = 2*pt(t,lower.tail = FALSE,df=n-1) curve(dt(x,df=n-1),xlim=c(-15,15),col="blue",lwd=2,ylab="Density") curve(dt(x,df=n-1),xlim=c(-15,-c.cri),col="red",lwd=2,add=T) curve(dt(x,df=n-1),xlim=c(c.cri,15),col="red",lwd=2,add=T) lines(rep(c.cri,2),c(0,dt(c.cri,df=n-1)),col="red",lwd=2) lines(rep(-c.cri,2),c(0,dt(-c.cri,df=n-1)),col="red",lwd=2) abline(h=0) abline(v=t,col="darkcyan",lwd=2) ### the Imatinib example c.cri = qnorm(1-0.01) z = (0.87-0.45)/sqrt(0.45*(1-0.45)/415) pnorm(z,,lower.tail=F) #####bootstrap norm.data <- rnorm(500, mean=5000, sd=100) chisq.data <- rchisq(500, 3) boots <- function(data, R){ b.avg <<- c(); b.sd <<- c() for(b in 1:R) { ystar <- sample(data,length(data),replace=T) b.avg <<- c(b.avg,mean(ystar)) b.sd <<- c(b.sd,sd(ystar))} } boots.median <- function(data, R){ b.median <<- c(); for(b in 1:R) { ystar <- sample(data,length(data),replace=T) b.median<<- c(b.median,median(ystar)) } } boots.median(norm.data, 1000) boots(norm.data, 1000) x = c(1:1000)/100 y = dchisq(x,3) plot(x,y,type="l") boots(chisq.data, 1000) boots.median(chisq.data, 1000) #####regression x = rnorm(500,mean=0,sd=4) y = 1+x*3 + rnorm(500,sd=5) data.reg = data.frame(x=x,y=y) model.reg = lm(y~x,data=data.reg) boot.regression = function(data,R){ beta0 <<- c() beta1 <<- c() for(i in 1:R){ ind = sample.int(nrow(data),size=nrow(data),replace=TRUE) data.tmp = data[ind,] model.reg.tmp = lm(y~x,data=data.tmp) beta0 <<- c(beta0,model.reg.tmp$coef[1]) beta1 <<- c(beta1,model.reg.tmp$coef[2]) } } boot.regression(data.reg,1000) boot.regression.res = function(lm.model,data,R){ beta0.res<<-c() beta1.res<<-c() for(i in 1:R){ res.tmp = sample(lm.model$res,nrow(data),replace=TRUE) data.tmp = data data.tmp$y = lm.model$fitted + res.tmp model.reg.tmp = lm(y~x,data=data.tmp) beta0.res <<- c(beta0.res,model.reg.tmp$coef[1]) beta1.res <<- c(beta1.res,model.reg.tmp$coef[2]) } } boot.regression.res(model.reg,data.reg,1000)