Bootstrap本意是拎着靴带让自己跳起来,在统计学中是一种重采样的方法,通常在样本量小的时候,可以从中不断再次抽样。
1. Bootstrap 简单应用
Bootstrap bias偏差 &variance 方差
library(bootstrap) bslogc<-function(x,B){
n<-length(x) thetastar<-replicate(B,{
xstar<-sample(x,n,replace = T) median(xstar)} ) thetastar } x <- diabetes$logCpeptide hist(x, freq = FALSE, main = "Histogram of diabetes data") median(x) bscpep<-bslogc(x,1000) hist(bscpep,freq = FALSE, breaks = 7,main = "Histogram of bootstrap diabetes medians") var(bscpep) bias<-mean(bscpep)-median(x)
Bootstrap correlation
samplecorr <- function(data, B) {
n <- nrow(data) res <- numeric(B) for(i in 1:B) {
ind <- sample(n, n, replace = TRUE) bs_data <- data[ind, ] res[i] <- cor(bs_data[ , 1], bs_data[ , 2]) } res } cor(law) cor(law[ , 1], law[ , 2]) bs_law <- samplecorr(law, 10000) hist(bs_law, freq = FALSE, main = "Histogram of law data bootstrap corellation coefficients") bias.law <- mean(bs_law) - cor.law[1, 2] bias.law var(bs_law) cor(law82) bs_law82 <- samplecorr(law82[ , 2:3], 10000) hist(bs_law82, freq = FALSE, main = "Histogram of law82 data bootstrap corellation coefficients") bias.law82 <- mean(bs_law82) - cor.law82[1, 2] bias.law82 var(bs_law82)
Bootstrap方法比较实验组与对照组
mean(mouse.t)-mean(mouse.c) bsmice<-function(x,y,B){
n1<-length(x) n2<-length(y) thet1<-replicate(B,{
xstar<-sample(x,n1,replace = T) mean(xstar)}) thet2<-replicate(B,{
ystar<-sample(y,n2,replace = T) mean(ystar)}) d=thet1-thet2 } df1=bsmice(mouse.t,mouse.c,1000) head(df1) pihat <- sum(df1 > 10) / length(df1) #probability that survival time difference > 10 pihat
2.Bootstrap residuals
ebs_eps<-function(y,x,B){
fit <- lm(y ~ x) e <- residuals(fit) y_fitted <- fitted(fit) # or : beta[1] + beta[2] * x <- wood$density # for convenience bs_resid <- matrix(0, nrow = B, ncol = 2) n<-length(y) for(i in 1:B){
e_star <- sample(e, n, replace = TRUE) y_star <- y_fitted + e_star fit_star <- lm(y_star ~ x) bs_resid[i, ] <- coef(fit_star) } bs_resid } wood <- read.table("wood.txt", header = TRUE) bs_beta <-ebs_eps(y = wood$hardness, x = wood$density, 1000) fit_wood <- lm(hardness ~ density, data = wood) beta <- coef(fit_wood) plot(wood$density, wood$hardness, main = "", xlab = "density", ylab = "hardness") yreg <- beta[1] + beta[2] * wood$density lines(wood$density, yreg, col = "red") for(i in 1:10) {
yest <- bs_beta[i, 1] + bs_beta[i, 2] * wood$density lines(wood$density, yest) } hist(bs_beta[, 1], xlab = "Estimate of alpha", freq = FALSE) hist(bs_beta[ , 2], xlab = "Estimate of beta", freq = FALSE)
3.Bootstrap 置信区间
s1=rgamma(50,shape = 4,rate = 1) hist(s1) n <- length(s1) B=10000 zstar<-{
} for (i in 1:B){
xstar<- sample(s1,n,replace = T) ths<-mean(xstar) se<-sd(xstar)/sqrt(n) zstar[i]<- (ths-theta_hat)/se } tq<-quantile(zstar,c(0.05,1-0.05)) theta_hat-c(tq[2],tq[1])*se_hat library(bootstrap) help(bootstrap) boott(s1,theta = function(x) mean(x), perc=c(.025,.05,.10,.50,.90,.95,.975))
发布者:全栈程序员-站长,转载请注明出处:https://javaforall.net/177830.html原文链接:https://javaforall.net
