Permutation testing is similar to bootstrapping, except that it resamples without replacement: once a value is selected it cannot be selected again, so no value appears twice. This amounts to shuffling the values. For a univariate statistic (e.g. the mean), shuffling changes nothing. With two or more variables, however, reshuffling one variable changes any test statistic that relates them (e.g. a correlation or regression coefficient). The shuffle is usually applied to the response/outcome/y variable, and the approach is especially common for tests involving discrete or categorical ("yes", "no") variables.
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
ds <- data.frame(y = runif(10), x = runif(10)); head(ds)
## y x
## 1 0.20200727 0.2020174
## 2 0.88228326 0.5967211
## 3 0.38736966 0.4249668
## 4 0.24538380 0.6020442
## 5 0.12865606 0.7334585
## 6 0.07374217 0.3428088
cor(ds$y, ds$x, method = "spearman") # the correlation is the test statistic for the paired measurements
## [1] -0.3090909
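As a quick check of the univariate claim above, shuffling does not change the mean (a one-line sketch):
all.equal(mean(ds$y), mean(sample(ds$y))) # TRUE: a univariate statistic is invariant to shuffling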
ds$resample_y <- sample(ds$y); head(ds)
## y x resample_y
## 1 0.20200727 0.2020174 0.89397897
## 2 0.88228326 0.5967211 0.24538380
## 3 0.38736966 0.4249668 0.88228326
## 4 0.24538380 0.6020442 0.44260192
## 5 0.12865606 0.7334585 0.51159146
## 6 0.07374217 0.3428088 0.03607334
cor(ds$resample_y, ds$x, method = "spearman") # shuffling the order of one variable changes the coefficient
## [1] 0.2363636
# Repeat the permutation 1000 times
library(ggplot2)
tibble(num = 1:1000) %>% # data_frame() is deprecated; tibble() is re-exported by dplyr
  group_by(num) %>% # one row per permutation: mutate() then runs once per group
  mutate(corr = cor(sample(ds$y), ds$x, method = "spearman")) %>%
  ggplot(aes(x = corr)) +
  geom_freqpoly() # estimated null (background) distribution of the correlation coefficient
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
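The same permutation distribution yields an empirical two-sided p-value by hand: the proportion of permuted correlations at least as extreme as the observed one (adding 1 to numerator and denominator avoids reporting exactly zero). A minimal sketch; the names obs and perms are ours:
obs <- cor(ds$y, ds$x, method = "spearman") # observed statistic
perms <- replicate(1000, cor(sample(ds$y), ds$x, method = "spearman"))
(sum(abs(perms) >= abs(obs)) + 1) / (length(perms) + 1) # empirical two-sided p-value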
# R package
library(coin)
## Loading required package: survival
ds <- data.frame(y = runif(10), x = runif(10))
cor(ds$y, ds$x, method = "spearman")
## [1] 0.2363636
spearman_test(y ~ x, data = ds)
##
## Asymptotic Spearman Correlation Test
##
## data: y by x
## Z = 0.70909, p-value = 0.4783
## alternative hypothesis: true rho is not equal to 0
spearman_test(y ~ x, data = ds, distribution = "approximate") # Monte Carlo permutation rather than the asymptotic approximation
##
## Approximative Spearman Correlation Test
##
## data: y by x
## Z = 0.70909, p-value = 0.5212
## alternative hypothesis: true rho is not equal to 0
# vignette("coin", package = "coin") # for more information, see the package vignette
The “spearman_test” function in the “coin” package tests whether the observed correlation is significant relative to an estimated null distribution of the correlation coefficient; with distribution = "approximate" that distribution is estimated by random permutation, using 10,000 permutations by default.
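The number of permutations can be raised explicitly by passing an approximate() object (a sketch; the nresample argument name is used by recent versions of coin, while older releases called it B):
spearman_test(y ~ x, data = ds,
              distribution = approximate(nresample = 50000)) # 50,000 Monte Carlo permutations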
Ref: https://uoftcoders.github.io/studyGroup/lessons/r/resampling/lesson/
Next we run a simple Metropolis-Hastings MCMC simulation on a toy example; a more detailed walkthrough is available on the reference page cited below this code block.
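The sampler defined below implements the Metropolis rule: from the current parameter vector \(\theta = (a, b, sd)\), propose \(\theta'\) from a symmetric normal and accept it with probability \[\alpha = \min\left\{1,\; \frac{p(\theta' \mid y)}{p(\theta \mid y)}\right\} = \min\left\{1,\; \exp\big(\log p(\theta' \mid y) - \log p(\theta \mid y)\big)\right\},\] working on the log scale for numerical stability. Because the proposal is symmetric, no Hastings correction term is needed; on rejection the chain repeats its current state.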
# making example data
trueA <- 5
trueB <- 0
trueSd <- 10
sampleSize <- 31
# create independent x-values
x <- (-(sampleSize-1)/2):((sampleSize-1)/2)
# create dependent values according to ax + b + N(0,sd)
y <- trueA * x + trueB + rnorm(n=sampleSize,mean=0,sd=trueSd)
# plot data
plot(x,y, main="Test Data")
# Define likelihood function
likelihood <- function(param){
  a = param[1]
  b = param[2]
  sd = param[3]
  pred = a*x + b
  singlelikelihoods = dnorm(y, mean = pred, sd = sd, log = T)
  sumll = sum(singlelikelihoods)
  return(sumll)
}
# Example: plot the likelihood profile of the slope a
slopevalues <- function(x){return(likelihood(c(x, trueB, trueSd)))}
slopelikelihoods <- sapply(seq(3, 7, by=.05), slopevalues) # sapply returns a numeric vector for plotting
plot(seq(3, 7, by=.05), slopelikelihoods, type="l",
     xlab = "values of slope parameter a", ylab = "Log likelihood")
# Prior distribution
prior <- function(param){
  a = param[1]
  b = param[2]
  sd = param[3]
  aprior = dunif(a, min=0, max=10, log = T)
  bprior = dnorm(b, sd = 5, log = T)
  sdprior = dunif(sd, min=0, max=30, log = T)
  return(aprior+bprior+sdprior)
}
# log posterior = log likelihood + log prior (up to a normalising constant)
posterior <- function(param){
  return (likelihood(param) + prior(param))
}
######## Metropolis algorithm ################
proposalfunction <- function(param){
  # symmetric normal random-walk proposal around the current parameter values
  return(rnorm(3, mean = param, sd = c(0.1,0.5,0.3)))
}
run_metropolis_MCMC <- function(startvalue, iterations){
  chain = array(dim = c(iterations+1,3))
  chain[1,] = startvalue
  for (i in 1:iterations){
    proposal = proposalfunction(chain[i,])
    # acceptance probability: exponentiated log-posterior difference
    probab = exp(posterior(proposal) - posterior(chain[i,]))
    if (runif(1) < probab){
      chain[i+1,] = proposal # accept the proposal
    }else{
      chain[i+1,] = chain[i,] # reject: repeat the current state
    }
  }
  return(chain)
}
startvalue = c(4,0,10)
chain = run_metropolis_MCMC(startvalue, 10000)
burnIn = 5000
acceptance = 1-mean(duplicated(chain[-(1:burnIn),])) # acceptance rate: a repeated row means a rejection
### Summary: #######################
par(mfrow = c(2,3))
hist(chain[-(1:burnIn),1],nclass=30, main="Posterior of a", xlab="True value = red line" )
abline(v = mean(chain[-(1:burnIn),1]))
abline(v = trueA, col="red" )
hist(chain[-(1:burnIn),2],nclass=30, main="Posterior of b", xlab="True value = red line")
abline(v = mean(chain[-(1:burnIn),2]))
abline(v = trueB, col="red" )
hist(chain[-(1:burnIn),3],nclass=30, main="Posterior of sd", xlab="True value = red line")
abline(v = mean(chain[-(1:burnIn),3]) )
abline(v = trueSd, col="red" )
plot(chain[-(1:burnIn),1], type = "l", xlab="True value = red line" , main = "Chain values of a")
abline(h = trueA, col="red" )
plot(chain[-(1:burnIn),2], type = "l", xlab="True value = red line" , main = "Chain values of b")
abline(h = trueB, col="red" )
plot(chain[-(1:burnIn),3], type = "l", xlab="True value = red line" , main = "Chain values of sd")
abline(h = trueSd, col="red" )
# for comparison:
summary(lm(y~x))
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.210 -5.393 -1.997 7.841 19.248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5371 1.8507 -0.29 0.774
## x 5.6279 0.2069 27.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.3 on 29 degrees of freedom
## Multiple R-squared: 0.9623, Adjusted R-squared: 0.961
## F-statistic: 739.8 on 1 and 29 DF, p-value: < 2.2e-16
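For a numeric comparison with the lm() fit, the posterior means and spreads of the retained draws can be tabulated directly; a small sketch (the name post is ours):
post <- chain[-(1:burnIn), ] # retained draws after burn-in
colnames(post) <- c("a", "b", "sd")
round(rbind(mean = colMeans(post), sd = apply(post, 2, sd)), 3) # compare with the lm() estimates above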
Ref: https://theoreticalecology.wordpress.com/2010/09/17/metropolis-hastings-mcmc-in-r/
The simulation below uses the jackknife to bias-correct the sample maximum of 100 draws from Uniform(0, 5). It records how often the jackknife estimate is closer to the true maximum of 5 than the raw sample maximum is, along with the mean absolute error of each estimator.
# code from bu
numvec <- rep(NA, 100000)
maxbiasvec <- rep(NA, 100000)
jackbiasvec <- rep(NA, 100000)
for (i in 1:100000){
  samp <- runif(100, min = 0, max = 5)
  # jackknife bias-corrected estimate of the maximum
  jack <- max(samp) + (100-1)/100 * (max(samp) - max(samp[!samp == max(samp)]))
  numvec[i] <- ifelse(abs(5-jack) < abs(max(samp)-5), 1, 0) # 1 if the jackknife is closer to the truth
  maxbiasvec[i] <- abs(5 - max(samp))
  jackbiasvec[i] <- abs(5 - jack)
}
mean(numvec)
## [1] 0.66614
mean(jackbiasvec)
## [1] 0.04930729
mean(maxbiasvec)
## [1] 0.04938722
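The estimator in the loop is the standard jackknife bias correction \(\hat\theta_{\text{jack}} = n\hat\theta - (n-1)\,\overline{\hat\theta}_{(\cdot)}\), where \(\overline{\hat\theta}_{(\cdot)}\) is the average of the leave-one-out estimates. For the maximum, deleting any non-maximal point leaves the estimate unchanged and deleting the maximum yields the second-largest value \(\hat\theta_{(2)}\), so \[\hat\theta_{\text{jack}} = \hat\theta + \frac{n-1}{n}\left(\hat\theta - \hat\theta_{(2)}\right),\] which is exactly the expression computed for jack above with \(n = 100\). The output shows the jackknife estimate is closer to the true maximum about two-thirds of the time, with a slightly smaller mean absolute error.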
Suppose \(p(x, y)\) is a p.d.f. or p.m.f. that is difficult to sample from directly. Suppose, though, that we can easily sample from the conditional distributions \(p(x|y)\) and \(p(y|x)\).
The Gibbs sampler proceeds as follows:
1. Set \(x\) and \(y\) to some initial starting values.
2. Sample \(x|y\), then sample \(y|x\), then \(x|y\) again, and so on.
Let’s suppose we are constructing a Gibbs sampler for the bivariate distribution \[f(x,y) = k\,x^2 \exp\{-xy^2 - y^2 + 2y - 4x\}, \qquad x > 0,\; y \in \mathbb{R},\] with unknown normalising constant \(k>0\). The full conditional distributions are \[x \mid y \sim \mathrm{Ga}(3,\, y^2+4)\] and \[y \mid x \sim N\!\left(\frac{1}{1+x},\, \frac{1}{2(1+x)}\right),\] where \(\mathrm{Ga}\) denotes the Gamma distribution parameterised by shape and rate.
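These conditionals can be read off from \(f(x,y)\) directly: holding \(y\) fixed, \(f \propto x^2 e^{-x(y^2+4)}\), which is the kernel of \(\mathrm{Ga}(3,\, y^2+4)\); holding \(x\) fixed, \(f \propto e^{-(1+x)y^2 + 2y}\), and completing the square yields the stated normal distribution.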
gibbs = function(N, thin)
{
  x = 0
  y = 0
  cat(paste("Iter","x","y","\n"))
  for (i in 1:N) {
    for (j in 1:thin) { # thinning: only every thin-th draw is reported
      x = rgamma(1, 3, y*y+4) # x | y ~ Ga(3, y^2 + 4)
      y = rnorm(1, 1/(x+1), 1/sqrt(2*x+2)) # y | x ~ N(1/(1+x), 1/(2(1+x)))
    }
    cat(paste(i, x, y, "\n"))
  }
}
gibbs(10,100)
## Iter x y
## 1 0.566421456729337 0.829296909186305
## 2 0.695655879093854 -0.303454968900284
## 3 0.982355323570921 0.518012501468744
## 4 0.288529881456506 0.857854718995394
## 5 0.411077486318104 1.04345740915507
## 6 0.258271594130548 0.784987332055443
## 7 0.958096319070601 1.17333236964631
## 8 0.484763389731867 0.70134420433492
## 9 0.573898414220757 0.870268529276595
## 10 0.998080698685726 -0.539143803446724
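Printing inside the loop is fine for a demonstration, but for analysis one normally stores the draws. A minimal variant with the same full conditionals that returns a matrix of samples (the names gibbs_mat and draws are ours):
gibbs_mat <- function(N = 1000, thin = 10) {
  out <- matrix(NA_real_, nrow = N, ncol = 2, dimnames = list(NULL, c("x", "y")))
  x <- 0; y <- 0
  for (i in 1:N) {
    for (j in 1:thin) { # keep only every thin-th pair to reduce autocorrelation
      x <- rgamma(1, 3, y*y + 4) # x | y ~ Ga(3, y^2 + 4)
      y <- rnorm(1, 1/(x + 1), 1/sqrt(2*x + 2)) # y | x ~ N(1/(1+x), 1/(2(1+x)))
    }
    out[i, ] <- c(x, y)
  }
  out
}
draws <- gibbs_mat()
summary(draws)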
Another implementation, a Gibbs sampler for the joint posterior of the mean \(\mu\) and precision \(\tau\) of a normal sample, is shown below.
# summary statistics of sample
n <- 30
ybar <- 15
s2 <- 3
# sample from the joint posterior (mu, tau | data)
mu <- rep(NA, 11000)
tau <- rep(NA, 11000)
burnin <- 1000 # burn-in iterations ("T" avoided as a name: it is shorthand for TRUE in R)
tau[1] <- 1 # initialisation
for(i in 2:11000) {
  mu[i] <- rnorm(n = 1, mean = ybar, sd = sqrt(1 / (n * tau[i - 1]))) # mu | tau, data
  tau[i] <- rgamma(n = 1, shape = n / 2,
                   scale = 2 / ((n - 1) * s2 + n * (mu[i] - ybar)^2)) # tau | mu, data
}
mu <- mu[-(1:burnin)] # remove burn-in
tau <- tau[-(1:burnin)] # remove burn-in
hist(mu)
hist(tau)
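Posterior point estimates and 95% credible intervals follow directly from the retained draws; a short sketch:
c(mean = mean(mu), quantile(mu, c(0.025, 0.975))) # posterior mean and 95% credible interval for mu
c(mean = mean(tau), quantile(tau, c(0.025, 0.975))) # the same for tau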
Ref:
https://darrenjw.wordpress.com/2011/07/16/gibbs-sampler-in-various-languages-revisited/
https://stats.stackexchange.com/questions/266665/gibbs-sampler-examples-in-r
Casella, G. & George, E.I. (1992). Explaining the Gibbs Sampler. The American Statistician, 46, 167-174.