広域システム特別講義II ベイズ統計

library(rjags)
source("http://www.matsuka.info/univ/course_folder/HDI_revised.txt")

island.hopping2 <- function(n.rep=1e5, init.st=4) {
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # intro to MCMC, island hopping

  # initialization
  state = 1:7
  curr.st = init.st
  state.counter = rep(0,7)
  state.counter[curr.st] = 1
  state.history=rep(0, n.rep)
  state.history[1]=curr.st

  prob.propose = matrix(1/6, nrow=7,ncol=7)
  diag(prob.propose)<-0

  # main loop
  for (i.rep in 2:n.rep) {
    destination = sample(state, 1, prob=prob.propose[curr.st,])
    prob.move = min(destination/curr.st, 1)
    if (runif(1) < prob.move) {
      curr.st = destination
    }
    state.history[i.rep] = curr.st
    state.counter[curr.st] = state.counter[curr.st]+1
  }
  par(mfrow=c(2, 1))
  par(mai=c(0, 1, 1, 1))
  par(mar=c(4, 3, 1, 3))
  barplot(state.counter, xlab="theta", ylab="Frequency")
  plot(state.history, 1:n.rep, type='o', log='y', xlab="theta", ylab="Time Step (log)", col="orange")
}

island.hopping2(10000, 4)

metropolis.ex01 <- function(n.iter=1e6, theta.init=0.1, sigma=0.2, plot.yn = T){
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # metropolis algo. with 1 parameter

  # "posterior of theta" function
  posterior.theta <- function(theta, N, z, a, b) {
    posterior = theta^z * (1-theta)^(N-z) * theta^(a-1) * (1 - theta)^(b-1) / beta(a,b)
  }

  # initialization
  theta.history = rep(0,nrow = n.iter,ncol = 1)
  theta.current = theta.init
  theta.history[1] = theta.current
  # values given in text
  mu = 0
  N = 20
  z = 14
  a = 1
  b = 1

  # main loop
  for (i_iter in 2:n.iter) {
    theta.proposed = theta.current + rnorm(1, mean=mu, sd=sigma)
    if (theta.proposed < 0 | theta.proposed > 1) {
      p.move = 0
    } else {
      p.current = posterior.theta(theta.current, N, z, a, b)
      p.proposed = posterior.theta(theta.proposed, N, z, a, b)
      p.move = min(p.proposed/p.current, 1)
    }
    if (runif(1) < p.move) {
      theta.current = theta.proposed
    }
    theta.history[i_iter] = theta.current
  }

  # plotting results
  if (plot.yn == T) {
    par(mfrow = c(3, 1))
    hist(theta.history, nclass = 100, col = "orange", probability = T, xlab = "theta")
    den=density(theta.history)
    lines(den)
    plot(theta.history[(n.iter-100):n.iter], ((n.iter-100):n.iter), type = 'o', xlim = c(0,1), xlab="theta", ylab = "step in chain")
    plot(theta.history[1:100],1:100, type = 'o', xlim = c(0,1), xlab = "theta", ylab = "step in chain")
  }
  return(theta.history)
}

res = metropolis.ex01(10000, 0.1)

# initialization
n.iter = 10000; sigma = 0.1; counter = 0
z = c(6, 2); N = c(8, 7)
a = c(2, 2); b = c(2, 2);
n.par = 2
th.hist = matrix(0, nrow = n.iter*n.par, ncol = n.par)
theta = runif(2)

# function to calc. prob. move
prob.gibbs <- function(theta, proposed, N, z, a, b, idx){
  p.th=dbeta(theta[idx], z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  p.pro=dbeta(proposed, z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  return(p.pro/p.th)
}

# main loop
for (i.iter in 1:n.iter){
  for (i.par in 1:n.par){
    proposed = theta[i.par] + rnorm(1, mean=0, sd=sigma)
    if (proposed > 1) {proposed = 1}
    if (proposed < 0) {proposed = 0}
    p.move= min(1, prob.gibbs(theta, proposed, N, z, a, b, i.par))
    if (runif(1) < p.move){
      theta[i.par] = proposed
    }
    counter = counter + 1
    th.hist[counter, ] = theta
  }
}
par(mfrow=c(3,1))
HDI.plot(th.hist[,1])
HDI.plot(th.hist[,2])
plot(th.hist[,1],th.hist[,2], type='o',cex=0.1,xlim = c(0,1),ylim=c(0,1))
par(mfrow=c(1,1))

model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta )
  }
  theta ~ dbeta( 1, 1 )
}"
writeLines(model.txt, "model.txt")

dat<-read.csv("http://www.matsuka.info/univ/course_folder/z15N50.csv")
y=dat$y
Ntotal=length(dat$y)
datalist = list(y=y,Ntotal=Ntotal)

# jags
jagsModel = jags.model(file="model.txt",data=datalist,n.chains=3,n.adapt=500)
update(jagsModel,n.iter=1000)
codaSamples=coda.samples(jagsModel,variable.names=c("theta"),n.iter=5000)
mcmcMat<-as.matrix(codaSamples)

par(mfrow=c(2,2))
cols=c("orange", "skyblue","pink")

# chain
mcmc1<-as.mcmc(codaSamples[[1]])
mcmc2<-as.mcmc(codaSamples[[2]])
mcmc3<-as.mcmc(codaSamples[[3]])
plot(mcmc1,type='l')
lines(mcmc2,col='red')
lines(mcmc3,col='blue')

# autocorrelation
ac1=autocorr(mcmc1,lags=0:50)
ac2=autocorr(mcmc2,lags=0:50)
ac3=autocorr(mcmc3,lags=0:50)
plot(ac1, type='o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type='o', pch = 20, col = cols[2])
lines(ac3, type='o', pch = 20, col = cols[3])

# shrink factor
resALL=mcmc.list(mcmc1,mcmc2,mcmc3)
gd1=gelman.plot(resALL, auto.layout = F)

# density
den1=density(mcmc1)
den2=density(mcmc2)
den3=density(mcmc3)
plot(den1, type='l', col = cols[1], main = " ", xlab = "param. value",lwd=2.5)
lines(den2, col = cols[2], lwd=2.5)
lines(den3, col = cols[3], lwd=2.5)

par(mfrow=c(1,1))

model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta[s[i_data]] )
  }
  for ( i_s in 1:Nsubj) {
    theta[i_s] ~ dbeta( 2, 2 )
  }
}"
writeLines(model.txt, "model.txt")

dat<-read.csv("http://www.matsuka.info/univ/course_folder/z6N8z2N7.csv")
y=dat$y
s=as.numeric(dat$s)
Ntotal=length(dat$y)
Nsubj=length(unique(s))
datalist = list(y=y,s=s,Ntotal=Ntotal,Nsubj=Nsubj)

jagsModel = jags.model(file="model.txt",data=datalist,n.chains=3,n.adapt=500)
update(jagsModel,n.iter=1000)
codaSamples=coda.samples(jagsModel,variable.names=c("theta"),n.iter=5000)
mcmcMat<-as.matrix(codaSamples)
par(mfrow=c(2,2))
cols=c("orange", "skyblue","pink")

# chain
mcmc1<-as.mcmc(codaSamples[[1]])
mcmc2<-as.mcmc(codaSamples[[2]])
mcmc3<-as.mcmc(codaSamples[[3]])
plot(temp1,type='l')
lines(temp2,col='red')
lines(temp3,col='blue')

# autocorrelation
ac1=autocorr(mcmc1,lags=0:50)
ac2=autocorr(mcmc2,lags=0:50)
ac3=autocorr(mcmc3,lags=0:50)
plot(ac1, type='o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type='o', pch = 20, col = cols[2])
lines(ac3, type='o', pch = 20, col = cols[3])

# shrink factor
resALL=mcmc.list(mcmc1,mcmc2,mcmc3)
gd1=gelman.plot(resALL, auto.layout = F)

# density
den1=density(mcmc1)
den2=density(mcmc2)
den3=density(mcmc3)
plot(den1, type='l', col = cols[1], main = " ", xlab = "param. value",lwd=2.5)
lines(den2, col = cols[2], lwd=2.5)
lines(den3, col = cols[3], lwd=2.5)

par(mfrow=c(1,1))

model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dnorm( mu , 1/sigma^2  )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
}
"
writeLines(model.txt, "model.txt")

dat<-read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)

dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=1000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)

# calculating & plotting normality
par(mfrow=c(2,2))
HDI.plot(mcmcMat[,1])
hist(y,nclass=15,probability = T)
x.temp = seq(40,200,0.1)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  norm.temp=dnorm(x.temp,mean=mcmcMat[randperm[i.plot],1],sd=mcmcMat[randperm[i.plot],2])
  lines(x.temp,norm.temp,col='orange')
}
HDI.plot(mcmcMat[,2])

# calculating & plotting effect size
effect.size=(mcmcMat[,"mu"]-100)/mcmcMat[,"sigma"]
HDI.plot(effect.size)


#
dat<-read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)

model.txt="
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu , 1/sigma^2 , nu )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/30.0)
}"
writeLines(model.txt, "model.txt")

dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=1000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)

# calculating & plotting normality
par(mfrow=c(3,2))
HDI.plot(mcmcMat[,1])
HDI.plot(mcmcMat[,3])
normality=log10(mcmcMat[,"nu"])
HDI.plot(normality)
effect.size=(mcmcMat[,"mu"]-100)/mcmcMat[,"sigma"]
HDI.plot(effect.size)

hist(y,nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40,200,0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],3]
  t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],3]
  lines(x.temp1,t.temp,col='orange')
}

par(mfrow=c(2,2))
plot(mcmcMat[,1],mcmcMat[,3],col='orange')
plot(mcmcMat[,1],log10(mcmcMat[,2]),col='orange')
plot(0,0,type='n')
plot(log10(mcmcMat[,2]),mcmcMat[,3],col='orange')


dat<-read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score
group = as.numeric(dat$Group)
Ntotal = length(y)
Ngroup = length(unique(group))
model.txt="
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu[group[i]] , 1/sigma[group[i]]^2 , nu )
  }
  for (j in 1:Ngroup){
    mu[j] ~ dnorm( meanY , 1/(100*sdY)^2 )
    sigma[j] ~ dunif( sdY/1000 , sdY*1000 )
  }
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/29)
}"
writeLines(model.txt, "model.txt")


dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y),Ngroup=Ngroup,group=group)
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=5000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)

# plotting result
par(mfrow=c(5,2))
HDI.plot(mcmcMat[,1],xlabel="placebo Mean")

hist(y[dat$Group=="Placebo"],nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40,200,0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],4]
  t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],3])/mcmcMat[randperm[i.plot],4]
  lines(x.temp1,t.temp,col='orange')
}

HDI.plot(mcmcMat[,2],xlabel="smart drug Mean")

hist(y[dat$Group=="Smart Drug"],nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40,200,0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],5]
  t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],3])/mcmcMat[randperm[i.plot],5]
  lines(x.temp1,t.temp,col='orange')
}

HDI.plot(mcmcMat[,4],xlabel="placebo scale")

HDI.plot(mcmcMat[,2]-mcmcMat[,1],xlabel="Difference of Means")

HDI.plot(mcmcMat[,5],xlabel="smart drug scale")

HDI.plot(mcmcMat[,5]-mcmcMat[,4],xlabel="Difference of Scales")

HDI.plot(log10(mcmcMat[,3]),xlabel="Normality")

effect.size = (mcmcMat[,2]-mcmcMat[,1])/sqrt((mcmcMat[,5]^2+mcmcMat[,4]^2)/2)
HDI.plot(effect.size,xlabel="effect size")