MCMC

# JAGS and “simple” GIBBS sampling

# "simple" GIBBS sampling example
# initialization
n.iter = 10000; sigma = 0.1; counter = 0
z = c(6, 2); N = c(8, 7)
a = c(2, 2); b = c(2, 2); 
n.par = 2
th.hist = matrix(0, nrow = n.iter*n.par, ncol = n.par)
theta = runif(2)

# function to calc. prob. move
prob.gibbs <- function(theta, proposed, N, z, a, b, idx){
  p.th=dbeta(theta[idx], z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  p.pro=dbeta(proposed, z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  return(p.pro/p.th)
}

# main loop
for (i.iter in 1:n.iter){
  for (i.par in 1:n.par){
    proposed = theta[i.par] + rnorm(1, mean=0, sd=sigma)
    if (proposed > 1) {proposed = 1}
    if (proposed < 0) {proposed = 0}
    p.move= min(1, prob.gibbs(theta, proposed, N, z, a, b, i.par))
    if (runif(1) < p.move){
      theta[i.par] = proposed
    } 
    counter = counter + 1
    th.hist[counter, ] = theta
  }
}
par(mfrow=c(3,1))
HDI.plot(th.hist[,1])
HDI.plot(th.hist[,2])
plot(th.hist[,1],th.hist[,2], type='o',cex=0.1,xlim = c(0,1),ylim=c(0,1))
par(mfrow=c(1,1))

# with JAGS
# model 
model {
 for (i in 1:Ntotal) {
   y[i] ~ dbern(theta[ coin[ i ] ])
 }
 for (c in 1:Ncoin) {
   theta[ c ] ~ dbeta(2, 2)
   }
}
# R command
y = c(rep(1,6), rep(0,2), rep(1,2), rep(0,5))
coin = c(rep(1,8), rep(2,7))
dataList = list(Ntotal=length(y),
                y = y,
                coin = coin,
                Ncoin = 2)
parameters = c("theta")
model = jags.model( "~/research/oka/DBDA/ch07/twoVarMCMC.tex", 
  data = dataList, n.chains = 3, n.adapt = 1000)
update(model, n.iter=500)
CS = coda.samples(model, variable.names = parameters, n.iter = 10000, thin = 5)
mcmcMat<-as.matrix(CS)
par(mfrow = c(3, 1))
plot(mcmcMat[,1], mcmcMat[,2], type='o',ylim = c(0,1), xlim = c(0,1))
HDI.plot(mcmcMat[,1])
HDI.plot(mcmcMat[,2])

Disclaimer

このcourselogにあるコードは、主に学部生・博士課程前期向けの講義・演習・実習で例として提示しているもので、原則直感的に分かりやすいように書いているつもりです。例によってはとても非効率なものもあるでしょうし、「やっつけ」で書いているものあります。また、普段はMATLABを使用していますので、変な癖がでているかもしれません。これらの例を使用・参考にする場合はそれを踏まえてたうえで使用・参考にして下さい。
卒業論文に関する資料:[2015PDF] [word template] [latex template] [表紙] [レポートの書き方] [引用文献など]
A Brief Guide to R (Rのコマンドの手引きおよび例)はこちらをどうぞ

単純主効果の検定など

SPF.tsmeとRBF.tsmeを更新しました。
使用方法は:
SPF.tsme(分散分析の結果、データ名、従属変数名)
RBF.tsme(分散分析の結果、データ名、従属変数名)
具体例は以下の通りです。
なお、これらを使用する前に
source(“http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt”)
を必ず実行してください。

source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")

# SPFの例
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)
tsme <- SPF.tsme(dat.aov, dat, "result")
simple main effect test for BETWEEEN subject factor 
           ss df   ms       f         p
1hr       2.5  1  2.5  1.0417 0.3150887
2hr       0.4  1  0.4  0.1667 0.6858104
3hr      12.1  1 12.1  5.0417 0.0317831
4hr      32.4  1 32.4 13.5000 0.0008662
residual 76.8 32  2.4                  

 Tukey HSD test - between subject factor @ duration = 3hr 
      D     E
D FALSE  TRUE
E  TRUE FALSE

 Tukey HSD test - between subject factor @ duration = 4hr 
      D     E
D FALSE  TRUE
E  TRUE FALSE

 simple main effect test for WITHIN subject factor 
           ss df      ms      f         p
D        49.2  3 16.4000 12.990 3.047e-05
E         1.0  3  0.3333  0.264 8.506e-01
residual 30.3 24  1.2625                 

 Tukey HSD test - within subject factor @ method = D 
      1hr   2hr   3hr   4hr
1hr FALSE FALSE  TRUE  TRUE
2hr FALSE FALSE FALSE  TRUE
3hr  TRUE FALSE FALSE FALSE
4hr  TRUE  TRUE FALSE FALSE

# RBFの例
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration+s:method),dat)
tsme <- RBF.tsme(dat.aov, dat, "result")
simple main effect test for the 1st within-subject factor 
           ss df   ms      f         p
1hr       2.5  1  2.5  1.563 0.2292743
2hr       0.4  1  0.4  0.250 0.6238816
3hr      12.1  1 12.1  7.562 0.0142341
4hr      32.4  1 32.4 20.250 0.0003635
residual 25.6 16  1.6                 

 Tukey HSD test - 1st within-subject factor @ duration = 3hr 
      D     E
D FALSE  TRUE
E  TRUE FALSE

 Tukey HSD test - 1st within-subject factor @ duration = 4hr 
      D     E
D FALSE  TRUE
E  TRUE FALSE

 simple main effect test for the 2nd within-subject factor 
           ss df      ms      f         p
D        49.2  3 16.4000 12.990 3.047e-05
E         1.0  3  0.3333  0.264 8.506e-01
residual 30.3 24  1.2625                 

 Tukey HSD test - 2nd within-subject factor @ method = D 
      1hr   2hr   3hr   4hr
1hr FALSE FALSE  TRUE  TRUE
2hr FALSE FALSE FALSE  TRUE
3hr  TRUE FALSE FALSE FALSE
4hr  TRUE  TRUE FALSE FALSE

認知情報解析 カジノ体験

サイコロを3つなげ、その和が何であるかをかけるギャンブルがあるそうです。
和が、4~10ならsmall, 11~17ならlargeとおおよそ2択のギャンブルだそうです。(3をx、18をyと呼ぶことにします)
Kくんが体験したのは「過去50回の結果がlargeが70%と表示されていたので、次はsmallじゃないかと思いsmallに賭けて、実際にsmallが出た」ということでした。この出来事がどの程度よく起きるのかシミュレーションを用いて検証しまししょう。

# 非効率だけど、直感的に分かりやすいと思われる例

# 1試行の関数
die <- function() {
  die1<-sample(1:6,1)
  die2<-sample(1:6,1)
  die3<-sample(1:6,1)
  die.sum = die1 + die2 + die3
  if (die.sum==3) {
     result="X"
  } else { 
    if (die.sum==18) {
      result="Y"
   } else {
     if (die.sum<11){ 
       result="small"
       } else {
         result="large"
       }
     }
   }
  return(result)
}

# 51試行の結果を表す関数
SL51=function( ) {
  n.rep=51
  res=rep("N",51)
  for (i_rep in 1:n.rep) {
    res[i_rep]=die()
  }
  p.large=sum(res[1:50]=="large")/50
  return(list(p.large=p.large,last.result=res[51]))
}
  
# 51試行をN.REP回繰り返す関数
ck.kurimoto <- function(n.rep) {
  p.hist = rep(0,n.rep)
  lr.hist = rep("N",n.rep)
  for (i_rep in 1:n.rep) {
     result=SL51()
     p.hist[i_rep] = result$p.large
     lr.hist[i_rep] = result$last.result
  }
  return(list(p.hist=p.hist,lr.hist=lr.hist))
}

# 50試行の内、Lが出た割合が70%以上のものを選択
index70=which(m.result$p.hist>=0.7)
sum(m.result$lr.hist[index70]=="small")/length(index70)

# 上の3つの関数をまとめたもの
n.rep=1e6
die.mat = matrix(sample(c("X","Y","S","L"),
 n.rep*51,prob=c(1/16,1/16,7/16,7/16),replace=T),nrow=n.rep)
p.large=rowSums(die.mat[,1:50]=="L")/50
index70=which(p.large>=0.7)
ck51L70=die.mat[index70,51]
table(ck51L70)/length(index70)

認知情報解析演習 ギャンブル

# 直感的にはわかりやすいけど、非効率な例
bet.example<-function(n.trial,p.win,max.bet) {
  rew.history=rep(0,n.trial)
  bet.history=rep(0,n.trial)
  WL.history=rep("W",n.trial)
  loss.counter=0
  i_trial=0;
  WL="L"
  loss.counter.reset="F"
  while (i_trial < n.trial | WL=="L" ) {
    i_trial=i_trial+1
    bet=2^loss.counter
    if (bet > max.bet) {
      bet = max.bet
      loss.counter.reset = "T"
    }
    bet.history[i_trial]=bet
    WL=sample(c("W","L"),1,prob=c(p.win, 1-p.win))
    WL.history[i_trial]=WL
    if (WL=="L") {
      loss.counter=loss.counter+1
      reward=-bet
    } else {
      loss.counter=0
      reward=bet
    }
    if (loss.counter.reset=="T"){loss.counter=0}
    rew.history[i_trial]=reward
  }
  return(list(reward=sum(rew.history),money.required=max(bet.history)))
}

# より効率的な例 - max.betは未導入
bet.example02<-function(n.trial,p.win) {
  WL=sample(c("W","L"),n.trial,replace=T,prob=c(p.win, 1-p.win))
  if (WL[n.trial]!="W") {
    WL.extra=sample(c("W","L"),1,prob=c(p.win,1-p.win))
    while (tail(WL.extra,1)=="L") {
      WL.extra=c(WL.extra,sample(c("W","L"),1,prob=c(p.win,1-p.win)))
     }
    WL=c(WL,WL.extra)
  }
  reward=sum(WL=="W")
  result=rle(WL)
  loss.index=result$values=="L"
  money.required=2^max(result$lengths[loss.index])
  return(list(reward,money.required))
}

データ解析基礎論 分散分析の例

# 可視化
interaction.plot(dat$gender,
  dat$affil,
  dat$shoesize, 
  pch=c(20,20), 
  col=c("skyblue","orange"), 
  xlab="gender", ylab="shoesize", 
  lwd=3,type='b',cex=2,
  trace.label="Affiliation")

# 可視化2(非効率)
means<-tapply(dat$shoesize,list(dat$gender, dat$affil),mean)
Ns<-tapply(dat$shoesize,list(dat$gender, dat$affil),length)
sds<-tapply(dat$shoesize,list(dat$gender, dat$affil),sd)
sems<-sds/sqrt(Ns)

plot(c(0,1),means[,1],type='o',col='skyblue',
 ylim=c(min(means)*0.975,max(means)*1.025),
  xlim=c(-0.1,1.1),lwd=2,cex=2,pch=20,xlab="gender",ylab="shoesize")
lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20)
legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)
lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2.5)
lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2.5)
lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2.5)
lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2.5)

# 分散分析
dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)

# 単純主効果の検定
#  各所属講座における性別の効果
means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov)   # ANOVA table
MSe=dat.aov.sum[[1]][4,3]      # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1]      # DF for error or dfE=16
dfG=1                          # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe   # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe   # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE)  # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE)  # p-value for gender effect given PS

#  各性別における所属講座の効果
SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1				          # DF for affil
F_affil_F=SS_affil_F/dfA/MSe         # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe         # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE)      # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE)      # p-value for affiliation effect | M

# 例2
interaction.plot(dat$duration,dat$method,dat$result, 
  pch=c(20,20), col=c("skyblue","orange"), ylab="score", 
  xlab="Duration",lwd=3,type='b',cex=2,trace.label="Method")
mod1=aov(result~method+duration,data=dat)
mod1.sum=print(summary(mod1))
mod2=aov(result~method*duration,data=dat)
mod2.sum=print(summary(mod2))
means<-tapply(dat$result,list(dat$method,dat$duration),mean)
ssM_1=5*(sum(means[,1]^2)-0.5*(sum(means[,1])^2))
ssM_2=5*(sum(means[,2]^2)-0.5*(sum(means[,2])^2))
ssM_3=5*(sum(means[,3]^2)-0.5*(sum(means[,3])^2))
ssM_4=5*(sum(means[,4]^2)-0.5*(sum(means[,4])^2))
MSe=mod2.sum[[1]][4,3]
DFe=mod2.sum[[1]][4,1]
DFm=1
fM_1=(ssM_1/DFm)/MSe
1-pf(fM_1,DFm,DFe)
fM_2=(ssM_2/DFm)/MSe
1-pf(fM_2,DFm,DFe)
fM_3=(ssM_3/DFm)/MSe
1-pf(fM_3,DFm,DFe)
fM_4=(ssM_4/DFm)/MSe
1-pf(fM_4,DFm,DFe)
ssD_X=5*(sum(means[1,]^2)-1/4*(sum(means[1,])^2))
ssD_Y=5*(sum(means[2,]^2)-1/4*(sum(means[2,])^2))
DFd=3
fD_X=(ssD_X/DFd)/MSe
fD_Y=(ssD_Y/DFd)/MSe
1-pf(fD_X,DFd,DFe)
1-pf(fD_X,DFd,DFe)
qv=qtukey(0.95,DFd+1,DFe)
hsd=qv*(sqrt(MSe/5))
print(diffM<-outer(means[1,],means[1,],"-"))           
abs(diffM)>hsd      

認知情報解析演習 TSP

traveling salesman problem
来週までに、inversionとtranslationを行う関数を作っておいてください。

# initialisation, etc.
n.city=20
location=matrix(runif(n.city*2,min=0,max=100),nrow=n.city)
route=sample(n.city) 
calc.Dist<-function(location,route) {
  n.city=length(route)
  total.Dist=0
  route=c(route,route[1])
  for (i_city in 1:n.city){
    total.Dist=total.Dist+dist(location[c(route[i_city],route[i_city+1]),])
  }
  return(total.Dist)
}

# plotting results
plot(rbind(location[route,],location[route[1],]),type='o',pch=20,cex=2.5, col='red',
  xlab='location @X',ylab='location @Y',main='Initial route')
plot(rbind(location[best.route,],location[best.route[1],]),type='o',pch=20, 
  col='magenta',cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
#switching 
TSP.switch<-function(route) {
  two.cities=sample(route,2,replace=F)
  route[two.cities]=route[rev(two.cities)]
  return(route)
}

# inversion
TSP.inv<-function(route,inv.length) { 
  inv.begin=sample(route,1)
  inv.end=min(inv.begin+inv.length-1,length(route))
  route[inv.begin:inv.end]=rev(route[inv.begin:inv.end])
  return(route)
}

# translation
TSP.trans<-function(route,tr.length) {
  trP1=sample(route,1)
  tr.vec=route[trP1:min(length(route),trP1+tr.length-1)]
  temp.vec=route[-(trP1:min(length(route),trP1+tr.length-1))]
  trP2=sample(1:length(temp.vec),1)
  if (trP2==length(temp.vec)) {
    new=c(temp.vec,tr.vec)
  } else {
    new=c(temp.vec[1:trP2],tr.vec,temp.vec[(trP2+1):length(temp.vec)])
  }
return(new)
}

# demo
TSP.demo<-function(n.city=20, maxItr=1000) {
  location=matrix(runif(n.city*2,min=0,max=100),nrow=n.city)
  route=sample(n.city) 
  ## param. for simulated annealing - change values if necessary 
  C=1;eta=0.99;TEMP=50; 
  ##
  optDist=calc.Dist(location,route)
  optHist=matrix(0,nrow=maxItr,ncol=(length(route)+1))
  optHist[1,]=c(route,optDist)
  for (i_loop in 2:maxItr) {
    rand.op=sample(c('inv','sw','trans'),1,prob=c(0.75,0.125,0.125))
    if (rand.op=='inv') {
      new.route=TSP.inv(route,sample(2:n.city,1))
    } else if (rand.op=='sw') {
      new.route=TSP.switch(route)
    } else { new.route=TSP.trans(route,sample(2:(round(n.city/2)),1))}
    new.dist=calc.Dist(location,new.route)
    delta=new.dist-optDist
    prob=1/(1+exp(delta/(C*TEMP)))
    if (runif(1) < prob) {
      route=new.route;optDist=new.dist;
    } 
    optHist[i_loop,]=c(route,optDist);
    TEMP=TEMP*eta
  }
  par(mfrow=c(1,2)) 
  plot(rbind(location,location[1,]),type='o',pch=20,cex=2.5, col='red',
    xlab='location @X',ylab='location @Y',main='Initial route')
  plot(location[optHist[1000,c(1:n.city,1)],],type='o',pch=20, col='magenta',
    cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
  return(optHist)
}

認知情報解析演習a

最適化問題

 
#Objective Function 
funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
xmin=-5;xmax=5;n_len=100;
xs=ys<-seq(xmin, xmax,length.out=n_len)
z<-outer(xs,ys,funZ)
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))

勾配法の実装例

 
# initialization
x=x.hist=runif(1,min=-5,max=5) # initial value of x
y=y.hist=runif(1,min=-5,max=5) # initial value of y
lambda=0.01                    # step size
zeta=1e-8                      # tol (stopping criterion)
t=0                            # time
g=100                          # maximum gradient
# end initialization

### note ###
# obj fun: z=x^4-16*x^2-5*x+y^4-16*y^2-5*y
# part. deriv. x: 4*x^3-32*x-5
# part. deriv. y: 4*y^3-32*y-5
#############

# main 
while (g>zeta) {
  t=t+1
  g.x=4*x^3-32*x-5
  g.y=4*y^3-32*y-5
  x=x-lambda*g.x
  y=y-lambda*g.y
  g=max(abs(g.x),abs(g.y))
  x.hist=c(x.hist,x)
  y.hist=c(y.hist,y)
}

# plotting results
par(mfrow=c(1,3))
par(mar=c(4.0,4.1,4.1,1.1))
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))
lines(x.hist,y.hist,type='o',pch=20,col='red')
plot(x.hist,type='l',xlab="time",ylab="value of x",lwd=2,
 main="how value of y changes over time")
plot(y.hist,type='l',xlab="time",ylab="value of y",lwd=2,
 main="how value of y changes over time")

単純確率的最適化法の実装例

 
# initialization
x=runif(1,min=-5,max=5)             # initial value of x
y=runif(1,min=-5,max=5)             # initial value of y
f.xy=x^4-16*x^2-5*x+y^4-16*y^2-5*y  # initial value of obj. fun.
w=0.1                               # search width
t=0                                 # time
max.t=1000                          # max iter (stopping criterion)
stop.crit=F                         # stop criterion
x.hist=rep(0,max.t)                 # history of x
y.hist=rep(0,max.t)                 # history of x
x.hist[1]=x
y.hist[1]=y
# end initialization

# main
while (stop.crit==F) {
  t=t+1
  x.temp=x+rnorm(1,mean=0,sd=w)
  y.temp=y+rnorm(1,mean=0,sd=w)
  f.temp=funZ(x.temp,y.temp)
  if (f.temp < f.xy) {
    x=x.temp
    y=y.temp
    f.xy=f.temp
  }
  x.hist[t]=x
  y.hist[t]=y
  if (t==max.t){stop.crit=T}
}

# plotting results
par(mfrow=c(1,3))
par(mar=c(4.0,4.1,4.1,1.1))
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))
lines(x.hist,y.hist,type='o',pch=20,col='red')
plot(x.hist,type='l',xlab="time",ylab="value of x",lwd=2,
 main="how value of y changes over time")
plot(y.hist,type='l',xlab="time",ylab="value of y",lwd=2,
 main="how value of y changes over time")

焼きなまし法の実装例

# initialization
x=runif(1,min=-5,max=5)             # initial value of x
y=runif(1,min=-5,max=5)             # initial value of y
f.xy=x^4-16*x^2-5*x+y^4-16*y^2-5*y  # initial value of obj. fun.
t=0                                 # time
max.t=1000                          # max iter (stopping criterion)
stop.crit=F                         # stop criterion
x.hist=rep(0,max.t)                 # history of x
y.hist=rep(0,max.t)                 # history of x
x.hist[1]=x
y.hist[1]=y
tau=1
c=1
g=0.999
# end initialization
 
# main
while (stop.crit==F) {
  t=t+1
  x.temp=x+rnorm(1,mean=0,sd=tau)
  y.temp=y+rnorm(1,mean=0,sd=tau)
  f.temp=funZ(x.temp,y.temp)
  deltaE=f.temp-f.xy
  p=1/(1+exp(deltaE/(c*tau)))
  if (runif(1) < p) {
    x=x.temp
    y=y.temp
    f.xy=f.temp
  }
  x.hist[t]=x
  y.hist[t]=y
  tau=tau*g
  if (t==max.t){stop.crit=T}
}
 
# plotting results
par(mfrow=c(1,3))
par(mar=c(4.0,4.1,4.1,1.1))
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))
lines(x.hist,y.hist,type='o',pch=20,col='red')
plot(x.hist,type='l',xlab="time",ylab="value of x",lwd=2,
 main="how value of y changes over time")
plot(y.hist,type='l',xlab="time",ylab="value of y",lwd=2,
 main="how value of y changes over time")

3つの手法の比較

 

GD<-function(x.init,y.init,n.iter,lambda){
x=x.init;y=y.init;z.hist=rep(0,n.iter)
# main 
for (i_iter in 2:n.iter) {
  g.x=4*x^3-32*x-5
  g.y=4*y^3-32*y-5
  x=x-lambda*g.x
  y=y-lambda*g.y
  g=max(abs(g.x),abs(g.y))
  x.hist=c(x.hist,x)
  y.hist=c(y.hist,y)
  z.hist[i_iter]=funZ(x,y)
}
return(z.hist)
}

stochOpt<-function(x.init,y.init,n.iter,w) {
x=x.init;y=y.init;f.xy=funZ(x,y)
z.hist=rep(0,n.iter);z.hist[1]=f.xy
# main
for (i_iter in 2:n.iter) {
  x.temp=x+rnorm(1,mean=0,sd=w)
  y.temp=y+rnorm(1,mean=0,sd=w)
  f.temp=funZ(x.temp,y.temp)
  if (f.temp < f.xy) {
    x=x.temp;y=y.temp;f.xy=f.temp
  }
  z.hist[i_iter]=f.xy
}
return(z.hist)
}
 
simAnn<-function(x.init,y.init,n.iter,tau,c,g) {
x=x.init;y=y.init;f.xy=funZ(x,y)
z.hist=rep(0,n.iter);z.hist[1]=f.xy
# main
for (i_iter in 2:n.iter) {
  x.temp=x+rnorm(1,mean=0,sd=tau)
  y.temp=y+rnorm(1,mean=0,sd=tau)
  f.temp=funZ(x.temp,y.temp)
  deltaE=f.temp-f.xy
  p=1/(1+exp(deltaE/(c*tau)))
  if (runif(1) < p) {
    x=x.temp;y=y.temp;f.xy=f.temp
  }
  z.hist[i_iter]=f.xy
  tau=tau*g
}
return(z.hist)
}

n.rep=500
n.iter=500
xs=runif(n.rep,-3,3);ys=runif(n.rep,-3,3)
GD.hist=SA.hist=SO.hist=matrix(0,n.iter,n.rep)
for (i.rep in 1:n.rep) {
  GD.hist[,i.rep]=GD(xs[i.rep],ys[i.rep],n.iter,0.025)
  SO.hist[,i.rep]=stochOpt(xs[i.rep],ys[i.rep],n.iter,2)
  SA.hist[,i.rep]=simAnn(xs[i.rep],ys[i.rep],n.iter,3,1,0.999)
}

plot(rowMeans(GD.hist),type='l',ylim=c(-160,0),lwd=3)
lines(rowMeans(SO.hist),col='red',lwd=3)
lines(rowMeans(SA.hist),col='blue',lwd=3)

認知情報解析A: なぜ外国人居住区ができるのか?

社会を<モデル>でみる:数理社会学への招待
29章:なぜ差別しなくても外国人居住区ができるのか
○と♯の2つのグループが存在し、以下の条件で他の場所へ移動する。
○: 近隣に2人以下○の場合、
♯: 近隣の1/3以上が♯でない場合、

例1
#
# 1 epochで複数回移動するエージェントがいてもいいバージョン
# 次回は移動は1epoch内で最大1回とするものを作りましょう。
#

# function to check circles
ck.circle <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  circles=length(which(neighbor==2))
  act=ifelse(circles<3,"move","stay")
  return(act)
}

# function to check sharps
ck.sharp <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  all.neighb=length(which(neighbor!=0))-1
  if (all.neighb==0){
    return("move")
  } else {
    sharps=length(which(neighbor==1))-1
    act=ifelse(sharps/all.neighb<(1/3),"move","stay")
    return(act)
  }
}

FUNmigration<-function(N.row=10, N.col=10, N.sharp=10, N.circle=10) {
   
# initialization
world=matrix(0,nrow=N.row,ncol=N.col)
temp.vec=c(rep(1,N.sharp),rep(2,N.circle))
world[sample(1:(N.row*N.col),N.sharp+N.circle,replace=F)]=temp.vec
moved=1;counter=0

# plotting initial config.
par(mfrow=c(1,2))
idx1<-which(world==2,arr.ind=T);idx2<-which(world==1,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),
  ylim=c(0.5,N.col+0.5),main="Initial Arrangement",
  xlab="location @ x", ylab="location @ y")
  text(idx1[,1],idx1[,2],"o",cex=4,col="red");
  text(idx2[,1],idx2[,2],"#",cex=4,col="blue")

# main loop, max.iter=1000
while (moved>0 & counter<1000) {
  moved=0
  counter=counter+1
  for (i_row in 1:N.row) {
    for (i_col in 1:N.col) {
      if (world[i_row,i_col]==1) {
        act=ck.sharp(world,i_row,i_col,N.row,N.col)
        if (act=="move") {   
          dest=sample(which(world==0),1)
          world[dest]=1
          world[i_row,i_col]=0
          moved=1  
        }
      }
      if (world[i_row,i_col]==2) {
        act=ck.circle(world,i_row,i_col,N.row,N.col)
        if (act == "move") {
         dest=sample(which(world==0),1)
         world[dest]=2
         world[i_row,i_col]=0
         moved=1
        }
}}}}

# plotting result
idx1<-which(world==1,arr.ind=T)
idx2<-which(world==2,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),ylim=c(0.5,N.col+0.5),
  main="Arragement After Migration",,xlab="location @ x", ylab="location @ y")
text(idx1[,1],idx1[,2],"#",cex=4,col="blue")
text(idx2[,1],idx2[,2],"o",cex=4,col="red")
return(world)
}

例2
#
# 移動は1epoch内で最大1回とするバージョン
#

FUNmigration2<-function(N.row=10, N.col=10, N.sharp=10, N.circle=10) {
   
# initialization
world=matrix(0,nrow=N.row,ncol=N.col)
temp.vec=c(rep(1,N.sharp),rep(2,N.circle))
world[sample(1:(N.row*N.col),N.sharp+N.circle,replace=F)]=temp.vec
moved=1;counter=0

# plotting initial config.
par(mfrow=c(1,2))
idx1<-which(world==2,arr.ind=T);idx2<-which(world==1,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),
  ylim=c(0.5,N.col+0.5),main="Initial Arrangement",
  xlab="location @ x", ylab="location @ y")
  text(idx1[,1],idx1[,2],"o",cex=4,col="red");
  text(idx2[,1],idx2[,2],"#",cex=4,col="blue")

ck.circle <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  circles=length(which(neighbor<0))
  act=ifelse(circles<3,"move","stay")
  return(act)
}
 
ck.sharp <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  all.neighb=length(which(neighbor!=0))-1
  if (all.neighb==0){
    return("move")
  } else {
    sharps=length(which(neighbor>0))-1
    act=ifelse(sharps/all.neighb<(1/3),"move","stay")
    return(act)
  }
}

# ここから違う
moved=1;counter=0;N.row=10;N.col=10;
while (moved>0 & counter<1000) {
  moved=0
  counter=counter+1
  for (i_sharp in 1:N.sharp) {
    idx=which(world==i_sharp,arr.ind=T)
    act=ck.sharp(world,idx[1],idx[2],N.row,N.col);
    if (act=="move") {   
      dest=sample(which(world==0),1)
      world[dest]=i_sharp
      world[idx[1],idx[2]]=0
      moved=1  
    }
  }
  for (i_circle in -1:-N.circle) {
    idx=which(world==i_circle,arr.ind=T)
    act=ck.circle(world,idx[1],idx[2],N.row,N.col);
    if (act=="move") {   
      dest=sample(which(world==0),1)
      world[dest]=i_circle
      world[idx[1],idx[2]]=0
      moved=1  
    }  
  }
} 

# plotting result
idx1<-which(world==1,arr.ind=T)
idx2<-which(world==2,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),ylim=c(0.5,N.col+0.5),
  main="Arragement After Migration",,xlab="location @ x", ylab="location @ y")
text(idx1[,1],idx1[,2],"#",cex=4,col="blue")
text(idx2[,1],idx2[,2],"o",cex=4,col="red")
return(world)
}

RL Sutton & Barto Ch.6

#################################################
#  ch6.3 Temporal Difference 
#   TD0 & constant step-size MC
#################################################
# TD0 model
TD0.ex1<-function(maxItr,alpha,gamma) {
  V=c(0,rep(0.5,5),0)
  V.hist=matrix(0,nrow=maxItr+1,ncol=5)
  V.hist[1,]=V[2:6]
  P.act=matrix(0.5,ncol=7,nrow=2)
  for (i_rep in 1:maxItr) {
    state=5
    while (state!=1 & state!=7) {
      action=sample(c(-1,1),1,prob=P.act[,state])
      state.old=state
      state=state+action
      r=ifelse(state==7,1,0)
      V[state.old]=V[state.old]+alpha*(r+gamma*V[state]-V[state.old])
    }
    V.hist[(i_rep+1),]=V[2:6]
  }
  return(V.hist)  
}
 
# (re)creating Fig 6.6
true.V=1:5*(1/6)
res=TD0.ex1(1000,0.1,1)
plot(true.V,type='o',pch=15,ylim=c(0,1),ylab="Value",xaxt="n",
  xlab="State",xlim=c(0.5,5.5),cex=2,lwd=2)
axis(1,at=1:5,labels=c("A","B","C","D","E"))
cols=c('red','blue','green','cyan','magenta')
ns=c(1,2,11,101,1001)
for (i_lines in 1:5) {
  lines(res[ns[i_lines],],type='o',pch=15+i_lines,cex=2,lwd=2,col=cols[i_lines])
}
legend('topleft',c('True value','t=0','t=1','t=10','t=100','t=1000'),
 col=c('black',cols),pch=15:20,lwd=1.5)
# constant step-size Monte Carlo
constMC.ex1<-function(maxItr,alpha) {
  V=c(0,rep(0.5,5),0)
  V.hist=matrix(0,nrow=maxItr+1,5)
  V.hist[1,]=V[2:6]
  P.act=matrix(0.5,ncol=7,nrow=2)
  for (i_rep in 1:maxItr) {
  state=5;
  state.hist=state
  while (state!=1 & state!=7) {
    action=sample(c(-1,1),1,prob=P.act[,state])
    state=state+action
    state.hist=cbind(state.hist,state)
  }
  R=ifelse(state==7,1,0)
  n.state=length(state.hist)
  for (i_state in 1:(n.state-1)) {
    V[state.hist[i_state]]=V[state.hist[i_state]]+
     alpha*(R-V[state.hist[i_state]])
  }
  V.hist[(i_rep+1),]=V[2:6]
 }
 return(V.hist)  
}

# (re)creating Fig 6.7
alphaTD=c(0.05,0.075,0.1,0.15)
alphaMC=c(0.01,0.02,0.03,0.04)
n.alphas=length(alphaTD)
pchs=0:(0+n.alphas)
true.V=1:5*(1/6)
n_rep=100
sqs=seq(1,101,2)
plot(0,0,type='n',xlim=c(0,100),ylim=c(0,0.25))
for (i_alpha in 1:n.alphas) {
  rmsTD=matrix(0,101,n_rep)
  rmsMC=matrix(0,101,n_rep)
  for (i_rep in 1:n_rep) {
     resTD=TD0.ex1(100,alphaTD[i_alpha],1)
     resMC=constMC.ex1(100,alphaMC[i_alpha])
     for (i_gen in 1:101) {
       rmsTD[i_gen,i_rep]=sqrt(mean((resTD[i_gen,]-true.V)^2))
       rmsMC[i_gen,i_rep]=sqrt(mean((resMC[i_gen,]-true.V)^2))
     }
  }
  mTD=rowMeans(rmsTD)
  mMC=rowMeans(rmsMC)
  lines(mTD,col='red')
  lines(mMC,col='blue')
  lines(sqs,mTD[sqs],col='red',pch=pchs[i_alpha],type='p')
  lines(sqs,mMC[sqs],col='blue',pch=pchs[i_alpha],type='p')
}
labs=c("MC, alpha=0.01",
       "MC, alpha=0.02", 
       "MC, alpha=0.03",
       "MC, alpha=0.04",
       "TD, alpha=0.05",
       "TD, alpha=0.075",
       "TD, alpha=0.10",
       "TD, alpha=0.15")  
legend('topright',labs,col=c(rep('blue',4),rep('red',4)),pch=rep(0:3,2),lwd=1.5)
#################################################
#  ch6.4 On-policy TD, Sarsa 
#################################################
sarsa.ex6.5<-function(maxItr,alpha,gamma,epsilon) {
# field size: 7row x 10column
# horizontal move ->  COLUMN 
# vertical move     ->  ROW 
# effect of wind     ->  ROW
# actions: 1-up, 2-right, 3-down, 4-left 
act.V=matrix(c(1,0,0,1,-1,0,0,-1),nrow=4,byrow=T)
wind=matrix(c(0,0,0,0,0,0,1,0,1,0,1,0,2,0,2,0,1,0,0,0),byrow=T,nrow=10)
goal=c(4,8)
Qs=array(0,dim=c(7,10,4))
for (i_rep in 1:maxItr) {
  state=c(4,1) # start
  if (runif(1) > epsilon) {
    move=which.max(Qs[state[1],state[2],])
  } else { move=sample(1:4,1)}
  while (!all(state==goal)) {
    st.old=state
    mv.old=move
    state=state+act.V[move,]+wind[state[2],]
    if (state[1]<1) {state[1]=1}
    if (state[1]>7) {state[1]=7}
    if (state[2]<1) {state[2]=1}
    if (state[2]>10) {state[2]=10}
    if (runif(1) > epsilon) {
      move=which.max(Qs[state[1],state[2],])
    } else { move=sample(1:4,1)}
     rew=ifelse(all(state==goal),0,-1)
    Qs[st.old[1],st.old[2],mv.old]=Qs[st.old[1],st.old[2],mv.old]
     +alpha*(rew+gamma* Qs[state[1],state[2],move]
     -Qs[st.old[1],st.old[2],mv.old])
  }
}
return(Qs)
}    
# running example
Qs=sarsa.ex6.5(5e6,0.1,1,0.1)
# sim optimal actions
state=c(4,1);goal=c(4,8);
state.hist=state
while (!all(state==goal)) {
  moveID=which.max(Qs[state[1],state[2],])
  state=state+act.V[moveID,]+wind[state[2],]
  if (state[1]<1) {state[1]=1}
  if (state[1]>7) {state[1]=7}
  if (state[2]<1) {state[2]=1}
  if (state[2]>10) {state[2]=10}
  state.hist=rbind(state.hist,state)
}
# plotting results
plot(0,0,type='n',xlim=c(0,11),ylim=c(0,8),xlab="",ylab="",
  main="Learned policies -- Sarsa")
lines(1,4,type='p',pch=19,col='red',cex=2)
lines(8,4,type='p',pch=19,col='red',cex=2)
dirs=c("up","right","down","left" )
for (i_row in 1:7) {
   for (i_col in 1:10) {
     best.move=dirs[which.max(Qs[i_row,i_col,])]
     text(i_col,i_row,best.move)
   }
}
lines(state.hist[,2],state.hist[,1],col="red",lwd=2)
#################################################
#  ch6.5 Off-policy TD, Q-learning
#################################################
Qlearn.ex6.5<-function(maxItr,alpha,gamma,epsilon) {
# field size: 7row x 10column
# horizontal move ->  COLUMN 
# vertical move     ->  ROW 
# effect of wind     ->  ROW
# actions: 1-up, 2-right, 3-down, 4-left 
act.V=matrix(c(1,0,0,1,-1,0,0,-1),nrow=4,byrow=T)
wind=matrix(c(0,0,0,0,0,0,1,0,1,0,1,0,2,0,2,0,1,0,0,0),byrow=T,nrow=10)
goal=c(4,8)
Qs=array(0,dim=c(7,10,4))
for (i_rep in 1:maxItr) {
  state=c(4,1) # start
  while (!all(state==goal)) {
    if (runif(1) > epsilon) {
      move=which.max(Qs[state[1],state[2],])
    } else { move=sample(1:4,1)}
    sIDX=state
    state=state+act.V[move,]+wind[state[2],]
    if (state[1]<1) {state[1]=1}
    if (state[1]>7) {state[1]=7}
    if (state[2]<1) {state[2]=1}
    if (state[2]>10) {state[2]=10}   
    max.Q=max(Qs[state[1],state[2],])
    rew=ifelse(all(state==goal),0,-1)
    Qs[sIDX[1],sIDX[2],move]=Qs[sIDX[1],sIDX[2],move]
     +alpha*(rew+gamma* max.Q-Qs[sIDX[1],sIDX[2],move])
  }
}
return(Qs)
}

Qs=Qlearn.ex6.5(1e6,0.05,1,0.1)
# sim optimal actions
state=c(4,1);goal=c(4,8);
state.hist=state
while (!all(state==goal)) {
  moveID=which.max(Qs[state[1],state[2],])
  state=state+act.V[moveID,]+wind[state[2],]
  if (state[1]<1) {state[1]=1}
  if (state[1]>7) {state[1]=7}
  if (state[2]<1) {state[2]=1}
  if (state[2]>10) {state[2]=10}
  state.hist=rbind(state.hist,state)
}
# plotting results
plot(0,0,type='n',xlim=c(0,11),ylim=c(0,8),xlab="",ylab="",
 main="Learned policies -- Q-learning")
lines(1,4,type='p',pch=19,col='red',cex=2)
lines(8,4,type='p',pch=19,col='red',cex=2)
dirs=c("up","right","down","left" )
for (i_row in 1:7) {
   for (i_col in 1:10) {
     best.move=dirs[which.max(Qs[i_row,i_col,])]
     text(i_col,i_row,best.move)
   }
}
lines(state.hist[,2],state.hist[,1],col="red",lwd=2)