dynamic programming 実装例

# Value iteration for the gambler's problem (Sutton & Barto, example 4.3).
# States are capital levels 0..100; V has 101 entries with V[s+1] = value of
# holding capital s.  The winning state 100 is seeded with value 1, ruin (0)
# stays at 0.  No intermediate rewards, so V[s] is the win probability.
V=c(rep(0,100),1);V.hist=c()
# p[1] = probability of winning a bet, p[2] = probability of losing
p=c(0.4,0.6);
# NOTE(review): tol=1e-20 is below double precision; in practice the loop runs
# until a sweep changes nothing at all (delta becomes exactly 0).
gamma=1;delta=1; tol=1e-20
max.a=rep(0,101)   # greedy stake per capital level (filled in during sweeps)
while (delta>tol) {
  delta=0;
  for (i_state in 1:99) {
    v=V[i_state+1]                       # value before this update
    temp=matrix(0,nrow=1,ncol=i_state)   # one entry per candidate stake 1..s
    for (i_action in 1:i_state) {
       # expected value of staking i_action: a win moves capital up (capped
       # at 100, i.e. the terminal win state), a loss moves it down
       # (floored at 0)
       temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
         V[(max(i_state-i_action,0)+1)])))
     }
    V[i_state+1]=max(temp)
    # round before which.max so stakes that differ only by float noise tie,
    # and the smallest such stake is picked
    max.a[i_state+1]=which.max(round(temp,8))
    delta=max(delta,abs(v-V[i_state+1]))
  }
  V.hist=rbind(V.hist,V)   # keep one row per sweep for the plots below
}
# Plot the value estimates from selected sweeps (left panel) and the final
# greedy policy (right panel).
par(mfrow=c(1,2))
plot(V.hist[1,], type='l', lwd=2,
     xlab="Capital", ylab="Value Estimates", col="red")
for (k in 2:4) {
  lines(V.hist[c(1,2,3,32)[k],], lwd=2,
        col=c("red","blue","green","black")[k])
}
legend("topleft", c("sweep 1","sweep 2","sweep 3", "sweep 32"),
       col=c("red","blue","green","black"), lwd=2)
barplot(max.a, xlab="Capital", ylab="Final Policy", col="white")

認知情報解析演習 居住区問題

# --- Residential-segregation (Schelling-style) demo: setup -------------------
n.circle = 20; n.sharp = 20; size = 10   # 20 agents of each type, 10x10 grid
loc = sample(1:size^2, n.circle+n.sharp) # distinct random cells for everyone
type = c(rep(1,n.circle),rep(2,n.sharp))
# circle = 1; sharp = 2
people = cbind(type,loc)                 # one row per agent: (type, cell)
state.mat = matrix(0,size,size)          # 0 marks an empty cell
state.mat[people[,2]]=people[,1]

# BUG FIX: the original read "p.move = #1/(2*n.circle)"; the RHS was inside
# the comment, so the parser continued onto the next line and evaluated
# "p.move = p.other = 0.1".  Keep the value the original actually produced
# (0.1), but assign it explicitly so the intent is visible.
p.move = 0.1   # originally intended: 1/(2*n.circle)
p.other = 0.1
c.count = 3    # a sharp wants to move when >= c.count circles are around it

# (x, y) grid coordinates of linear cell indices 1..size^2 (column-major)
idx = cbind(rep(1:size,size),sort(rep(1:size,size)))

#
# --- Residential-segregation demo: simulation --------------------------------
# Sweeps until (a) nobody moved during a sweep and (b) every sharp agent is
# content (fewer than c.count circles in its 3x3 neighbourhood).
term = FALSE
while (!term){
  term = TRUE
  rand.order = sample(1:nrow(people))
  for (i.p in 1:nrow(people)){
    if (people[rand.order[i.p],1]==1){
      # circle: relocates to a random empty cell with probability p.move
      if (runif(1) < p.move){
        empty = 1:(size^2)
        empty = empty[-sort(people[,2])]
        people[rand.order[i.p],2] = sample(empty,1)
        state.mat = matrix(0,size,size)
        state.mat[people[,2]]=people[,1]
        term = FALSE
      }
    } else {
      # sharp: relocates when >= c.count circles occupy its 3x3 neighbourhood
      x.min = max(idx[people[rand.order[i.p],2],1]-1,1)
      x.max = min(idx[people[rand.order[i.p],2],1]+1,size)
      y.min = max(idx[people[rand.order[i.p],2],2]-1,1)
      y.max = min(idx[people[rand.order[i.p],2],2]+1,size)
      circle.in = sum(state.mat[x.min:x.max,y.min:y.max]==1)
      if (circle.in >= c.count){
        empty = 1:(size^2)
        empty = empty[-sort(people[,2])]
        people[rand.order[i.p],2] = sample(empty,1)
        state.mat = matrix(0,size,size)
        state.mat[people[,2]]=people[,1]
        term = FALSE
        #print('moved')
      }
    }
  }
  # verification pass: any still-discontent sharp forces another sweep.
  # BUG FIX: the original tested the agent type with rand.order[i.p] but
  # looked up the location with i.p, mixing two different agents; the loop
  # visits every agent anyway, so plain i.p is used consistently here.
  for (i.p in 1:nrow(people)){
    if (people[i.p,1]==2){
      x.min = max(idx[people[i.p,2],1]-1,1)
      x.max = min(idx[people[i.p,2],1]+1,size)
      y.min = max(idx[people[i.p,2],2]-1,1)
      y.max = min(idx[people[i.p,2],2]+1,size)
      circle.in = sum(state.mat[x.min:x.max,y.min:y.max]==1)
      # print(circle.in)   # leftover debug output, disabled
      if (circle.in >= c.count){
        term = FALSE
        break
      }
    }
  }
}
# Draw the final configuration: "0" = circle, "#" = sharp, on a red grid.
plot(0, 0, type = 'n', xlim = c(0,11), ylim = c(0,11))
lab = c("0","#")
text(idx[people[,2],1], idx[people[,2],2], lab[people[,1]], cex = 3)
ab = seq(0.5, 10.5, 1)
# abline accepts vectors: one call per direction draws all the grid lines
abline(h = ab, col = 'red')
abline(v = ab, col = 'red')

基礎自習B02

# Convert a non-negative integer to its binary representation.
#
# Args:
#   num:    non-negative integer (scalar) to convert.
#   digits: minimum number of binary digits (default 8); the result is
#           zero-padded on the left up to this length.
# Returns:
#   A numeric vector of 0s and 1s, most significant bit first; its length is
#   max(digits, bits actually required).
#   BUG FIX: the original returned a 1 x digits matrix when padding was
#   needed but a bare vector otherwise; a plain vector is now returned in
#   every case.  Negative input now errors instead of looping forever.
dec2bin <- function(num, digits=8) {
  if (num < 0) stop("num must be non-negative")
  bin <- c()
  if (num == 0) {
    bin <- 0
  } else {
    while (num != 0) {
      bin <- c(num %% 2, bin)   # prepend the next least-significant bit
      num <- num %/% 2
    }
  }
  # left-pad with zeros up to the requested width
  if (length(bin) < digits) {
    bin <- c(rep(0, digits - length(bin)), bin)
  }
  return(bin)
}

認知情報解析演習 W13

# Gridworld (Sutton & Barto, ex. 4.1): deterministic transition table.
# Rows are states 1..14; columns give the successor state when moving
# north/east/south/west.  Moves off the grid leave the state unchanged and
# state 15 is the absorbing terminal state.
north = c(1:3, 15, 1:10)
east = 2:15
east[c(3, 7, 11)] = c(3, 7, 11)    # right-edge states: "east" is a no-op
south = c(5:15, 12:14)
west = c(15, 1:13)
west[c(4, 8, 12)] = c(4, 8, 12)    # left-edge states: "west" is a no-op
trM = cbind(north, east, south, west)
###

# Evaluate the equiprobable random policy with a fixed number of synchronous
# (Jacobi-style) sweeps: each state is updated from the previous sweep's
# estimates, held in prev.V.
nRep = 1000; gamma = 1
P = 0.25        # probability of each of the four actions
R = -1          # reward of -1 on every transition
V = rep(0, 15)  # V[15] stays 0 (terminal)
for (sweep in 1:nRep) {
  prev.V = V
  for (s in 1:14) {
    V[s] = sum(P * (R + gamma * prev.V[trM[s, ]]))
  }
}
# arrange as the 4x4 grid (terminal corner shown as 0)
matrix(c(0, V), 4, 4)

#   ch4.1 policy evaluation 
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
# Gridworld ex. 4.1 again: deterministic transition table (rows = states
# 1..14, columns = successor under north/east/south/west; off-grid moves are
# no-ops, state 15 is terminal).
north = c(1:3, 15, 1:10)
east = 2:15
east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west = c(15, 1:13)
west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)

# reward per step and per-action probability of the random policy
R = -1; P = 0.25

# In-place (Gauss-Seidel) iterative policy evaluation: sweep until the
# largest single-state change falls below tol.
gamma = 1; tol = 1e-8; V = rep(0, 15)
delta = 1
while (delta > tol) {
  delta = 0
  for (s in 1:14) {
    v.old = V[s]
    V[s] = sum(P * (R + gamma * V[trM[s, ]]))
    delta = max(delta, abs(v.old - V[s]))
  }
}
 
# result
# > matrix(c(0,V),nrow=4)
#      [,1] [,2] [,3] [,4]
# [1,]    0  -14  -20  -22
# [2,]  -14  -18  -20  -20
# [3,]  -20  -20  -18  -14
# [4,]  -22  -20  -14    0
 
#####################################
#   ch4.3 Policy Improvement
#   easier one 
#####################################
# ch4.3 Policy Improvement on the 4x4 gridworld.  The policy being evaluated
# is a softmax (temperature 1/10) over successor values, so it becomes
# near-greedy as V sharpens; bestP tracks the greedy action per state.
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1;V=rep(0,15);
gamma=1; tol=1e-5;
bestP=sample(1:4,14,replace=TRUE)
stable=FALSE;counter=0
# Guard against non-termination: stop after 1000 improvement rounds.
while (!stable && counter < 1000){
  counter=counter+1
  Vmat=matrix(V[trM],ncol=4)
  # action probabilities per state: softmax of successor values
  Ppolicy=t(apply(Vmat,1,function(x) exp(10*x)/sum(exp(10*x))))
# iterative policy evaluation
  # BUG FIX: delta must be reset here; the original set delta=1 only once,
  # so evaluation ran solely in the first improvement round.
  delta=1
  while (delta>tol) {
    delta=0;
    for (i_state in 1:14) {
      v=V[i_state]
      # BUG FIX: use the full probability row Ppolicy[i_state,]; the original
      # Ppolicy[i_state] picked a single scalar (column-major element).
      V[i_state]=sum(Ppolicy[i_state,]*(R+gamma*V[trM[i_state,]]))
      delta=max(delta,abs(v-V[i_state]))
    }
  }
# policy improvement
  # BUG FIX: stable must mean "no state changed its greedy action"; the
  # original kept only the comparison result of the LAST state.
  stable=TRUE
  for (i_state in 1:14) {
    b=bestP[i_state]
    bestP[i_state]=which.max(V[trM[i_state,]])
    if (bestP[i_state]!=b) stable=FALSE
  }
}
 
 
#####################################
#   ch4.4 Value Iteration 
#####################################
# example 4.3
# Value iteration for the gambler's problem (Sutton & Barto, example 4.3).
# States are capital levels 0..100; V has 101 entries with V[s+1] = value of
# holding capital s.  The winning state 100 is seeded with value 1, ruin (0)
# stays at 0.  No intermediate rewards, so V[s] is the win probability.
V=c(rep(0,100),1);V.hist=c()
# p[1] = probability of winning a bet, p[2] = probability of losing
p=c(0.4,0.6);
# NOTE(review): tol=1e-20 is below double precision; in practice the loop runs
# until a sweep changes nothing at all (delta becomes exactly 0).
gamma=1;delta=1; tol=1e-20
max.a=rep(0,101)   # greedy stake per capital level (filled in during sweeps)
while (delta>tol) {
  delta=0;
  for (i_state in 1:99) {
    v=V[i_state+1]                       # value before this update
    temp=matrix(0,nrow=1,ncol=i_state)   # one entry per candidate stake 1..s
    for (i_action in 1:i_state) {
       # expected value of staking i_action: a win moves capital up (capped
       # at 100, i.e. the terminal win state), a loss moves it down
       # (floored at 0)
       temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
         V[(max(i_state-i_action,0)+1)])))
     }
    V[i_state+1]=max(temp)
    # round before which.max so stakes that differ only by float noise tie,
    # and the smallest such stake is picked
    max.a[i_state+1]=which.max(round(temp,8))
    delta=max(delta,abs(v-V[i_state+1]))
  }
  V.hist=rbind(V.hist,V)   # keep one row per sweep for the plots below
}
# Plot the value estimates from selected sweeps (left panel) and the final
# greedy policy (right panel).
par(mfrow=c(1,2))
plot(V.hist[1,], type='l', lwd=2,
     xlab="Capital", ylab="Value Estimates", col="red")
for (k in 2:4) {
  lines(V.hist[c(1,2,3,32)[k],], lwd=2,
        col=c("red","blue","green","black")[k])
}
legend("topleft", c("sweep 1","sweep 2","sweep 3", "sweep 32"),
       col=c("red","blue","green","black"), lwd=2)
barplot(max.a, xlab="Capital", ylab="Final Policy", col="white")

基礎実習MB02

# Forward pass of a multiplication node: the (elementwise) product of the
# two inputs.
multi.forwd <- function(x,y){
  x * y
}
# Backward pass of a multiplication node.
#
# Args:
#   x, y:  the inputs that were fed to the forward pass.
#   dout:  gradient flowing back from downstream.
# Returns:
#   list(dx, dy): the gradients w.r.t. x and y, i.e. dout scaled by the
#   respective other input.
multi.bckwd <- function(x, y, dout){
  list(dx = dout * y, dy = dout * x)
}
 
# Computational-graph worked example: total price of apples with tax,
# forward pass then backward (gradient) pass through two multiply nodes.
apple = 100; n.apple = 2; tax = 1.1
apple.pre.tax = multi.forwd(apple, n.apple)        # 100 * 2   = 200
apple.post.tax = multi.forwd(apple.pre.tax, tax)   # 200 * 1.1 = 220
 
# backprop: seed with d(price)/d(price) = 1 and push gradients upstream
dprice = 1
d.apple.post.tax = multi.bckwd(apple.pre.tax, tax, dprice)
d.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dx    # = n.apple * tax
d.n.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dy  # = apple * tax
 

Disclaimer

このcourselogにあるコードは、主に学部生・博士課程前期向けの講義・演習・実習で例として提示しているもので、原則直感的に分かりやすいように書いているつもりです。例によってはとても非効率なものもあるでしょうし、「やっつけ」で書いているものもあります。また、普段はMATLABを使用していますので、変な癖がでているかもしれません。これらの例を使用・参考にする場合はそれを踏まえたうえで使用・参考にして下さい。
卒業論文に関する資料:[2015PDF] [word template] [latex template] [表紙] [レポートの書き方] [引用文献など]
A Brief Guide to R (Rのコマンドの手引きおよび例)はこちらをどうぞ