# dynamic programming 実装例

```V=c(rep(0,100),1);V.hist=c()
p=c(0.4,0.6);
gamma=1;delta=1; tol=1e-20
max.a=rep(0,101)
while (delta>tol) {
delta=0;
for (i_state in 1:99) {
v=V[i_state+1]
temp=matrix(0,nrow=1,ncol=i_state)
for (i_action in 1:i_state) {
temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
V[(max(i_state-i_action,0)+1)])))
}
V[i_state+1]=max(temp)
max.a[i_state+1]=which.max(round(temp,8))
delta=max(delta,abs(v-V[i_state+1]))
}
V.hist=rbind(V.hist,V)
}
# plotting results
par(mfrow=c(1,2))
plot(V.hist[1,],type='l',lwd=2,xlab="Capital",ylab="Value Estimates",col='red')
lines(V.hist[2,],lwd=2,col='blue')
lines(V.hist[3,],lwd=2,col='green')
lines(V.hist[32,],lwd=2,col='black')
legend("topleft",c("sweep 1","sweep 2","sweep 3", "sweep 32"),
col=c("red","blue","green","black"),lwd=2)
barplot(max.a,xlab="Capital",ylab="Final Policy",col="white")
```

# 認知情報解析演習　居住区問題

```n.circle = 20; n.sharp = 20; size = 10
loc = sample(1:size^2, n.circle+n.sharp)
type = c(rep(1,n.circle),rep(2,n.sharp))
# circle = 1; sharp = 2
people = cbind(type,loc)
state.mat = matrix(0,size,size)
state.mat[people[,2]]=people[,1]

p.move = #1/(2*n.circle)
p.other = 0.1
c.count = 3

idx = cbind(rep(1:size,size),sort(rep(1:size,size)))

#
term = F
while (term == F){
term = T
rand.order = sample(1:nrow(people))
for (i.p in 1:nrow(people)){
if (people[rand.order[i.p],1]==1){
# circle
if (runif(1) < p.move){
empty = 1:(size^2)
empty = empty[-sort(people[,2])]
people[rand.order[i.p],2] = sample(empty,1)
state.mat = matrix(0,size,size)
state.mat[people[,2]]=people[,1]
term = F
}
} else {
# sharp
x.min = max(idx[people[rand.order[i.p],2],1]-1,1)
x.max = min(idx[people[rand.order[i.p],2],1]+1,size)
y.min = max(idx[people[rand.order[i.p],2],2]-1,1)
y.max = min(idx[people[rand.order[i.p],2],2]+1,size)
circle.in = sum(state.mat[x.min:x.max,y.min:y.max]==1)
if (circle.in >= c.count){
empty = 1:(size^2)
empty = empty[-sort(people[,2])]
people[rand.order[i.p],2] = sample(empty,1)
state.mat = matrix(0,size,size)
state.mat[people[,2]]=people[,1]
term = F
#print('moved')
}
}
}
for (i.p in 1:nrow(people)){

if (people[rand.order[i.p],1]==2){
x.min = max(idx[people[i.p,2],1]-1,1)
x.max = min(idx[people[i.p,2],1]+1,size)
y.min = max(idx[people[i.p,2],2]-1,1)
y.max = min(idx[people[i.p,2],2]+1,size)
circle.in = sum(state.mat[x.min:x.max,y.min:y.max]==1)
print(circle.in)
if (circle.in >= c.count){
term = F
break
}
}
}
}
plot(0,0, type= 'n', xlim = c(0,11),ylim=c(0,11))
lab = c("0","#")
text(idx[people[,2],1],idx[people[,2],2],lab[people[,1]],cex=3)
ab = seq(0.5,10.5,1)
for (i in 1:11){
abline(h=ab[i],col='red')
abline(v=ab[i],col='red')
}
```

# 基礎自習B02

```dec2bin<-function(num, digits=8) {
bin=c()
if (num==0){
bin=0
} else {
while(num!=0){
rem=num%%2
num=num%/%2
bin=c(rem,bin)
}
}
if (length(bin) < digits){
res=matrix(0,nrow=1,ncol=digits)
res[(digits-length(bin)+1):digits]=bin
} else {res=bin}
return(res)
}
```

# 認知情報解析演習　W13

```north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
###

nRep=1000; gamma=1;
P = 0.25
R = -1
V = rep(0,15)
for (i_rep in 1:nRep) {
V.old=V
for (i_state in 1:14) {
V[i_state]=sum(P*(R+gamma*V.old[trM[i_state,]]))
}
}
matrix(c(0,V),4,4)

#   ch4.1 policy evaluation
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

# defining Reward & trans. prob.
R=-1;P=0.25;

# iterative policy evaluation
delta=1; gamma=1; tol=1e-8; V=rep(0,15);
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]));
}
}

# result
> matrix(c(0,V),nrow=4)
[,1] [,2] [,3] [,4]
[1,]    0  -14  -20  -22
[2,]  -14  -18  -20  -20
[3,]  -20  -20  -18  -14
[4,]  -22  -20  -14    0

#####################################
#   ch4.3 Policy Improvement
#   easier one
#####################################
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1;V=rep(0,15);
delta=1; gamma=1; tol=1e-5;
bestP=sample(1:4,14,replace=T)
stable=F;counter=0
while (stable==F){
counter=counter+1
Vmat=matrix(V[trM],ncol=4)
Ppolicy=t(apply(Vmat,1,function(x) exp(10*x)/sum(exp(10*x))))
# iterative policy evaluation
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(Ppolicy[i_state]*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]))
}
}
# policy improvement
stable=F
for (i_state in 1:14) {
b=bestP[i_state]
bestP[i_state]=which.max(V[trM[i_state,]])
ifelse((bestP[i_state]==b),stable<-T,stable<-F)
}
}

#####################################
#   ch4.4 Value Iteration
#####################################
# example 4.3
V=c(rep(0,100),1);V.hist=c()
p=c(0.4,0.6);
gamma=1;delta=1; tol=1e-20
max.a=rep(0,101)
while (delta>tol) {
delta=0;
for (i_state in 1:99) {
v=V[i_state+1]
temp=matrix(0,nrow=1,ncol=i_state)
for (i_action in 1:i_state) {
temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
V[(max(i_state-i_action,0)+1)])))
}
V[i_state+1]=max(temp)
max.a[i_state+1]=which.max(round(temp,8))
delta=max(delta,abs(v-V[i_state+1]))
}
V.hist=rbind(V.hist,V)
}
# plotting results
par(mfrow=c(1,2))
plot(V.hist[1,],type='l',lwd=2,xlab="Capital",ylab="Value Estimates",col='red')
lines(V.hist[2,],lwd=2,col='blue')
lines(V.hist[3,],lwd=2,col='green')
lines(V.hist[32,],lwd=2,col='black')
legend("topleft",c("sweep 1","sweep 2","sweep 3", "sweep 32"),
col=c("red","blue","green","black"),lwd=2)
barplot(max.a,xlab="Capital",ylab="Final Policy",col="white")
```

# 基礎実習MB02

```multi.forwd <- function(x,y){
return(x*y)
}
multi.bckwd <- function(x, y, dout){
dx = dout * y
dy = dout * x
return(list(dx = dx, dy = dy))
}

apple = 100; n.apple = 2; tax = 1.1
apple.pre.tax = multi.forwd(apple, n.apple)
apple.post.tax = multi.forwd(apple.pre.tax, tax)

dprice = 1
d.apple.post.tax = multi.bckwd(apple.pre.tax, tax, dprice)
d.apple = multi.bckwd(apple, n.apple, d.apple.post.tax\$dx)\$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.post.tax\$dx)\$dy

```

# Disclaimer

このcourselogにあるコードは、主に学部生・博士課程前期向けの講義・演習・実習で例として提示しているもので、原則直感的に分かりやすいように書いているつもりです。例によってはとても非効率なものもあるでしょうし、「やっつけ」で書いているものあります。また、普段はMATLABを使用していますので、変な癖がでているかもしれません。これらの例を使用・参考にする場合はそれを踏まえてたうえで使用・参考にして下さい。

A Brief Guide to R (Rのコマンドの手引きおよび例）はこちらをどうぞ