There were no problems with the program you wrote for the exercise.
Below, I have added commands to visualize the results:
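For reference, each pass of the value-iteration loop below applies the Bellman optimality backup for this problem (capital $s = 1,\dots,99$, bet $a$, win probability $p_{\text{win}} = 0.4$, $\gamma = 1$):

$$
V(s) \leftarrow \max_{1 \le a \le s} \Big[\, p_{\text{win}}\big(R(s^{+}) + \gamma V(s^{+})\big) + p_{\text{lose}}\big(R(s-a) + \gamma V(s-a)\big) \Big], \qquad s^{+} = \min(s+a,\,100),
$$

where $R(s') = 1$ only when $s' = 100$, and the terminal values $V(0)$ and $V(100)$ stay at 0.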
# initialization: V is indexed by capital + 1 (index 1 = capital 0, index 101 = capital 100)
p.win = 0.4; p.lose = 0.6; P = c(p.lose, p.win)   # outcome probabilities (lose, win)
R = c(rep(0, 100), 1); V = rep(0, 101)            # reward of 1 only on reaching capital 100
gamma = 1; tol = 1e-10; counter = 0
cols = c("red", "skyblue", "orange", "black")
par(mfrow = c(1, 2))
# value iteration
repeat{
  counter = counter + 1
  delta = 0
  for (i.state in 2:100) {                 # sweep over capitals 1 to 99
    v <- V[i.state]                        # old value, kept for the convergence check
    temp.v = rep(0, (i.state - 1))         # one action value per possible bet
    for (i.bet in 1:(i.state - 1)) {
      lower.B = i.state - i.bet            # next state if the bet is lost
      upper.B = min(i.state + i.bet, 101)  # next state if the bet is won (capped at capital 100)
      temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
    }
    V[i.state] = max(temp.v)               # Bellman optimality update
    delta <- max(abs(v - V[i.state]), delta)
  }
  # plotting results
  if (counter == 1){
    plot(V[2:100], type = 'l', col = cols[1], lwd = 2, xlab = "capital", ylab = "value")
  } else if (counter < 4){
    lines(V[2:100], col = cols[counter], lwd = 2)
  }
  if (delta < tol){break}
}
# end of value iteration
lines(V[2:100], col = cols[4], lwd = 2)
legend("topleft", c("1st sweep", "2nd sweep", "3rd sweep", "32nd sweep"), col = cols, lwd = 2)
# identifying the optimal action (bet) for each capital, given the converged V
policy = rep(0, 101)
for (i.state in 2:100) {
  temp.v = rep(0, (i.state - 1))
  for (i.bet in 1:(i.state - 1)) {
    lower.B = i.state - i.bet
    upper.B = min(i.state + i.bet, 101)
    temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
  }
  # rounding before which.max() turns near-ties into exact ties, so the smallest such bet is chosen
  policy[i.state] = which.max(round(temp.v, 4))
}
barplot(policy, xlab = "capital", ylab = "Optimal action")
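Optional extra (not required for the exercise): many bets are exactly or nearly tied in value here, which is why the bar plot from which.max() looks so jagged. Below is a sketch that plots every bet whose value is within a small, arbitrarily chosen tolerance (1e-5) of the best value:

par(mfrow = c(1, 1))   # reset to a single-panel layout for this extra figure
plot(1, type = "n", xlim = c(1, 99), ylim = c(1, 99), xlab = "capital", ylab = "near-optimal bets")
for (i.state in 2:100) {
  temp.v = rep(0, (i.state - 1))
  for (i.bet in 1:(i.state - 1)) {
    lower.B = i.state - i.bet
    upper.B = min(i.state + i.bet, 101)
    temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
  }
  ties = which(temp.v >= max(temp.v) - 1e-5)   # all bets within tolerance of the best
  points(rep(i.state - 1, length(ties)), ties, pch = 20, cex = 0.5)
}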