演習で書いたプログラムで問題はありませんでした。
以下、結果を可視化するコマンドを書き加えました:
# Gambler's problem solved by value iteration, p(win) = 0.4.
# States are indexed 1..101: index i corresponds to capital i - 1,
# so index 1 is ruin (capital 0) and index 101 is the goal (capital 100).
# Both terminal states keep value 0; reaching the goal pays reward 1.

# initialization ----
p.win   <- 0.4
p.lose  <- 0.6
P       <- c(p.lose, p.win)       # transition probabilities: (lose, win)
R       <- c(rep(0, 100), 1)      # reward 1 only on entering the goal state
V       <- rep(0, 101)            # value estimates; terminals never updated
gamma   <- 1                      # undiscounted episodic task
tol     <- 1e-10                  # convergence threshold per sweep
counter <- 0                      # sweep counter (used for plotting/legend)
cols    <- c("red", "skyblue", "orange", "black")
par(mfrow = c(1, 2))

# value iteration ----
repeat {
  counter <- counter + 1
  delta <- 0
  for (i.state in 2:100) {               # non-terminal states: capital 1..99
    v <- V[i.state]
    temp.v <- rep(0, i.state - 1)
    for (i.bet in 1:(i.state - 1)) {     # stake up to the current capital
      lower.B <- i.state - i.bet             # state reached on a loss
      upper.B <- min(i.state + i.bet, 101)   # state on a win, capped at goal
      temp.v[i.bet] <- sum(P * (R[c(lower.B, upper.B)] +
                                  gamma * V[c(lower.B, upper.B)]))
    }
    # Greedy backup, hoisted out of the bet loop: the original recomputed
    # max(temp.v) on every bet, but V[i.state] is never read by its own
    # backups (lower.B < i.state < upper.B), so the result is identical.
    V[i.state] <- max(temp.v)
    delta <- max(abs(v - V[i.state]), delta)
  }
  # plot the value estimates after each of the first three sweeps
  if (counter == 1) {
    plot(V[2:100], type = "l", col = cols[1], lwd = 2,
         xlab = "capital", ylab = "value")
  } else if (counter < 4) {
    lines(V[2:100], type = "l", col = cols[counter], lwd = 2)
  }
  if (delta < tol) break
} # end of value iteration

# Final value function. The legend is labelled with the actual sweep count;
# the original hard-coded "32nd sweep", which silently goes stale if tol
# (or any other setting) changes.
lines(V[2:100], type = "l", col = cols[4], lwd = 2)
legend("topleft",
       c("1st sweep", "2nd sweep", "3rd sweep",
         paste0("final (sweep ", counter, ")")),
       col = cols, lwd = 2)

# identifying the optimal action (greedy policy w.r.t. converged V) ----
policy <- rep(0, 101)
for (i.state in 2:100) {
  temp.v <- rep(0, i.state - 1)
  for (i.bet in 1:(i.state - 1)) {
    lower.B <- i.state - i.bet
    upper.B <- min(i.state + i.bet, 101)
    temp.v[i.bet] <- sum(P * (R[c(lower.B, upper.B)] +
                                gamma * V[c(lower.B, upper.B)]))
  }
  # Round before the argmax so floating-point near-ties resolve to the
  # smallest bet (which.max returns the first maximum).
  policy[i.state] <- which.max(round(temp.v, 4))
}
barplot(policy, xlab = "capital", ylab = "Optimal action")