There were no problems with the program you wrote for the exercise.
Below, I have added commands to visualize the results:
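For reference, each pass of the value-iteration loop below applies the Bellman optimality backup for this problem (capital $s = 1,\dots,99$, bet $a$, win probability $p_{\text{win}} = 0.4$, $\gamma = 1$):

$$
V(s) \leftarrow \max_{1 \le a \le s} \Big[\, p_{\text{win}}\big(R(s^{+}) + \gamma V(s^{+})\big) + p_{\text{lose}}\big(R(s-a) + \gamma V(s-a)\big) \Big], \qquad s^{+} = \min(s+a,\,100),
$$

where $R(s') = 1$ only when $s' = 100$, and the terminal values $V(0)$ and $V(100)$ stay at 0.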
# initialization: V is indexed by capital + 1 (index 1 = capital 0, index 101 = capital 100)
p.win = 0.4; p.lose = 0.6; P = c(p.lose, p.win)   # outcome probabilities (lose, win)
R = c(rep(0, 100), 1); V = rep(0, 101)            # reward of 1 only on reaching capital 100
gamma = 1; tol = 1e-10; counter = 0
cols = c("red", "skyblue", "orange", "black")
par(mfrow = c(1, 2))
# value iteration
repeat{
  counter = counter + 1
  delta = 0
  for (i.state in 2:100) {                 # sweep over capitals 1 to 99
    v <- V[i.state]                        # old value, kept for the convergence check
    temp.v = rep(0, (i.state - 1))         # one action value per possible bet
    for (i.bet in 1:(i.state - 1)) {
      lower.B = i.state - i.bet            # next state if the bet is lost
      upper.B = min(i.state + i.bet, 101)  # next state if the bet is won (capped at capital 100)
      temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
    }
    V[i.state] = max(temp.v)               # Bellman optimality update
    delta <- max(abs(v - V[i.state]), delta)
  }
  # plotting results
  if (counter == 1){
    plot(V[2:100], type = 'l', col = cols[1], lwd = 2, xlab = "capital", ylab = "value")
  } else if (counter < 4){
    lines(V[2:100], col = cols[counter], lwd = 2)
  }
  if (delta < tol){break}
}
# end of value iteration
lines(V[2:100], col = cols[4], lwd = 2)
legend("topleft", c("1st sweep", "2nd sweep", "3rd sweep", "32nd sweep"), col = cols, lwd = 2)
# identifying the optimal action (bet) for each capital, given the converged V
policy = rep(0, 101)
for (i.state in 2:100) {
  temp.v = rep(0, (i.state - 1))
  for (i.bet in 1:(i.state - 1)) {
    lower.B = i.state - i.bet
    upper.B = min(i.state + i.bet, 101)
    temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
  }
  # rounding before which.max() turns near-ties into exact ties, so the smallest such bet is chosen
  policy[i.state] = which.max(round(temp.v, 4))
}
barplot(policy, xlab = "capital", ylab = "Optimal action")
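Optional extra (not required for the exercise): many bets are exactly or nearly tied in value here, which is why the bar plot from which.max() looks so jagged. Below is a sketch that plots every bet whose value is within a small, arbitrarily chosen tolerance (1e-5) of the best value:

par(mfrow = c(1, 1))   # reset to a single-panel layout for this extra figure
plot(1, type = "n", xlim = c(1, 99), ylim = c(1, 99), xlab = "capital", ylab = "near-optimal bets")
for (i.state in 2:100) {
  temp.v = rep(0, (i.state - 1))
  for (i.bet in 1:(i.state - 1)) {
    lower.B = i.state - i.bet
    upper.B = min(i.state + i.bet, 101)
    temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
  }
  ties = which(temp.v >= max(temp.v) - 1e-5)   # all bets within tolerance of the best
  points(rep(i.state - 1, length(ties)), ties, pch = 20, cex = 0.5)
}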