# 2019 認知情報解析学演習 RL01

# Greedy action selection on an N-armed bandit with Gaussian rewards.
# Q.star holds each arm's true mean reward (drawn once from U(0, 1));
# Q holds the running sample-average estimate per arm, starting at zero.
set.seed(111)
n.trial <- 1000
N <- 10
sigma <- 1
Q.star <- runif(N)
Q <- numeric(N)
count <- numeric(N)  # number of times each arm has been pulled
Q.cum <- numeric(N)  # summed reward collected from each arm
rew.earned <- numeric(n.trial)

### playing slot-machine
for (i.trial in seq_len(n.trial)) {
  # Greedy choice: all arms whose current estimate equals the maximum.
  max.a <- max(Q)
  max.idx <- which(Q == max.a)
  # Break ties uniformly at random. sample() is called only for a real tie,
  # since sample(k, 1) with a single integer k would draw from 1:k instead.
  if (length(max.idx) > 1) {
    max.idx <- sample(max.idx, 1)
  }

  # Noisy reward centred on the chosen arm's true mean.
  r.t <- rnorm(1, mean = Q.star[max.idx], sd = sigma)
  rew.earned[i.trial] <- r.t

  # Refresh the sample-average estimate of the arm that was just pulled.
  count[max.idx] <- count[max.idx] + 1
  Q.cum[max.idx] <- Q.cum[max.idx] + r.t
  Q[max.idx] <- Q.cum[max.idx] / count[max.idx]
}

# Reward obtained on each trial, followed by the final value estimates.
plot(rew.earned, type = "l")
Q