# Greedy multi-armed bandit simulation ----
# Every arm pays a normally distributed reward centred on its own true mean.
# On each trial the arm with the highest current estimate is pulled (ties are
# broken at random) and that arm's estimate is reset to the sample mean of all
# rewards it has produced so far.

set.seed(111)  # fixed seed so the run is reproducible

# Simulation settings
n.trial <- 1000  # number of pulls
N <- 10          # number of arms
sigma <- 1       # standard deviation of the reward noise

# True mean reward of each arm, drawn uniformly between 0 and 1
Q.star <- runif(N)

# Per-arm bookkeeping, all starting at zero
Q <- numeric(N)      # current value estimate of each arm
count <- numeric(N)  # how many times each arm has been pulled
Q.cum <- numeric(N)  # total reward collected from each arm

# Reward received on each trial
rew.earned <- numeric(n.trial)

# Playing the slot machine ----
for (i.trial in seq_len(n.trial)) {
  # Greedy choice: every arm whose estimate equals the current maximum
  max.a <- max(Q)
  max.idx <- which(Q == max.a)

  # Only call sample() on a real tie: with a single index, sample(k, 1)
  # would draw from 1:k rather than return k itself.
  if (length(max.idx) > 1) {
    max.idx <- sample(max.idx, 1)
  }

  # Pull the chosen arm and log the noisy reward
  r.t <- rnorm(1, Q.star[max.idx], sd = sigma)
  rew.earned[i.trial] <- r.t

  # Update the chosen arm's running totals and its sample-mean estimate
  count[max.idx] <- count[max.idx] + 1
  Q.cum[max.idx] <- Q.cum[max.idx] + r.t
  Q[max.idx] <- Q.cum[max.idx] / count[max.idx]
}

# Reward trajectory over trials, then the final value estimates
plot(rew.earned, type = "l")
Q
# Related