認知情報解析学演習 PSO

funZ<-function(x) {
  x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]
}

n.theta = 10;
n.iter = 1000;
Wp = 0.1;
Wg = 0.1;
theta = matrix(rnorm(n.theta*2), nrow=n.theta)
v = matrix(rnorm(n.theta*2), nrow=n.theta)
theta.hist = array(0,c(n.theta,2,n.iter))
theta.hist[,,1]=theta
p.b.v <- apply(theta,1,funZ)
p.best = theta
g.b.v = min(p.b.v)
g.b.idx = which.min(p.b.v)
g.best <- theta[g.b.idx,]
v = v + Wp*runif(n.theta)*(p.best - theta)+ Wg*runif(n.theta)*t(g.best - t(theta))
theta = theta + v


for (i.iter in 2:n.iter){
  v = v + Wp*runif(n.theta)*(p.best - theta)+ Wg*runif(n.theta)*t(g.best - t(theta))
  theta = theta + v
  fitG = apply(theta,1,funZ)
  if (min(fitG) < g.b.v){
    g.best = theta[which.min(fitG),]
    g.b.v = min(fitG)
  }
  idx = which(fitG < p.b.v)
  p.best[idx,] = theta[idx,]
  p.b.v= fitG
  theta.hist[,,i.iter]=theta
}


funZ2<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
xmin=-5;xmax=5;n_len=100;
x<-seq(xmin, xmax,length.out=n_len);
y<-x;
z<-outer(x,y,funZ2)
contour(x,y,z,nlevels= 50, ,lwd = 0.3,drawlabels = F,col='blue')
for (i.agent in 1:n.theta){
  lines(theta.hist[i.agent,1,],theta.hist[i.agent,2,],col=i.agent)
  points(theta[1,1],theta[1,2],pch=20,cex=2,col=i.agent)
}
n.trial = 1000; n.rep = 100
Q.true = rnorm(10); 
opt.act = which.max(Q.true)
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
for (i.rep in 1:n.rep){
  Q.est = rep(0,10);R.cum = rep(0,10)
  act.count = rep(0,10)
  for (i.trial in 1:n.trial){
    max.id = which(Q.est == max(Q.est))
    if (length(max.id)>1){
      action = sample(max.id,1)
    } else {
      action = max.id
    }
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    } 
    r = rnorm(1, Q.true[action],1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est = R.cum / (act.count + 0.01)
  }
}

plot(colMeans(r.hist),type='l')
plot(colMeans(oa.hist),type='l',ylim=c(0,1))


### epsilon greedy
n.trial = 1000; n.rep = 1000
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
epsilon = 0.01
for (i.rep in 1:n.rep){
  Q.est = rep(0,10);R.cum = rep(0,10)
  act.count = rep(0,10)
  Q.true = rnorm(10); 
  opt.act = which.max(Q.true)
  for (i.trial in 1:n.trial){
    if (runif(1)>epsilon){
      max.id = which(Q.est == max(Q.est))
      if (length(max.id)>1){
        action = sample(max.id,1)
      } else {
        action = max.id
      }
    } else {
      action = sample(10,1)
    }
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    } 
    r = rnorm(1, Q.true[action],1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est[action] = R.cum[action] / act.count[action]
  }
}

### softmax
n.trial = 1000; n.rep = 1000
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
gamma = 3
for (i.rep in 1:n.rep){
  Q.est = rep(0,10);R.cum = rep(0,10)
  act.count = rep(0,10)
  Q.true = rnorm(10); 
  opt.act = which.max(Q.true)
  for (i.trial in 1:n.trial){
    action = sample(10, 1, prob=exp(gamma*Q.est)/sum(exp(gamma*Q.est)))
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    } 
    r = rnorm(1, Q.true[action],1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est[action] = R.cum[action] / act.count[action]
  }