library(keras)
library(tensorflow)
tf$compat$v1$disable_eager_execution()
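# eager execution is disabled because k_gradients() and k_function() used
# below rely on the TF1-style graph backend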
deprocess_image <- function(x) {
dms <- dim(x)
x <- x - mean(x)
x <- x / (sd(x) + 1e-5)
x <- x * 0.1
x <- x + 0.5
x <- pmax(0, pmin(x, 1))
array(x, dim = dms)
}
generate_pattern <- function(layer_name, filter_index, size = 150) {
layer_output <- model$get_layer(layer_name)$output
loss <- k_mean(layer_output[,,,filter_index])
grads <- k_gradients(loss, model$input)[[1]]
grads <- grads / (k_sqrt(k_mean(k_square(grads))) + 1e-5)
iterate <- k_function(list(model$input), list(loss, grads))
input_img_data <-
array(runif(size * size * 3), dim = c(1, size, size, 3)) * 20 + 128
step <- 1
for (i in 1:40) {
c(loss_value, grads_value) %<-% iterate(list(input_img_data))
input_img_data <- input_img_data + (grads_value * step)
}
img <- input_img_data[1,,,]
deprocess_image(img)
}
library(grid)
library(keras)
model <- application_vgg16(
weights = "imagenet",
include_top = FALSE
)
layer_name <- "block3_conv1"
filter_index <- 1
grid.raster(generate_pattern("block3_conv1", 1))
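To compare several filters of the same layer at once, the generated patterns can be tiled; a minimal sketch, assuming the gridExtra package is installed (filter indices and grid size are arbitrary):
library(gridExtra)
patterns <- lapply(1:8, function(i) rasterGrob(generate_pattern("block3_conv1", i, size = 100)))
grid.arrange(grobs = patterns, ncol = 4)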
Cognitive Information Analysis Exercise B
input_chiba <- layer_input(shape = c(28,28,1))
# four parallel branches over the 28x28x1 input, each downsampled to 14x14
branch_a = input_chiba %>%
layer_conv_2d(filters = 32, kernel_size = 1,
activation = "relu", strides = 2)
branch_b = input_chiba %>%
layer_conv_2d(filters = 32, kernel_size = 1,
activation = "relu") %>%
layer_conv_2d(filters = 32, kernel_size = 2,
activation = "relu", strides = 2)
branch_c = input_chiba %>%
layer_average_pooling_2d(pool_size = 2, strides = 2) %>%
layer_conv_2d(filters = 32, kernel_size = 1,
activation = "relu")
branch_d = input_chiba %>%
layer_conv_2d(filters = 32, kernel_size = 1,
activation = "relu") %>%
layer_conv_2d(filters = 32, kernel_size = 1,
activation = "relu") %>%
layer_conv_2d(filters = 32, kernel_size = 2,
activation = "relu", strides = 2)
concat = layer_concatenate(list(branch_a, branch_b, branch_c, branch_d))
output_chiba = concat %>% layer_flatten() %>%
layer_dense(units = 64, activation = "relu") %>%
layer_dense(units = 10, activation = "softmax")
chiba_model <- keras_model(input_chiba, output_chiba)
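A quick way to check that the four branches line up is summary(); the concatenated feature map should come out as 14 x 14 x 128 (4 branches x 32 filters):
summary(chiba_model)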
mnist <- dataset_mnist()
c(c(train_images, train_labels),c(test_images,test_labels)) %<-% mnist
train_images <- array_reshape(train_images,c(60000,28,28,1))
test_images <- array_reshape(test_images,c(10000,28,28,1))
train_images = train_images/255
test_images = test_images/255
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
chiba_model %>% compile(
optimizer = "rmsprop",
loss = "categorical_crossentropy",
metrics = c("accuracy")
)
history <- chiba_model %>% fit(
train_images,
train_labels,
epochs = 15,
batch_size =64,
validation_data = list(test_images,test_labels)
)
plot(history)
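For a single held-out score after training, evaluate() can be run on the test set (a minimal sketch; here the validation and test data coincide):
chiba_model %>% evaluate(test_images, test_labels)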
Cognitive Information Analysis Exercise: Visualizing Convnets
base_dir <- "/home/shiga/R_data/cat_and_dog/small_set"
train_dir <- file.path(base_dir, "train")
validation_dir <- file.path(base_dir, "validation")
test_dir <- file.path(base_dir, "test")
library(keras)
model <- keras_model_sequential() %>%
layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
input_shape = c(150, 150, 3)) %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_flatten() %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 512, activation = "relu") %>%
layer_dense(units = 1, activation = "sigmoid")
model %>% compile(
loss = "binary_crossentropy",
optimizer = optimizer_rmsprop(lr = 1e-4),
metrics = c("acc")
)
datagen <- image_data_generator(
rescale = 1/255,
rotation_range = 40,
width_shift_range = 0.2,
height_shift_range = 0.2,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = TRUE
)
test_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_directory(
train_dir,
datagen,
target_size = c(150, 150),
batch_size = 32,
class_mode = "binary"
)
validation_generator <- flow_images_from_directory(
validation_dir,
test_datagen,
target_size = c(150, 150),
batch_size = 32,
class_mode = "binary"
)
history <- model %>% fit_generator(
train_generator,
steps_per_epoch = 100,
epochs = 100,
validation_data = validation_generator,
validation_steps = 50
)
###
img_path <- "/home/shiga/R_data/cat_and_dog/small_set/test/cat/cat.1700.jpg"
img <- image_load(img_path, target_size = c(150, 150))
img_tensor <- image_to_array(img)
img_tensor <- array_reshape(img_tensor, c(1, 150, 150, 3))
img_tensor <- img_tensor / 255
plot(as.raster(img_tensor[1,,,]))
layer_outputs <- lapply(model$layers[1:8], function(layer) layer$output)
# Creates a model that will return these outputs, given the model input:
activation_model <- keras_model(inputs = model$input, outputs = layer_outputs)
activations <- activation_model %>% predict(img_tensor)
first_layer_activation <- activations[[1]]
dim(first_layer_activation)
plot_channel <- function(channel) {
rotate <- function(x) t(apply(x, 2, rev))
image(rotate(channel), axes = FALSE, asp = 1,
col = terrain.colors(12))
}
plot_channel(first_layer_activation[1,,,5])
fifth_layer_activation <- activations[[5]]
plot_channel(fifth_layer_activation[1,,,1])
plot_channel(fifth_layer_activation[1,,,3])
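To scan more channels at once, the channels of one layer can be tiled with par(mfrow = ...); a minimal sketch for the first 16 channels of the fifth layer's activation (layer and channel counts are arbitrary):
op <- par(mfrow = c(4, 4), mar = rep(0.5, 4))
for (i.ch in 1:16) plot_channel(fifth_layer_activation[1,,,i.ch])
par(op)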
Cognitive Information Analysis Exercise: Stone-Taking Game (contains bugs)
col1 = matrix(c(rep(0,4),c(1,0,0,0),c(1,1,0,0),c(1,1,1,0),rep(1,4)),nrow=4,byrow=T)
col2 = matrix(c(rep(10,4),c(11,10,10,10),c(11,11,10,10),c(11,11,11,10),rep(11,4)),nrow=4,byrow=T)
col3 = matrix(c(rep(100,4),c(101,100,100,100),c(101,101,100,100),c(101,101,101,100),rep(101,4)),nrow=4,byrow=T)
act.list = list()
state.temp = list()
counter = 0
Q1 = list()
Q2 = list()
for (i.c1 in 1:5){
if (sum(col1[,i.c1])==0){
act1 = c()
} else {
act1 = seq(1,sum(col1[,i.c1]),1)
}
for (i.c2 in 1:5){
if (sum(col2[,i.c2])==40){
act2 = c()
} else {
act2 = seq(11,sum(col2[,i.c2]==11)*11,11)
}
for (i.c3 in 1:5){
if (sum(col3[,i.c3])==400){
act3 = c()
} else {
act3 = seq(101,sum(col3[,i.c3]==101)*101,101)
}
counter = counter + 1
state.temp[[counter]] = cbind(col1[,i.c1],col2[,i.c2],col3[,i.c3])
act.list[[counter]] = c(act1,act2,act3)
Q1[[counter]] = rep(0, length(c(act1,act2,act3)))
Q2[[counter]] = rep(0, length(c(act1,act2,act3)))
}
}
}
rm.stone <- function(act, st.shape){
if (act == -99){return(st.shape)} # no legal move: leave the board unchanged
if (act > 100){
n.remove = act%%100
n.stone = length(which(st.shape[,3]==101))
start = (5-n.stone)
st.shape[(start:(start+n.remove-1)),3] = 100
} else {
if (act > 10){
n.remove = act%%10
n.stone = length(which(st.shape[,2]==11))
start = (5-n.stone)
st.shape[(start:(start+n.remove-1)),2] = 10
} else {
n.remove = act
n.stone = length(which(st.shape[,1]==1))
start = (5-n.stone)
st.shape[(start:(start+n.remove-1)),1] = 0
}
}
return(st.shape)
}
id.state <- function(st.shape, state.temp){
for (i.st in 1:125){
if (all(st.shape == state.temp[[i.st]])){
state.idx = i.st
break
}
}
return(state.idx)
}
ck.act <- function(Q, act.vec, eta){
# note: state.idx is taken from the calling environment
if (is.null(act.vec)){
return(list(act = -99, act.idx = -99))
}
if (length(act.vec)==1){
act = act.vec
} else {
p = exp(eta*Q[[state.idx]])/sum(exp(eta*Q[[state.idx]]))
act = sample(act.vec, 1, prob = p)
}
act.idx = which(act.vec==act)
return(list(act = act, act.idx = act.idx))
}
gamma = 1; alpha = 0.1; eta = 1; n.rep = 10000 # eta: softmax inverse temperature
for (i.rep in 1:n.rep){
# first action
state.idx = 125; counter = 1
st.shape = state.temp[[state.idx]]
res.act = ck.act(Q1,act.list[[state.idx]],eta)
act = res.act$act;act.idx = res.act$act.idx
state.old = state.idx
act.old = act.idx
# 2nd to last
while (state.idx != 1) {
counter = counter + 1
st.shape <- rm.stone(act, st.shape)
state.idx <- id.state(st.shape, state.temp)
if (counter%%2==1) {
res.act = ck.act(Q1,act.list[[state.idx]],eta)
} else {
res.act = ck.act(Q2,act.list[[state.idx]],eta)
}
act = res.act$act; act.idx = res.act$act.idx
if (state.idx == 1){
if (counter%%2==1){rew1 = -1; rew2 = 1;} else {rew1 = 1; rew2 = -1;}
Q1[[state.old]][act.old] = Q1[[state.old]][act.old] +
alpha*(rew1 - Q1[[state.old]][act.old])
Q2[[state.old]][act.old] = Q2[[state.old]][act.old] +
alpha*(rew2 - Q2[[state.old]][act.old])
} else {
rew1 = 0;
rew2 =0;
if (counter%%2==1){
Q1[[state.old]][act.old] = Q1[[state.old]][act.old] +
alpha*(rew1 + gamma*Q1[[state.idx]][act.idx] - Q1[[state.old]][act.old])
} else {
Q2[[state.old]][act.old] = Q2[[state.old]][act.old] +
alpha*(rew2 + gamma*Q2[[state.idx]][act.idx] - Q2[[state.old]][act.old])
}
}
state.old = state.idx
act.old = act.idx
}
}
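Assuming the loop above runs to completion, the learned preferences for the opening move can be inspected by listing the legal first actions next to their Q1 values (state 125 is the full starting board used at the start of every episode):
cbind(action = act.list[[125]], Q1 = round(Q1[[125]], 3))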
Cognitive Information Analysis: Practice Problem
# fixed version
# if this takes too long, reduce the number of repetitions
# the code is not very efficient, so modify it as you see fit
temp.state = expand.grid(loc1 = 0:2,loc2=0:2,loc3=0:2,
loc4 = 0:2,loc5=0:2,loc6=0:2,
loc7 = 0:2,loc8=0:2,loc9=0:2)
temp.state = expand.grid(rep(list(0:2),9)) # equivalent, more compact form of the lines above
n.ones = rowSums(temp.state == 1 )
n.twos = rowSums(temp.state == 2 )
omitTwo = which(n.ones < n.twos)
omitOne = which((n.ones-1 ) > n.twos)
omitUniq = unique(c(omitOne, omitTwo))
state = temp.state[-omitUniq,]
poss.act = apply(state, 1, function(x) which(x==0))
temp.win = matrix(1:9,3)
win.idx = matrix(c(temp.win[1,],temp.win[2,],temp.win[3,],
temp.win[,1],temp.win[,2],temp.win[,3],
diag(temp.win),
diag(temp.win[3:1,])),ncol=3,byrow=T)
idx1 = c()
idx2 = c()
idxCM = c()
for (i.win in 1:nrow(win.idx)){
idx.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]]==1)==3)
idx1 = c(idx1, which(idx.temp))
idxCM.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]]==1)==2)
idxCM = c(idxCM, which(idxCM.temp))
idx.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]]==2)==3)
idx2 = c(idx2, which(idx.temp))
}
n0=apply(state,1,function(x) length((which(x==0))))
tie = which(n0==0)
Q = list()
V = list()
rew.sum = list()
rew.count = list()
policy = list()
for (i.state in 1:nrow(state)){
Q[[i.state]] = rep(0,length(poss.act[[i.state]]))
V[[i.state]] = rep(0,length(poss.act[[i.state]]))
rew.sum[[i.state]] = rep(0,length(poss.act[[i.state]]))
rew.count[[i.state]] = rep(0,length(poss.act[[i.state]]))
policy[[i.state]] = rep(1/length(poss.act[[i.state]]),length(poss.act[[i.state]]))
}
R.W = 10
R.T = 5
R.L = -10
gamma = 1
epsilon = 0.05
eta = 1
ck.result <- function(st.idx, idx1, idx2, tie){
term = F
rew = 0
result = "not terminal"
if (match(st.idx ,idx1, nomatch = 0)!=0){
rew = R.W
term = T
result = "win"
} else if (match(st.idx ,idx2, nomatch = 0)!=0){
rew = R.L
term = T
result = "lose"
} else if (match(st.idx ,tie, nomatch = 0)!=0){
rew = R.T
term = T
result = "tie"
}
return(list(rew = rew, term = term, result = result))
}
n.rep = 10000
game.hist = rep(0,n.rep)
# main loop
for (i.rep in 1:n.rep){
st.idx = 1
term = F
state.temp = rep(0,9)
state.hist1 = c()
state.hist2 = c()
repeat {
# playing game
if (length(poss.act[[st.idx]])==1){
act1 = poss.act[[st.idx]]
} else{
p.act = exp(eta*Q[[st.idx]])/sum(exp(eta*Q[[st.idx]]))
act1 = sample(poss.act[[st.idx]],1, prob = p.act)
}
state.hist1 = rbind(state.hist1,c(st.idx, act1))
state.temp[act1] = 1
st.idx = which(apply(state, 1, function(x) sum(x==state.temp) )==9)
res = ck.result(st.idx, idx1, idx2, tie)
if (res$term == T){
rew = res$rew
break
}
# the opponent (player 2) follows the fixed uniform policy
act2 = sample(poss.act[[st.idx]],1, prob = policy[[st.idx]])
state.hist2 = rbind(state.hist2,c(st.idx, act2))
state.temp[act2] = 2
st.idx = which(apply(state, 1, function(x) sum(x==state.temp) )==9)
res = ck.result(st.idx, idx1, idx2, tie)
if (res$term == T){
rew = res$rew
break
}
}
# update Q & policy
game.hist[i.rep] = res$result!="lose"
n.st = nrow(state.hist1)
#print(res$result)
if (i.rep%%100==0){print(i.rep)}
for (i.st in 1:n.st){
act.idx = which(poss.act[[state.hist1[i.st,1]]]==state.hist1[i.st,2])
rew.sum[[state.hist1[i.st,1]]][act.idx] = rew.sum[[state.hist1[i.st,1]]][act.idx] + rew
rew.count[[state.hist1[i.st,1]]][act.idx] = rew.count[[state.hist1[i.st,1]]][act.idx] + 1
Q[[state.hist1[i.st,1]]][act.idx] = rew.sum[[state.hist1[i.st,1]]][act.idx] / rew.count[[state.hist1[i.st,1]]][act.idx]
}
}
# plotting results
library(pracma)
game.hist.smooth = movavg(game.hist, 400, type="s")
plot(game.hist.smooth,type='l')
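For a quick look at what was learned, the Monte-Carlo Q estimates for the opening move can be laid out on the board (3x3, column-major, matching the numbering in temp.win):
matrix(round(Q[[1]], 2), nrow = 3)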
Cognitive Information Analysis Exercise a: Monte Carlo 01
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
r = -1; P = rep(0.25,4); V = rep(0,14); max.iter = 10000
state.count=rep(0,15)
for (i.iter in 1:max.iter){
state = sample(1:14,1)
state.seq = state
while(state!=15){
action = sample(1:4,1,prob = P)
state.seq = c(state.seq,trM[state,action])
state = trM[state,action]
}
uniq.seq = unique(state.seq)
for (i.uniq in 1:(length(uniq.seq)-1)){
first.visit = which(state.seq == uniq.seq[i.uniq])[1]
# return from the first visit: r per remaining transition, including the step into the terminal state
V[uniq.seq[i.uniq]] = V[uniq.seq[i.uniq]] + r*(length(state.seq)-first.visit)
}
state.count[uniq.seq] = state.count[uniq.seq] + 1
}
V = matrix(c(0,V/state.count[1:14],0),nrow=4)
Cognitive Information Analysis: Exercise Problem from 06/05
The program written in the exercise session had no problems.
Below, I have added commands for visualizing the results:
# initialization
p.win =0.4; p.lose = 0.6; P = c(p.lose, p.win);
R = c(rep(0,100), 1); V = rep(0,101);
gamma = 1; tol = 1e-10; counter=0
cols = c("red","skyblue",'orange','black')
par(mfrow=c(1,2))
# value iteration
repeat{
counter = counter+1
delta = 0
for (i.state in 2:100) {
v <- V[i.state]
temp.v = rep(0, (i.state - 1))
for (i.bet in 1:(i.state - 1)) {
lower.B = i.state - i.bet
upper.B = min(i.state + i.bet, 101)
temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
}
V[i.state] = max(temp.v)
delta <- max(abs(v-V[i.state]), delta)
}
# plotting results
if (counter==1){
plot(V[2:100], type='l', col=cols[1], lwd=2, xlab="capital", ylab="value")
} else {
if (counter<4){
lines(V[2:100], type='l', col=cols[counter], lwd=2)
}
}
if (delta < tol){break}
}
# end of value iteration
lines(V[2:100],type='l', col=cols[4], lwd=2)
legend("topleft", c("1st sweep","2nd sweep", "3rd sweep","32nd sweep") ,col=cols, lwd=2)
# identifying optimal action
policy = rep(0,101)
for (i.state in 2:100) {
temp.v = rep(0, (i.state - 1))
for (i.bet in 1:(i.state - 1)) {
lower.B = i.state - i.bet
upper.B = min(i.state + i.bet, 101)
temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
}
policy[i.state] = which.max(round(temp.v,4))
}
barplot(policy,xlab="capital",ylab="Optimal action")
Cognitive Information Analysis Exercise a: Assignment 03
V=rep(0,25);
# defining probability matrix
P=matrix(1/4,nrow=25,ncol=4) #
# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18
# defining reward matrix
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5
delta=1; gamma=0.9; tol=1e-10;
bestP=sample(1:4,25,replace=T)
stable=F;counter=0;
while (stable==F){
counter=counter+1
# iterative policy evaluation
delta = 1 # reset so evaluation re-runs after every policy improvement
while (delta>tol) {
delta=0;
V.old=V
for (i_state in 1:25) {
v=V[i_state]
V[i_state]=sum(P[i_state,]*(R[i_state,]+gamma*V.old[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]))
}
}
# policy improvement
stable=T # assume stable; flipped to F below if any state's action changes
for (i_state in 1:25) {
b=bestP[i_state]
bestP[i_state]=which.max(R[i_state,]+gamma*V[trM[i_state,]])
if (bestP[i_state]!=b) stable<-F
}
}
apply(matrix(bestP,nrow=5),2,rev)
bestP.mat = apply(matrix(as.character(bestP),nrow=5),2,rev)
bestP.mat[which(bestP.mat=="1")] = "N"
bestP.mat[which(bestP.mat=="2")] = "E"
bestP.mat[which(bestP.mat=="3")] = "S"
bestP.mat[which(bestP.mat=="4")] = "W"
print(bestP.mat)
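For reference, the converged state values can be displayed in the same orientation as the policy matrix:
round(apply(matrix(V, nrow = 5), 2, rev), 1)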
Cognitive Information Analysis: Assignment 2
# initializing Q matrix
Q = P = matrix(1/4,nrow=25,ncol=4) #
# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5
nRep=1000; gamma=0.9; P = 0.25
for (i_rep in 1:nRep) {
Q.old = Q
for (i_state in 1:25) {
for (i_act in 1:4){
# expected Q of the next state under the equiprobable random policy
Q[i_state, i_act]=R[i_state, i_act]+gamma * P * sum(Q.old[trM[i_state,i_act], ])
}
}
}
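The greedy policy implied by the evaluated Q can then be read off per state; a sketch using the same display convention as Assignment 03:
bestA = apply(Q, 1, which.max)
bestA.mat = apply(matrix(c("N","E","S","W")[bestA], nrow = 5), 2, rev)
print(bestA.mat)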
Reinforcement Learning: Comparing Policies 1
# select the action with the largest Q (ties broken at random)
max.Q <- function(Q){
max.a = max(Q)
max.idx = which(Q == max.a)
if (length(max.idx)>1){
max.idx = sample(max.idx, 1)
}
return(max.idx)
}
# greedy policy
greedy <- function(n.trial, Q.star, N){
Q = Q.cum = count = rep(0, N)
rew.earned = rep(0, n.trial)
for (i.trial in 1:n.trial){
act.idx = max.Q(Q)
r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
Q.cum[act.idx] = Q.cum[act.idx] + r.t
count[act.idx] = count[act.idx] + 1
Q[act.idx] = Q.cum[act.idx] / count[act.idx]
rew.earned[i.trial] = r.t
}
return(list(Q = Q, rew.earned = rew.earned))
}
# epsilon-greedy policy
# with epsilon = 0 this is equivalent to the greedy policy
eps.greedy <- function(n.trial, Q.star, N, epsilon){
Q = Q.cum = count = rep(0, N)
rew.earned = rep(0, n.trial)
for (i.trial in 1:n.trial){
if (runif(1) < epsilon){
act.idx = sample(1:N, 1)
} else {
act.idx = max.Q(Q)
}
r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
Q.cum[act.idx] = Q.cum[act.idx] + r.t
count[act.idx] = count[act.idx] + 1
Q[act.idx] = Q.cum[act.idx] / count[act.idx]
rew.earned[i.trial] = r.t
}
return(list(Q = Q, rew.earned = rew.earned))
}
# run n.rep independent bandit problems and record the rewards
comp.eps.greedy <- function(n.trial, n.rep, N, epsilon){
rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
for (i.rep in 1:n.rep){
Q.star = rnorm(N, mean = 0, sd = 1);
res = eps.greedy(n.trial, Q.star, N, epsilon)
rew.history[ , i.rep] = res$rew.earned
}
return(rew.history)
}
# run
EG.000 = comp.eps.greedy(1000, 1000, 10, 0.000)
EG.001 = comp.eps.greedy(1000, 1000, 10, 0.001)
EG.010 = comp.eps.greedy(1000, 1000, 10, 0.010)
EG.100 = comp.eps.greedy(1000, 1000, 10, 0.100)
EG.150 = comp.eps.greedy(1000, 1000, 10, 0.150)
# plot the results
plot(rowMeans(EG.000), type="l", ylab="Average Reward", xlab="Trial",
ylim = c(0,2))
lines(rowMeans(EG.001),col=2)
lines(rowMeans(EG.010),col=3)
lines(rowMeans(EG.100),col=4)
lines(rowMeans(EG.150),col=5)
legend("bottomright",
c("epsilon = 0.000",
"epsilon = 0.001",
"epsilon = 0.010",
"epsilon = 0.100",
"epsilon = 0.150"),
col=1:5,lwd=2 )
# softmax policy (tau acts as an inverse temperature here)
softmax <- function(n.trial, Q.star, N, tau){
Q = Q.cum = count = rep(0, N)
rew.earned = rep(0, n.trial)
for (i.trial in 1:n.trial){
p = exp(Q*tau)/sum(exp(Q*tau))
act.idx = sample(1:N, 1, prob = p)
r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
Q.cum[act.idx] = Q.cum[act.idx] + r.t
count[act.idx] = count[act.idx] + 1
Q[act.idx] = Q.cum[act.idx] / count[act.idx]
rew.earned[i.trial] = r.t
}
return(list(Q = Q, rew.earned = rew.earned))
}
comp.softmax <- function(n.trial, n.rep, N, tau){
rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
for (i.rep in 1:n.rep){
Q.star = rnorm(N, mean = 0, sd = 1);
res = softmax(n.trial, Q.star, N, tau)
rew.history[ , i.rep] = res$rew.earned
}
return(rew.history)
}
# run
EG.000 = comp.eps.greedy(1000, 1000, 10, 0.000)
EG.010 = comp.eps.greedy(1000, 1000, 10, 0.010)
EG.100 = comp.eps.greedy(1000, 1000, 10, 0.100)
SM.10 = comp.softmax(1000,1000,10,10)
SM.03 = comp.softmax(1000,1000,10,3)
# plot the results
plot(rowMeans(EG.000), type="l", ylab="Average Reward", xlab="Trial",
ylim = c(0,2))
lines(rowMeans(EG.010),col=2)
lines(rowMeans(EG.100),col=3)
lines(rowMeans(SM.10),col=4)
lines(rowMeans(SM.03),col=5)
legend("bottomright",
c("epsilon = 0.000",
"epsilon = 0.010",
"epsilon = 0.100",
"tau = 10",
"tau = 3"),
col=1:5,lwd=2 )
# epsilon-greedy (2nd version): constant learning rate and configurable initial Q values
eps.greedy2 <- function(n.trial, Q.star, N, epsilon, lr, init.Q){
Q = rep(init.Q, N)
rew.earned = rep(0, n.trial)
for (i.trial in 1:n.trial){
if (runif(1) < epsilon){
act.idx = sample(1:N, 1)
} else {
act.idx = max.Q(Q)
}
r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
Q[act.idx] = Q[act.idx] + lr*(r.t - Q[act.idx])
rew.earned[i.trial] = r.t
}
return(list(Q = Q, rew.earned = rew.earned))
}
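eps.greedy2 has no driver above; a minimal wrapper in the style of comp.eps.greedy (all parameter values below are arbitrary) lets it be compared, for example optimistic initial values against a standard epsilon-greedy run:
comp.eps.greedy2 <- function(n.trial, n.rep, N, epsilon, lr, init.Q){
rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
for (i.rep in 1:n.rep){
Q.star = rnorm(N, mean = 0, sd = 1)
res = eps.greedy2(n.trial, Q.star, N, epsilon, lr, init.Q)
rew.history[ , i.rep] = res$rew.earned
}
return(rew.history)
}
EG2.opt = comp.eps.greedy2(1000, 1000, 10, 0.0, 0.1, 5)  # optimistic init, pure greedy
EG2.std = comp.eps.greedy2(1000, 1000, 10, 0.1, 0.1, 0)  # zero init, epsilon = 0.1
plot(rowMeans(EG2.opt), type="l", ylab="Average Reward", xlab="Trial", ylim = c(0,2))
lines(rowMeans(EG2.std), col=2)
legend("bottomright", c("init.Q = 5, epsilon = 0", "init.Q = 0, epsilon = 0.1"), col=1:2, lwd=2)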