library(keras)
library(tensorflow)
tf$compat$v1$disable_eager_execution()

# convert an arbitrary tensor into a displayable RGB image
deprocess_image <- function(x) {
  dms <- dim(x)
  x <- x - mean(x)
  x <- x / (sd(x) + 1e-5)
  x <- x * 0.1
  x <- x + 0.5
  x <- pmax(0, pmin(x, 1))
  array(x, dim = dms)
}

# build a loss that maximizes the mean activation of the chosen filter,
# then run gradient ascent in input space for 40 steps
generate_pattern <- function(layer_name, filter_index, size = 150) {
  layer_output <- model$get_layer(layer_name)$output
  loss <- k_mean(layer_output[,,,filter_index])
  grads <- k_gradients(loss, model$input)[[1]]
  grads <- grads / (k_sqrt(k_mean(k_square(grads))) + 1e-5)
  iterate <- k_function(list(model$input), list(loss, grads))
  input_img_data <- array(runif(size * size * 3), dim = c(1, size, size, 3)) * 20 + 128
  step <- 1
  for (i in 1:40) {
    c(loss_value, grads_value) %<-% iterate(list(input_img_data))
    input_img_data <- input_img_data + (grads_value * step)
  }
  img <- input_img_data[1,,,]
  deprocess_image(img)
}

library(grid)
model <- application_vgg16(
  weights = "imagenet",
  include_top = FALSE
)
layer_name <- "block3_conv1"
filter_index <- 1
grid.raster(generate_pattern("block3_conv1", 1))
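To look at more than one filter at a time, the patterns can be tiled into a grid. This is an addition, not part of the original code, and assumes the gridExtra package is installed:

# illustrative addition: the first 16 filters of block3_conv1 in a 4 x 4 grid
library(gridExtra)
grobs <- list()
for (i in 1:16) {
  pattern <- generate_pattern("block3_conv1", i, size = 100)
  grobs[[i]] <- rasterGrob(pattern, width = unit(0.9, "npc"), height = unit(0.9, "npc"))
}
grid.arrange(grobs = grobs, ncol = 4)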
Cognitive Information Analysis Seminar B
library(keras)

# Inception-style block with four parallel branches on MNIST
input_chiba <- layer_input(shape = c(28, 28, 1))

branch_a <- input_chiba %>%
  layer_conv_2d(filters = 32, kernel_size = 1, activation = "relu", strides = 2)

branch_b <- input_chiba %>%
  layer_conv_2d(filters = 32, kernel_size = 1, activation = "relu") %>%
  layer_conv_2d(filters = 32, kernel_size = 2, activation = "relu", strides = 2)

branch_c <- input_chiba %>%
  layer_average_pooling_2d(pool_size = 2, strides = 2) %>%
  layer_conv_2d(filters = 32, kernel_size = 1, activation = "relu")

branch_d <- input_chiba %>%
  layer_conv_2d(filters = 32, kernel_size = 1, activation = "relu") %>%
  layer_conv_2d(filters = 32, kernel_size = 1, activation = "relu") %>%
  layer_conv_2d(filters = 32, kernel_size = 2, activation = "relu", strides = 2)

concat <- layer_concatenate(list(branch_a, branch_b, branch_c, branch_d))

output_chiba <- concat %>%
  layer_flatten() %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 10, activation = "softmax")

chiba_model <- keras_model(input_chiba, output_chiba)

# MNIST data
mnist <- dataset_mnist()
c(c(train_images, train_labels), c(test_images, test_labels)) %<-% mnist
train_images <- array_reshape(train_images, c(60000, 28, 28, 1))
test_images <- array_reshape(test_images, c(10000, 28, 28, 1))
train_images <- train_images / 255
test_images <- test_images / 255
train_labels <- to_categorical(train_labels)
test_labels <- to_categorical(test_labels)

chiba_model %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)

history <- chiba_model %>% fit(
  train_images, train_labels,
  epochs = 15,
  batch_size = 64,
  validation_data = list(test_images, test_labels)
)

plot(history)
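As a quick follow-up (not in the original post), the branching model can be inspected and scored on the held-out test set:

# illustrative addition: inspect the architecture and evaluate on the test set
summary(chiba_model)
chiba_model %>% evaluate(test_images, test_labels)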
Cognitive Information Analysis Seminar: Visualizing a Convnet
base_dir <- "/home/shiga/R_data/cat_and_dog/small_set"
train_dir <- file.path(base_dir, "train")
validation_dir <- file.path(base_dir, "validation")
test_dir <- file.path(base_dir, "test")

library(keras)
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(150, 150, 3)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")

model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-4),
  metrics = c("acc")
)

datagen <- image_data_generator(
  rescale = 1/255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE
)
test_datagen <- image_data_generator(rescale = 1/255)

train_generator <- flow_images_from_directory(
  train_dir,
  datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)
validation_generator <- flow_images_from_directory(
  validation_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)

history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 100,
  validation_data = validation_generator,
  validation_steps = 50
)

###
img_path <- "/home/shiga/R_data/cat_and_dog/small_set/test/cat/cat.1700.jpg"
img <- image_load(img_path, target_size = c(150, 150))
img_tensor <- image_to_array(img)
img_tensor <- array_reshape(img_tensor, c(1, 150, 150, 3))
img_tensor <- img_tensor / 255
plot(as.raster(img_tensor[1,,,]))

layer_outputs <- lapply(model$layers[1:8], function(layer) layer$output)
# Creates a model that will return these outputs, given the model input:
activation_model <- keras_model(inputs = model$input, outputs = layer_outputs)
activations <- activation_model %>% predict(img_tensor)
first_layer_activation <- activations[[1]]
dim(first_layer_activation)

plot_channel <- function(channel) {
  rotate <- function(x) t(apply(x, 2, rev))
  image(rotate(channel), axes = FALSE, asp = 1, col = terrain.colors(12))
}

plot_channel(first_layer_activation[1,,,5])
fifth_layer_activation <- activations[[5]]
plot_channel(fifth_layer_activation[1,,,1])
plot_channel(fifth_layer_activation[1,,,3])
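The post plots single channels one at a time; as an optional extension (not part of the original code), the first 16 channels of the first layer's activation can be tiled in one figure with base graphics:

# illustrative addition: tile the first 16 channels of the first layer in a 4 x 4 grid
op <- par(mfrow = c(4, 4), mai = rep(0.02, 4))
for (i.ch in 1:16) {
  plot_channel(first_layer_activation[1, , , i.ch])
}
par(op)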
Cognitive Information Analysis Seminar: Stone-Taking Game (with bugs)
# each column of col1/col2/col3 encodes how many stones remain in that pile (0 to 4)
col1 = matrix(c(rep(0,4), c(1,0,0,0), c(1,1,0,0), c(1,1,1,0), rep(1,4)), nrow = 4, byrow = T)
col2 = matrix(c(rep(10,4), c(11,10,10,10), c(11,11,10,10), c(11,11,11,10), rep(11,4)), nrow = 4, byrow = T)
col3 = matrix(c(rep(100,4), c(101,100,100,100), c(101,101,100,100), c(101,101,101,100), rep(101,4)), nrow = 4, byrow = T)

# enumerate all 125 states, the actions available in each state,
# and initialize the action values for both players
act.list = list()
state.temp = list()
counter = 0
Q1 = list()
Q2 = list()
for (i.c1 in 1:5){
  if (sum(col1[,i.c1]) == 0){
    act1 = c()
  } else {
    act1 = seq(1, sum(col1[,i.c1]), 1)
  }
  for (i.c2 in 1:5){
    if (sum(col2[,i.c2]) == 40){
      act2 = c()
    } else {
      act2 = seq(11, sum(col2[,i.c2] == 11) * 11, 11)
    }
    for (i.c3 in 1:5){
      if (sum(col3[,i.c3]) == 400){
        act3 = c()
      } else {
        act3 = seq(101, sum(col3[,i.c3] == 101) * 101, 101)
      }
      counter = counter + 1
      state.temp[[counter]] = cbind(col1[,i.c1], col2[,i.c2], col3[,i.c3])
      act.list[[counter]] = c(act1, act2, act3)
      Q1[[counter]] = rep(0, length(c(act1, act2, act3)))
      Q2[[counter]] = rep(0, length(c(act1, act2, act3)))
    }
  }
}

# remove stones from the pile indicated by the chosen action
rm.stone <- function(act, st.shape){
  if (act == -99){ return(st.shape) }
  if (act > 100){
    n.remove = act %% 100
    n.stone = length(which(st.shape[,3] == 101))
    start = (5 - n.stone)
    st.shape[(start:(start + n.remove - 1)), 3] = 100
  } else {
    if (act > 10){
      n.remove = act %% 10
      n.stone = length(which(st.shape[,2] == 11))
      start = (5 - n.stone)
      st.shape[(start:(start + n.remove - 1)), 2] = 10
    } else {
      n.remove = act
      n.stone = length(which(st.shape[,1] == 1))
      start = (5 - n.stone)
      st.shape[(start:(start + n.remove - 1)), 1] = 0
    }
  }
  return(st.shape)
}

# identify the index of the current state
id.state <- function(st.shape, state.temp){
  for (i.st in 1:125){
    if (all(st.shape == state.temp[[i.st]])){
      state.idx = i.st
      break
    }
  }
  return(state.idx)
}

# choose an action with a softmax policy over the action values of the current state
ck.act <- function(q.vec, act.vec, eta){
  if (is.null(act.vec)){
    return(list(act = -99, act.idx = -99))
  }
  if (length(act.vec) == 1){
    act = act.vec
  } else {
    p = exp(eta * q.vec) / sum(exp(eta * q.vec))
    act = sample(act.vec, 1, prob = p)
  }
  act.idx = which(act.vec == act)
  return(list(act = act, act.idx = act.idx))
}

gamma = 1; alpha = 0.1; eta = 1; n.rep = 10000
for (i.rep in 1:n.rep){
  # first action
  state.idx = 125; counter = 1
  st.shape = state.temp[[state.idx]]
  res.act = ck.act(Q1[[state.idx]], act.list[[state.idx]], eta)
  act = res.act$act; act.idx = res.act$act.idx
  state.old = state.idx
  act.old = act.idx
  # 2nd to last
  while (state.idx != 1) {
    counter = counter + 1
    st.shape <- rm.stone(act, st.shape)
    state.idx <- id.state(st.shape, state.temp)
    if (counter %% 2 == 1) {
      res.act = ck.act(Q1[[state.idx]], act.list[[state.idx]], eta)
    } else {
      res.act = ck.act(Q2[[state.idx]], act.list[[state.idx]], eta)
    }
    act = res.act$act; act.idx = res.act$act.idx
    if (state.idx == 1){
      if (counter %% 2 == 1){ rew1 = -1; rew2 = 1 } else { rew1 = 1; rew2 = -1 }
      Q1[[state.old]][act.old] = Q1[[state.old]][act.old] +
        alpha * (rew1 - Q1[[state.old]][act.old])
      Q2[[state.old]][act.old] = Q2[[state.old]][act.old] +
        alpha * (rew2 - Q2[[state.old]][act.old])
    } else {
      rew1 = 0; rew2 = 0
      if (counter %% 2 == 1){
        Q1[[state.old]][act.old] = Q1[[state.old]][act.old] +
          alpha * (rew1 + gamma * Q1[[state.idx]][act.idx] - Q1[[state.old]][act.old])
      } else {
        Q2[[state.old]][act.old] = Q2[[state.old]][act.old] +
          alpha * (rew2 + gamma * Q2[[state.idx]][act.idx] - Q2[[state.old]][act.old])
      }
    }
    state.old = state.idx
    act.old = act.idx
  }
}
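As a small sanity check (an addition, not part of the original exercise), the greedy opening move learned for player 1 can be read off from the starting state (index 125):

# illustrative addition: greedy first move for player 1 from the full board
act.list[[125]][which.max(Q1[[125]])]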
Cognitive Information Analysis: Practice Problem
# Corrected version.
# If it takes too long, reduce the number of repetitions.
# The code is not very efficient, so feel free to modify it.

# enumerate all board states reachable when player 1 (marks = 1) moves first
temp.state = expand.grid(loc1 = 0:2, loc2 = 0:2, loc3 = 0:2,
                         loc4 = 0:2, loc5 = 0:2, loc6 = 0:2,
                         loc7 = 0:2, loc8 = 0:2, loc9 = 0:2)
n.ones = rowSums(temp.state == 1)
n.twos = rowSums(temp.state == 2)
omitTwo = which(n.ones < n.twos)
omitOne = which((n.ones - 1) > n.twos)
omitUniq = unique(c(omitOne, omitTwo))
state = temp.state[-omitUniq, ]
poss.act = apply(state, 1, function(x) which(x == 0))

# all winning lines (rows, columns, diagonals)
temp.win = matrix(1:9, 3)
win.idx = matrix(c(temp.win[1,], temp.win[2,], temp.win[3,],
                   temp.win[,1], temp.win[,2], temp.win[,3],
                   diag(temp.win), diag(temp.win[3:1,])), ncol = 3, byrow = T)

idx1 = c()   # states where player 1 has completed a line
idx2 = c()   # states where player 2 has completed a line
idxCM = c()  # states where player 1 has two marks in a line
for (i.win in 1:nrow(win.idx)){
  idx.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]] == 1) == 3)
  idx1 = c(idx1, which(idx.temp))
  idxCM.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]] == 1) == 2)
  idxCM = c(idxCM, which(idxCM.temp))
  idx.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]] == 2) == 3)
  idx2 = c(idx2, which(idx.temp))
}
n0 = apply(state, 1, function(x) length(which(x == 0)))
tie = which(n0 == 0)

# initialize action values, visit counts, and the opponent's policy
Q = list()
V = list()
rew.sum = list()
rew.count = list()
policy = list()
for (i.state in 1:nrow(state)){
  Q[[i.state]] = rep(0, length(poss.act[[i.state]]))
  V[[i.state]] = rep(0, length(poss.act[[i.state]]))
  rew.sum[[i.state]] = rep(0, length(poss.act[[i.state]]))
  rew.count[[i.state]] = rep(0, length(poss.act[[i.state]]))
  policy[[i.state]] = rep(1/length(poss.act[[i.state]]), length(poss.act[[i.state]]))
}

R.W = 10   # reward for a win
R.T = 5    # reward for a tie
R.L = -10  # reward for a loss
gamma = 1
epsilon = 0.05
eta = 1

# check whether the current state is terminal and return the corresponding reward
ck.result <- function(st.idx, idx1, idx2, tie){
  term = F
  rew = 0
  result = "not terminal"
  if (match(st.idx, idx1, nomatch = 0) != 0){
    rew = R.W; term = T; result = "win"
  } else if (match(st.idx, idx2, nomatch = 0) != 0){
    rew = R.L; term = T; result = "lose"
  } else if (match(st.idx, tie, nomatch = 0) != 0){
    rew = R.T; term = T; result = "tie"
  }
  return(list(rew = rew, term = term, result = result))
}

n.rep = 10000
game.hist = rep(0, n.rep)

# main loop
for (i.rep in 1:n.rep){
  st.idx = 1
  term = F
  state.temp = rep(0, 9)
  state.hist1 = c()
  state.hist2 = c()
  repeat {
    # player 1 moves (softmax over Q)
    if (length(poss.act[[st.idx]]) == 1){
      act1 = poss.act[[st.idx]]
    } else {
      p.act = exp(eta * Q[[st.idx]]) / sum(exp(eta * Q[[st.idx]]))
      act1 = sample(poss.act[[st.idx]], 1, prob = p.act)
    }
    state.hist1 = rbind(state.hist1, c(st.idx, act1))
    state.temp[act1] = 1
    st.idx = which(apply(state, 1, function(x) sum(x == state.temp)) == 9)
    res = ck.result(st.idx, idx1, idx2, tie)
    if (res$term == T){
      rew = res$rew
      break
    }
    # player 2 moves (fixed random policy)
    if (length(poss.act[[st.idx]]) == 1){
      act2 = poss.act[[st.idx]]
    } else {
      act2 = sample(poss.act[[st.idx]], 1, prob = policy[[st.idx]])
    }
    state.hist2 = rbind(state.hist2, c(st.idx, act2))
    state.temp[act2] = 2
    st.idx = which(apply(state, 1, function(x) sum(x == state.temp)) == 9)
    res = ck.result(st.idx, idx1, idx2, tie)
    if (res$term == T){
      rew = res$rew
      break
    }
  }
  # update Q with the Monte Carlo estimate (average return per state-action pair)
  game.hist[i.rep] = res$result != "lose"
  n.st = nrow(state.hist1)
  if (i.rep %% 100 == 0){ print(i.rep) }
  for (i.st in 1:n.st){
    act.idx = which(poss.act[[state.hist1[i.st,1]]] == state.hist1[i.st,2])
    rew.sum[[state.hist1[i.st,1]]][act.idx] = rew.sum[[state.hist1[i.st,1]]][act.idx] + rew
    rew.count[[state.hist1[i.st,1]]][act.idx] = rew.count[[state.hist1[i.st,1]]][act.idx] + 1
    Q[[state.hist1[i.st,1]]][act.idx] =
      rew.sum[[state.hist1[i.st,1]]][act.idx] / rew.count[[state.hist1[i.st,1]]][act.idx]
  }
}

# plotting results
library(pracma)
game.hist.smooth = movavg(game.hist, 400, type = "s")
plot(game.hist.smooth, type = 'l')
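A small addition, not in the original code: after training, the greedy opening move implied by Q for the empty board (state 1) can be displayed as a 3 x 3 board.

# illustrative addition: greedy opening move for player 1 on the empty board
best.first <- poss.act[[1]][which.max(Q[[1]])]
matrix(replace(rep(".", 9), best.first, "X"), nrow = 3)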
Cognitive Information Analysis Seminar A: Monte Carlo 01
# deterministic transition matrix for the 4x4 gridworld (state 15 is the terminal state)
north = c(1:3, 15, 1:10)
east = 2:15; east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)

r = -1                 # reward per move
P = rep(0.25, 4)       # equiprobable random policy
V = rep(0, 14)
max.iter = 10000
state.count = rep(0, 15)

# first-visit Monte Carlo evaluation of the random policy
for (i.iter in 1:max.iter){
  state = sample(1:14, 1)
  state.seq = state
  while (state != 15){
    action = sample(1:4, 1, prob = P)
    state.seq = c(state.seq, trM[state, action])
    state = trM[state, action]
  }
  uniq.seq = unique(state.seq)
  for (i.uniq in 1:(length(uniq.seq) - 1)){
    first.visit = which(state.seq == uniq.seq[i.uniq])[1]
    V[uniq.seq[i.uniq]] = V[uniq.seq[i.uniq]] + r * (length(state.seq) - first.visit)
  }
  state.count[uniq.seq] = state.count[uniq.seq] + 1
}
V = matrix(c(0, V / state.count[1:14], 0), nrow = 4)
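As a cross-check (an addition, not part of the original exercise), the same values can be computed exactly by iterative policy evaluation on the same transition matrix and compared with the Monte Carlo estimate:

# illustrative addition: exact evaluation of the random policy for comparison
V.dp = rep(0, 15)            # V.dp[15] is the terminal state and stays 0
for (i.sweep in 1:1000) {
  V.new = V.dp
  for (s in 1:14) {
    V.new[s] = sum(P * (r + V.dp[trM[s, ]]))
  }
  V.dp = V.new
}
matrix(c(0, V.dp[1:14], 0), nrow = 4)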
Cognitive Information Analysis: Exercise Problem from 06/05
The program we wrote in class had no problems.
Below, I have added commands to visualize the results:
# initialization
p.win = 0.4; p.lose = 0.6
P = c(p.lose, p.win)
R = c(rep(0, 100), 1)   # reward of 1 only when the capital reaches 100
V = rep(0, 101)         # V[i] is the value of holding capital i - 1
gamma = 1; tol = 1e-10; counter = 0
cols = c("red", "skyblue", "orange", "black")
par(mfrow = c(1, 2))

# value iteration
repeat{
  counter = counter + 1
  delta = 0
  for (i.state in 2:100) {
    v <- V[i.state]
    temp.v = rep(0, (i.state - 1))
    for (i.bet in 1:(i.state - 1)) {
      lower.B = i.state - i.bet
      upper.B = min(i.state + i.bet, 101)
      temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
    }
    V[i.state] = max(temp.v)
    delta <- max(abs(v - V[i.state]), delta)
  }
  # plotting results
  if (counter == 1){
    plot(V[2:100], type = 'l', col = cols[1], lwd = 2, xlab = "capital", ylab = "value")
  } else if (counter < 4){
    lines(V[2:100], type = 'l', col = cols[counter], lwd = 2)
  }
  if (delta < tol){ break }
} # end of value iteration
lines(V[2:100], type = 'l', col = cols[4], lwd = 2)
legend("topleft", c("1st sweep", "2nd sweep", "3rd sweep", "32nd sweep"), col = cols, lwd = 2)

# identifying the optimal action (bet) for each level of capital
policy = rep(0, 101)
for (i.state in 2:100) {
  temp.v = rep(0, (i.state - 1))
  for (i.bet in 1:(i.state - 1)) {
    lower.B = i.state - i.bet
    upper.B = min(i.state + i.bet, 101)
    temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
  }
  policy[i.state] = which.max(round(temp.v, 4))
}
barplot(policy, xlab = "capital", ylab = "Optimal action")
Cognitive Information Analysis Seminar A: Assignment 03
V = rep(0, 25)

# defining probability matrix (equiprobable random policy)
P = matrix(1/4, nrow = 25, ncol = 4)

# defining deterministic transition matrix
north = c(2:25, 25); north[c(5, 10, 15, 20, 25)] = c(5, 10, 15, 20, 25)
east = c(6:25, 21:25)
west = c(1:5, 1:20)
south = c(1, 1:24); south[c(1, 6, 11, 16, 21)] = c(1, 6, 11, 16, 21)
trM = cbind(north, east, south, west)
trM[10, ] = 6
trM[20, ] = 18

# defining reward matrix (-1 for bumping into a wall, bonuses at states 10 and 20)
R = matrix(0, nrow = 25, ncol = 4)
R[which(trM == 1:25)] = -1
R[10, ] = 10
R[20, ] = 5

gamma = 0.9; tol = 1e-10
bestP = sample(1:4, 25, replace = T)
stable = F; counter = 0
while (stable == F){
  counter = counter + 1
  # iterative policy evaluation
  delta = 1
  while (delta > tol) {
    delta = 0; V.old = V
    for (i_state in 1:25) {
      v = V[i_state]
      V[i_state] = sum(P[i_state, ] * (R[i_state, ] + gamma * V.old[trM[i_state, ]]))
      delta = max(delta, abs(v - V[i_state]))
    }
  }
  # policy improvement
  stable = T
  for (i_state in 1:25) {
    b = bestP[i_state]
    bestP[i_state] = which.max(R[i_state, ] + gamma * V[trM[i_state, ]])
    if (bestP[i_state] != b){ stable = F }
  }
}

apply(matrix(bestP, nrow = 5), 2, rev)
bestP.mat = apply(matrix(as.character(bestP), nrow = 5), 2, rev)
bestP.mat[which(bestP.mat == "1")] = "N"
bestP.mat[which(bestP.mat == "2")] = "E"
bestP.mat[which(bestP.mat == "3")] = "S"
bestP.mat[which(bestP.mat == "4")] = "W"
print(bestP.mat)
Cognitive Information Analysis: Assignment 2
# initializing Q matrix
Q = matrix(1/4, nrow = 25, ncol = 4)

# defining deterministic transition matrix
north = c(2:25, 25); north[c(5, 10, 15, 20, 25)] = c(5, 10, 15, 20, 25)
east = c(6:25, 21:25)
west = c(1:5, 1:20)
south = c(1, 1:24); south[c(1, 6, 11, 16, 21)] = c(1, 6, 11, 16, 21)
trM = cbind(north, east, south, west)
trM[10, ] = 6
trM[20, ] = 18

# defining reward matrix
R = matrix(0, nrow = 25, ncol = 4)
R[which(trM == 1:25)] = -1
R[10, ] = 10
R[20, ] = 5

# iterative evaluation of the action values under the equiprobable random policy
nRep = 1000; gamma = 0.9; P = 0.25
for (i_rep in 1:nRep) {
  Q.old = Q
  for (i_state in 1:25) {
    for (i_act in 1:4){
      Q[i_state, i_act] = R[i_state, i_act] + gamma * sum(P * Q.old[trM[i_state, i_act], ])
    }
  }
}
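To see what these action values imply, here is a small addition (not in the original assignment) that prints the greedy action in each state using the same N/E/S/W layout as Assignment 03:

# illustrative addition: greedy policy implied by Q, printed as a 5 x 5 grid
bestA = apply(Q, 1, which.max)
bestA.mat = apply(matrix(c("N", "E", "S", "W")[bestA], nrow = 5), 2, rev)
print(bestA.mat)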
Reinforcement Learning: Comparing Policies 1
# select the action with the largest Q (ties broken at random)
max.Q <- function(Q){
  max.a = max(Q)
  max.idx = which(Q == max.a)
  if (length(max.idx) > 1){
    max.idx = sample(max.idx, 1)
  }
  return(max.idx)
}

# greedy policy
greedy <- function(n.trial, Q.star, N){
  Q = Q.cum = count = rep(0, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    act.idx = max.Q(Q)
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q.cum[act.idx] = Q.cum[act.idx] + r.t
    count[act.idx] = count[act.idx] + 1
    Q[act.idx] = Q.cum[act.idx] / count[act.idx]
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}

# epsilon-greedy policy
# with epsilon = 0 this is equivalent to the greedy policy
eps.greedy <- function(n.trial, Q.star, N, epsilon){
  Q = Q.cum = count = rep(0, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    if (runif(1) < epsilon){
      act.idx = sample(1:N, 1)
    } else {
      act.idx = max.Q(Q)
    }
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q.cum[act.idx] = Q.cum[act.idx] + r.t
    count[act.idx] = count[act.idx] + 1
    Q[act.idx] = Q.cum[act.idx] / count[act.idx]
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}

# function that repeats the simulation n.rep times
comp.eps.greedy <- function(n.trial, n.rep, N, epsilon){
  rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
  for (i.rep in 1:n.rep){
    Q.star = rnorm(N, mean = 0, sd = 1)
    res = eps.greedy(n.trial, Q.star, N, epsilon)
    rew.history[ , i.rep] = res$rew.earned
  }
  return(rew.history)
}

# run
EG.000 = comp.eps.greedy(1000, 1000, 10, 0.000)
EG.001 = comp.eps.greedy(1000, 1000, 10, 0.001)
EG.010 = comp.eps.greedy(1000, 1000, 10, 0.010)
EG.100 = comp.eps.greedy(1000, 1000, 10, 0.100)
EG.150 = comp.eps.greedy(1000, 1000, 10, 0.150)

# visualize the results
plot(rowMeans(EG.000), type = "l", ylab = "Average Reward", xlab = "Trial", ylim = c(0, 2))
lines(rowMeans(EG.001), col = 2)
lines(rowMeans(EG.010), col = 3)
lines(rowMeans(EG.100), col = 4)
lines(rowMeans(EG.150), col = 5)
legend("bottomright",
       c("epsilon = 0.000", "epsilon = 0.001", "epsilon = 0.010",
         "epsilon = 0.100", "epsilon = 0.150"),
       col = 1:5, lwd = 2)

# softmax policy (tau is the inverse temperature)
softmax <- function(n.trial, Q.star, N, tau){
  Q = Q.cum = count = rep(0, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    p = exp(Q * tau) / sum(exp(Q * tau))
    act.idx = sample(1:N, 1, prob = p)
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q.cum[act.idx] = Q.cum[act.idx] + r.t
    count[act.idx] = count[act.idx] + 1
    Q[act.idx] = Q.cum[act.idx] / count[act.idx]
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}

comp.softmax <- function(n.trial, n.rep, N, tau){
  rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
  for (i.rep in 1:n.rep){
    Q.star = rnorm(N, mean = 0, sd = 1)
    res = softmax(n.trial, Q.star, N, tau)
    rew.history[ , i.rep] = res$rew.earned
  }
  return(rew.history)
}

# run
EG.000 = comp.eps.greedy(1000, 1000, 10, 0.000)
EG.010 = comp.eps.greedy(1000, 1000, 10, 0.010)
EG.100 = comp.eps.greedy(1000, 1000, 10, 0.100)
SM.10 = comp.softmax(1000, 1000, 10, 10)
SM.03 = comp.softmax(1000, 1000, 10, 3)

# visualize the results
plot(rowMeans(EG.000), type = "l", ylab = "Average Reward", xlab = "Trial", ylim = c(0, 2))
lines(rowMeans(EG.010), col = 2)
lines(rowMeans(EG.100), col = 3)
lines(rowMeans(SM.10), col = 4)
lines(rowMeans(SM.03), col = 5)
legend("bottomright",
       c("epsilon = 0.000", "epsilon = 0.010", "epsilon = 0.100", "tau = 10", "tau = 3"),
       col = 1:5, lwd = 2)

# epsilon-greedy (2nd version): constant learning rate and arbitrary initial values
eps.greedy2 <- function(n.trial, Q.star, N, epsilon, lr, init.Q){
  Q = rep(init.Q, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    if (runif(1) < epsilon){
      act.idx = sample(1:N, 1)
    } else {
      act.idx = max.Q(Q)
    }
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q[act.idx] = Q[act.idx] + lr * (r.t - Q[act.idx])
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}
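The post defines eps.greedy2 but does not run it. As a sketch (an addition, not in the original), the same wrapper-and-plot pattern used above can compare optimistic and realistic initial values:

# illustrative addition: wrapper to average eps.greedy2 over repeated bandit problems
comp.eps.greedy2 <- function(n.trial, n.rep, N, epsilon, lr, init.Q){
  rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
  for (i.rep in 1:n.rep){
    Q.star = rnorm(N, mean = 0, sd = 1)
    res = eps.greedy2(n.trial, Q.star, N, epsilon, lr, init.Q)
    rew.history[ , i.rep] = res$rew.earned
  }
  return(rew.history)
}

# example: optimistic (init.Q = 5, epsilon = 0) vs. realistic (init.Q = 0, epsilon = 0.1)
OPT = comp.eps.greedy2(1000, 1000, 10, 0.0, 0.1, 5)
RLS = comp.eps.greedy2(1000, 1000, 10, 0.1, 0.1, 0)
plot(rowMeans(OPT), type = "l", ylab = "Average Reward", xlab = "Trial", ylim = c(0, 2))
lines(rowMeans(RLS), col = 2)
legend("bottomright", c("optimistic, epsilon = 0", "realistic, epsilon = 0.1"), col = 1:2, lwd = 2)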