Cognitive Information Analysis Seminar: stone-removal game (with bugs)

col1 = matrix(c(rep(0,4),c(1,0,0,0),c(1,1,0,0),c(1,1,1,0),rep(1,4)),nrow=4,byrow=T)
col2 = matrix(c(rep(10,4),c(11,10,10,10),c(11,11,10,10),c(11,11,11,10),rep(11,4)),nrow=4,byrow=T)
col3 = matrix(c(rep(100,4),c(101,100,100,100),c(101,101,100,100),c(101,101,101,100),rep(101,4)),nrow=4,byrow=T)
act.list = list()
state.temp = list()
counter = 0
Q1 = list()
Q2 = list()
for (i.c1 in 1:5){
  if (sum(col1[,i.c1])==0){
    act1 = c()
  } else {
    act1 = seq(1,sum(col1[,i.c1]),1)
  }
  for (i.c2 in 1:5){
    if (sum(col2[,i.c2])==40){
      act2 = c()
    } else {
      act2 = seq(11,sum(col2[,i.c2]==11)*11,11)
    }
    for (i.c3 in 1:5){
      if (sum(col3[,i.c3])==400){
        act3 = c()
      } else {
        act3 = seq(101,sum(col3[,i.c3]==101)*101,101)
      }
      counter = counter + 1
      state.temp[[counter]] = cbind(col1[,i.c1],col2[,i.c2],col3[,i.c3])
      act.list[[counter]] = c(act1,act2,act3)
      Q1[[counter]] = rep(0, length(c(act1,act2,act3)))
      Q2[[counter]] = rep(0, length(c(act1,act2,act3)))
    }
  }
}
rm.stone <- function(act, st.shape){
  # remove stones from the end of the run so the remaining stones stay in the
  # leading rows, matching the shapes enumerated in state.temp
  if (act == -99){return(st.shape)}  # no legal action: leave the board unchanged
  if (act > 100){
    n.remove = act%%100
    n.stone = length(which(st.shape[,3]==101))
    st.shape[(n.stone-n.remove+1):n.stone, 3] = 100
  } else {
    if (act > 10){
      n.remove = act%%10
      n.stone = length(which(st.shape[,2]==11))
      st.shape[(n.stone-n.remove+1):n.stone, 2] = 10
    } else {
      n.remove = act
      n.stone = length(which(st.shape[,1]==1))
      st.shape[(n.stone-n.remove+1):n.stone, 1] = 0
    }
  }
  return(st.shape)
}

id.state <- function(st.shape, state.temp){
  for (i.st in 1:125){
    if  (all(st.shape == state.temp[[i.st]])){
      state.idx = i.st 
      break
    }
  }
  return(state.idx)
}

# softmax action selection; note that state.idx is read from the calling environment
ck.act <- function(Q, act.vec, eta){
  if (is.null(act.vec)){
    return(list(act = -99, act.idx = -99))
  }
  if (length(act.vec)==1){
    act = act.vec
  } else {
    p = exp(eta*Q[[state.idx]])/sum(exp(eta*Q[[state.idx]]))
    act = sample(act.vec, 1, prob = p)
  }
  act.idx = which(act.vec==act)
  return(list(act = act, act.idx = act.idx))
}


gamma = 1; alpha = 0.1; eta = 1; n.rep = 10000
for (i.rep in 1:n.rep){
  # first action
  state.idx = 125; counter = 1
  st.shape = state.temp[[state.idx]]
  res.act = ck.act(Q1,act.list[[state.idx]],eta)
  act = res.act$act;act.idx = res.act$act.idx
  state.old = state.idx
  act.old = act.idx
  
  # 2nd to last
  while (state.idx != 1) {
    counter = counter + 1
    st.shape <- rm.stone(act, st.shape)
    state.idx <- id.state(st.shape, state.temp)
    if (counter%%2==1) {
      res.act = ck.act(Q1,act.list[[state.idx]],eta)
    } else {
      res.act = ck.act(Q2,act.list[[state.idx]],eta)
    }
    act = res.act$act; act.idx = res.act$act.idx
    if (state.idx == 1){
      if (counter%%2==1){rew1 = -1; rew2 = 1} else {rew1 = 1; rew2 = -1}
      Q1[[state.old]][act.old] = Q1[[state.old]][act.old] +
        alpha*(rew1 - Q1[[state.old]][act.old])
      Q2[[state.old]][act.old] = Q2[[state.old]][act.old] +
        alpha*(rew2 - Q2[[state.old]][act.old])
    } else {
      rew1 = 0
      rew2 = 0
      if (counter%%2==1){
        Q1[[state.old]][act.old] = Q1[[state.old]][act.old] +
          alpha*(rew1 + gamma*Q1[[state.idx]][act.idx] - Q1[[state.old]][act.old])
      } else {
        Q2[[state.old]][act.old] = Q2[[state.old]][act.old] +
          alpha*(rew2 + gamma*Q2[[state.idx]][act.idx] - Q2[[state.old]][act.old])
      }
    }
    
    state.old = state.idx
    act.old = act.idx
  }
}  
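
After training, the learned action values can be inspected directly. The check below is an addition (not part of the original script): it lists player 1's Q values for every legal opening move from the full board, which is state 125 in the enumeration above.

# added check: player 1's learned value for each legal opening move (state 125 = full board)
first.moves = data.frame(action = act.list[[125]], Q = round(Q1[[125]], 3))
print(first.moves[order(first.moves$Q, decreasing = TRUE), ])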

Cognitive Information Analysis: Practice Problem

# corrected version
# If it takes too long, reduce the number of repetitions.
# The implementation is not very efficient, so modify it as needed.

temp.state = expand.grid(loc1 = 0:2,loc2=0:2,loc3=0:2,
                         loc4 = 0:2,loc5=0:2,loc6=0:2,
                         loc7 = 0:2,loc8=0:2,loc9=0:2)
# equivalent one-liner: temp.state = expand.grid(rep(list(0:2), 9))

n.ones = rowSums(temp.state == 1 )
n.twos = rowSums(temp.state == 2 )
omitTwo = which(n.ones < n.twos)
omitOne = which((n.ones-1 ) > n.twos)
omitUniq = unique(c(omitOne, omitTwo))
state = temp.state[-omitUniq,]
poss.act = apply(state, 1, function(x) which(x==0))

temp.win = matrix(1:9,3)
win.idx = matrix(c(temp.win[1,],temp.win[2,],temp.win[3,],
                   temp.win[,1],temp.win[,2],temp.win[,3],
                   diag(temp.win),
                   diag(temp.win[3:1,])),ncol=3,byrow=T)

idx1 = c()
idx2 = c()
idxCM = c()
for (i.win in 1:nrow(win.idx)){
  idx.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]]==1)==3)
  idx1 = c(idx1, which(idx.temp))
  idxCM.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]]==1)==2)
  idxCM = c(idxCM, which(idxCM.temp))
  idx.temp = apply(state, 1, function(x) sum(x[win.idx[i.win,]]==2)==3)
  idx2 = c(idx2, which(idx.temp))
}
n0=apply(state,1,function(x) length((which(x==0))))
tie = which(n0==0)

Q = list()
V = list()
rew.sum = list()
rew.count = list()
policy = list()
for (i.state in 1:nrow(state)){
  Q[[i.state]] =  rep(0,length(poss.act[[i.state]]))
  V[[i.state]] = rep(0,length(poss.act[[i.state]]))
  rew.sum[[i.state]] = rep(0,length(poss.act[[i.state]]))
  rew.count[[i.state]] = rep(0,length(poss.act[[i.state]]))
  policy[[i.state]] = rep(1/length(poss.act[[i.state]]),length(poss.act[[i.state]]))
}

R.W  = 10
R.T  = 5
R.L = -10
gamma = 1
epsilon = 0.05
eta = 1

ck.result <- function(st.idx, idx1, idx2, tie){
  term = F
  rew = 0
  result = "not terminal"
  if (match(st.idx ,idx1, nomatch = 0)!=0){
    rew = R.W
    term = T
    result = "win"
  } else if (match(st.idx ,idx2, nomatch = 0)!=0){
    rew = R.L
    term = T
    result = "lose"
  } else if (match(st.idx ,tie, nomatch = 0)!=0){
    rew = R.T
    term = T
    result = "tie"
  }
  return(list(rew = rew, term = term, result = result))
}

n.rep = 10000
game.hist = rep(0,n.rep)

# main loop
for (i.rep in 1:n.rep){
  st.idx = 1
  term = F
  state.temp = rep(0,9)
  state.hist1 = c()
  state.hist2 = c()
  repeat {
    # playing game
    if (length(poss.act[[st.idx]])==1){
      act1 = poss.act[[st.idx]]
    } else{
      p.act = exp(eta*Q[[st.idx]])/sum(exp(eta*Q[[st.idx]]))
      act1 = sample(poss.act[[st.idx]],1, prob = p.act)
    }
    state.hist1 = rbind(state.hist1,c(st.idx, act1))
    state.temp[act1] = 1
    st.idx = which(apply(state, 1, function(x) sum(x==state.temp) )==9)
    res = ck.result(st.idx, idx1, idx2, tie)
    if (res$term == T){
      rew = res$rew
      break
    }
    # player 2 (the opponent) follows the fixed uniform-random policy
    act2 = sample(poss.act[[st.idx]],1, prob = policy[[st.idx]])
    state.hist2 = rbind(state.hist2,c(st.idx, act2))
    state.temp[act2] = 2
    st.idx = which(apply(state, 1, function(x) sum(x==state.temp) )==9)
    res = ck.result(st.idx, idx1, idx2, tie)
    if (res$term == T){
      rew = res$rew
      break
    }
  }
  # update Q & policy
  game.hist[i.rep] = res$result!="lose"
  n.st = nrow(state.hist1)
  #print(res$result)
  if (i.rep%%100==0){print(i.rep)}
  for (i.st in 1:n.st){
    act.idx = which(poss.act[[state.hist1[i.st,1]]]==state.hist1[i.st,2])
    rew.sum[[state.hist1[i.st,1]]][act.idx] = rew.sum[[state.hist1[i.st,1]]][act.idx] + rew
    rew.count[[state.hist1[i.st,1]]][act.idx] = rew.count[[state.hist1[i.st,1]]][act.idx] + 1
    Q[[state.hist1[i.st,1]]][act.idx] = rew.sum[[state.hist1[i.st,1]]][act.idx] / rew.count[[state.hist1[i.st,1]]][act.idx]
  }
}

# plotting results
library(pracma)
game.hist.smooth = movavg(game.hist, 400, type="s")
plot(game.hist.smooth,type='l')
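
Beyond the win-rate curve, the learned Q values can be checked directly. The snippet below is an added sanity check (not part of the original solution): it prints the greedy opening move for player 1 from the empty board, which is state 1.

# added check: greedy opening move for player 1 from the empty board (state 1)
best.open = poss.act[[1]][which.max(Q[[1]])]
matrix(replace(rep(".", 9), best.open, "X"), nrow = 3)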

Cognitive Information Analysis Seminar A: Monte Carlo 01

north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

r=-1; P=rep(0.25,4); V = rep(0,14); max.iter = 10000
state.count=rep(0,15)
for (i.iter in 1:max.iter){
  state = sample(1:14,1)
  state.seq = state
  while(state!=15){
    action = sample(1:4,1,prob = P)
    state.seq = c(state.seq,trM[state,action])
    state = trM[state,action]  
  }
  uniq.seq = unique(state.seq)
  for (i.uniq in 1:(length(uniq.seq)-1)){
    first.visit = which(state.seq == uniq.seq[i.uniq])[1]
    V[uniq.seq[i.uniq]] = V[uniq.seq[i.uniq]] + r*(length(state.seq)-first.visit)  # one reward of r per remaining transition
  }
  state.count[uniq.seq] = state.count[uniq.seq] + 1
}
V = matrix(c(0,V/state.count[1:14],0),nrow=4)
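
For comparison, the exact state values of the same equiprobable random policy can be computed by iterative policy evaluation on the transition matrix defined above. This is an added cross-check (a sketch reusing trM, P, and r); the Monte Carlo estimates should approach these values as max.iter grows.

# added cross-check: exact V of the random policy via iterative policy evaluation (gamma = 1)
V.dp = rep(0, 15)                      # state 15 is terminal and stays at 0
for (i.sweep in 1:1000){
  for (s in 1:14){
    V.dp[s] = sum(P * (r + V.dp[trM[s, ]]))
  }
}
matrix(c(0, V.dp[1:14], 0), nrow = 4)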

Cognitive Information Analysis: Exercise Problem from 06/05

There were no problems with the program written in the seminar.
Below, I have added commands to visualize the results:

# initialization
p.win =0.4; p.lose = 0.6; P = c(p.lose, p.win);
R = c(rep(0,100), 1); V = rep(0,101);
gamma = 1; tol = 1e-10; counter=0
cols = c("red","skyblue",'orange','black')
par(mfrow=c(1,2))

# value iteration
repeat{
  counter = counter+1
  delta = 0
  for (i.state in 2:100) {
    v <- V[i.state]
    temp.v = rep(0, (i.state - 1))
    for (i.bet in 1:(i.state - 1)) {
      lower.B = i.state - i.bet
      upper.B = min(i.state + i.bet, 101)
      temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
      V[i.state] = max(temp.v)
    }
    delta <- max(abs(v-V[i.state]), delta)
  }
  # plotting results
  if (counter==1){
    plot(V[2:100], type='l', col=cols[1], lwd=2, xlab="capital", ylab="value")
  } else {
    if (counter<4){
      lines(V[2:100], type='l', col=cols[counter], lwd=2)
    }
  }
  if (delta < tol){break}
}
# end of value iteration

lines(V[2:100],type='l', col=cols[4], lwd=2)
legend("topleft", c("1st sweep","2nd sweep", "3rd sweep","32nd sweep") ,col=cols, lwd=2)

# identifying optimal action
policy = rep(0,101)
for (i.state in 2:100) {
  temp.v = rep(0, (i.state - 1))
  for (i.bet in 1:(i.state - 1)) {
    lower.B = i.state - i.bet
    upper.B = min(i.state + i.bet, 101)
    temp.v[i.bet] = sum(P * (R[c(lower.B, upper.B)] + gamma * V[c(lower.B, upper.B)]))
    policy[i.state] = which.max(round(temp.v,4))
  }
}
barplot(policy,xlab="capital",ylab="Optimal action")
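
Because gamma = 1 and the only nonzero reward is 1 for reaching a capital of 100, the converged V[s] can be read as the probability of eventually winning from that state under the optimal policy (state s corresponds to capital s - 1). For example:

# added example: probability of winning when starting from a capital of 50 (state index 51)
V[51]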

Cognitive Information Analysis Seminar A: Assignment 03

V=rep(0,25);                

# defining probability matrix
P=matrix(1/4,nrow=25,ncol=4)  # start from the equiprobable random policy

# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18

# defining reward matrix
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5

gamma=0.9; tol=1e-10
bestP=sample(1:4,25,replace=T)
stable=F; counter=0
while (stable==F){
  counter=counter+1
  # iterative policy evaluation
  delta=1                      # reset before every evaluation
  while (delta>tol) {
    delta=0
    V.old=V
    for (i_state in 1:25) {
      v=V[i_state]
      V[i_state]=sum(P[i_state,]*(R[i_state,]+gamma*V.old[trM[i_state,]]))
      delta=max(delta,abs(v-V[i_state]))
    }
  }
  # policy improvement
  stable=T                     # remains TRUE only if no state changes its greedy action
  for (i_state in 1:25) {
    b=bestP[i_state]
    bestP[i_state]=which.max(R[i_state,]+gamma*V[trM[i_state,]])
    if (bestP[i_state]!=b){stable=F}
    P[i_state,]=as.numeric(1:4==bestP[i_state])  # evaluate the greedy policy next
  }
}

apply(matrix(bestP,nrow=5),2,rev)
bestP.mat = apply(matrix(as.character(bestP),nrow=5),2,rev)
bestP.mat[which(bestP.mat=="1")] = "N"
bestP.mat[which(bestP.mat=="2")] = "E"
bestP.mat[which(bestP.mat=="3")] = "S"
bestP.mat[which(bestP.mat=="4")] = "W"
print(bestP.mat)

Cognitive Information Analysis: Assignment 2

# initializing the Q matrix (one row per state, one column per action)
Q = matrix(1/4, nrow=25, ncol=4)

# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18

R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5

nRep=1000; gamma=0.9; P = 0.25
for (i_rep in 1:nRep) {
  Q.old = Q
  for (i_state in 1:25) {
    for (i_act in 1:4){
      Q[i_state, i_act] = R[i_state, i_act] + gamma * sum(P * Q.old[trM[i_state, i_act], ])
    }
  }
}
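
To display the resulting policy in the same format as the previous assignment, the greedy action in each state can be read off the converged Q. This is an added snippet; the column order of trM means 1 = N, 2 = E, 3 = S, 4 = W.

# added snippet: greedy policy implied by the converged Q, printed as compass directions
bestA = apply(Q, 1, which.max)
bestA.mat = apply(matrix(c("N","E","S","W")[bestA], nrow = 5), 2, rev)
print(bestA.mat)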

Reinforcement Learning: Comparing Policies 1

# select the action with the largest Q value
max.Q <- function(Q){
  max.a = max(Q)
  max.idx = which(Q == max.a)
  if (length(max.idx)>1){
    max.idx = sample(max.idx, 1)
  }
  return(max.idx)
}

# greedy policy
greedy <- function(n.trial, Q.star, N){
  Q = Q.cum = count = rep(0, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    act.idx = max.Q(Q)
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q.cum[act.idx] = Q.cum[act.idx] + r.t
    count[act.idx] = count[act.idx] + 1
    Q[act.idx] = Q.cum[act.idx] / count[act.idx]
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}

# epsilon-greedy policy
# with epsilon = 0, this is equivalent to the greedy policy
eps.greedy <- function(n.trial, Q.star, N, epsilon){
  Q = Q.cum = count = rep(0, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    if (runif(1) < epsilon){
      act.idx = sample(1:N, 1)
    } else {
      act.idx = max.Q(Q)
    }
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q.cum[act.idx] = Q.cum[act.idx] + r.t
    count[act.idx] = count[act.idx] + 1
    Q[act.idx] = Q.cum[act.idx] / count[act.idx]
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}

# function that repeats the simulation n.rep times
comp.eps.greedy <- function(n.trial, n.rep, N, epsilon){
  rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
  for (i.rep in 1:n.rep){
    Q.star = rnorm(N, mean = 0, sd = 1);
    res = eps.greedy(n.trial, Q.star, N, epsilon)
    rew.history[ , i.rep] = res$rew.earned
  }
  return(rew.history)
}

# run the simulations
EG.000 = comp.eps.greedy(1000, 1000, 10, 0.000)
EG.001 = comp.eps.greedy(1000, 1000, 10, 0.001)
EG.010 = comp.eps.greedy(1000, 1000, 10, 0.010)
EG.100 = comp.eps.greedy(1000, 1000, 10, 0.100)
EG.150 = comp.eps.greedy(1000, 1000, 10, 0.150)

# plotting the results
plot(rowMeans(EG.000), type="l", ylab="Average Reward", xlab="Trial",
     ylim = c(0,2))
lines(rowMeans(EG.001),col=2)
lines(rowMeans(EG.010),col=3)
lines(rowMeans(EG.100),col=4)
lines(rowMeans(EG.150),col=5)
legend("bottomright",
       c("epsilon = 0.000",
         "epsilon = 0.001",
         "epsilon = 0.010",
         "epsilon = 0.100",
         "epsilon = 0.150"),
       col=1:5,lwd=2 )

# softmax
softmax <- function(n.trial, Q.star, N, tau){
  Q = Q.cum = count = rep(0, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    p = exp(Q*tau)/sum(exp(Q*tau))
    act.idx = sample(1:N, 1, prob = p)
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q.cum[act.idx] = Q.cum[act.idx] + r.t
    count[act.idx] = count[act.idx] + 1
    Q[act.idx] = Q.cum[act.idx] / count[act.idx]
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}

comp.softmax <- function(n.trial, n.rep, N, tau){
  rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
  for (i.rep in 1:n.rep){
    Q.star = rnorm(N, mean = 0, sd = 1);
    res = softmax(n.trial, Q.star, N, tau)
    rew.history[ , i.rep] = res$rew.earned
  }
  return(rew.history)
}

# run the simulations
EG.000 = comp.eps.greedy(1000, 1000, 10, 0.000)
EG.010 = comp.eps.greedy(1000, 1000, 10, 0.010)
EG.100 = comp.eps.greedy(1000, 1000, 10, 0.100)
SM.10 = comp.softmax(1000,1000,10,10)
SM.03 = comp.softmax(1000,1000,10,3)
# plotting the results
plot(rowMeans(EG.000), type="l", ylab="Average Reward", xlab="Trial",
     ylim = c(0,2))
lines(rowMeans(EG.010),col=2)
lines(rowMeans(EG.100),col=3)
lines(rowMeans(SM.10),col=4)
lines(rowMeans(SM.03),col=5)
legend("bottomright",
       c("epsilon = 0.000",
         "epsilon = 0.010",
         "epsilon = 0.100",
         "tau = 10",
         "tau = 3"),
       col=1:5,lwd=2 )

# epsilon greedy (2nd version)
eps.greedy2 <- function(n.trial, Q.star, N, epsilon, lr, init.Q){
  Q = rep(init.Q, N)
  rew.earned = rep(0, n.trial)
  for (i.trial in 1:n.trial){
    if (runif(1) < epsilon){
      act.idx = sample(1:N, 1)
    } else {
      act.idx = max.Q(Q)
    }
    r.t = rnorm(1, mean = Q.star[act.idx], sd = 1)
    Q[act.idx] = Q[act.idx] + lr*(r.t - Q[act.idx])
    rew.earned[i.trial] = r.t
  }
  return(list(Q = Q, rew.earned = rew.earned))
}
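
eps.greedy2 is defined above but never run. The sketch below is an added usage example (comp.eps.greedy2, OPT, and EG2 are names introduced here, mirroring comp.eps.greedy): with a constant learning rate, optimistic initial values (init.Q = 5) tend to drive early exploration even when epsilon = 0.

# added usage example for eps.greedy2
comp.eps.greedy2 <- function(n.trial, n.rep, N, epsilon, lr, init.Q){
  rew.history = matrix(0, nrow = n.trial, ncol = n.rep)
  for (i.rep in 1:n.rep){
    Q.star = rnorm(N, mean = 0, sd = 1)
    rew.history[ , i.rep] = eps.greedy2(n.trial, Q.star, N, epsilon, lr, init.Q)$rew.earned
  }
  return(rew.history)
}
OPT = comp.eps.greedy2(1000, 1000, 10, 0.000, 0.1, 5)   # optimistic, greedy
EG2 = comp.eps.greedy2(1000, 1000, 10, 0.100, 0.1, 0)   # realistic, epsilon-greedy
plot(rowMeans(OPT), type="l", ylab="Average Reward", xlab="Trial", ylim = c(0,2))
lines(rowMeans(EG2), col=2)
legend("bottomright", c("init.Q = 5, epsilon = 0", "init.Q = 0, epsilon = 0.1"), col=1:2, lwd=2)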

2019 Cognitive Information Analysis Seminar: RL01

set.seed(111)
n.trial = 1000; N = 10; sigma  = 1
Q.star = runif(N); Q = rep(0, N)
count = rep(0,N); Q.cum = rep(0, N)
rew.earned = rep(0,n.trial)
### playing slot-machine
for (i.trial in 1:n.trial){
  max.a = max(Q)
  max.idx = which(Q == max.a)
  if (length(max.idx)>1){
    max.idx = sample(max.idx, 1)
  }
  r.t = rnorm(1, Q.star[max.idx], sd = sigma)
  Q.cum[max.idx] = Q.cum[max.idx] + r.t
  count[max.idx] = count[max.idx] + 1
  Q[max.idx] = Q.cum[max.idx] / count[max.idx]
  rew.earned[i.trial] = r.t
}
plot(rew.earned,type='l')
Q
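
As an added check, the true arm means, the greedy estimates, and the pull counts can be placed side by side; arms the greedy learner never tried keep their initial Q of 0.

# added check: true means vs. greedy estimates and number of pulls per arm
round(rbind(Q.star = Q.star, Q.est = Q, n.pulls = count), 2)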

Cognitive Information Analysis Seminar B

init.RNN <- function(n.uniq,size.hidden){
  W.h = matrix(rnorm(size.hidden*size.hidden), nrow = size.hidden)*0.01
  W.x = matrix(rnorm(n.uniq*size.hidden), nrow = n.uniq)*0.01
  b.h = matrix(rnorm(size.hidden), nrow = 1)*0.01
  W.o = matrix(rnorm(n.uniq*size.hidden),nrow = size.hidden)*0.01
  b.o = matrix(rnorm(n.uniq), nrow = 1)*0.01
  return(list(W.h = W.h, W.x= W.x, b.h = b.h, W.o = W.o, b.o = b.o))
}

affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}


softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.pred <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

RNN.forward <- function(h.prev, x, network){
  b.size = nrow(x)
  h = h.prev%*%network$W.h + x%*%network$W.x
  hb = h+matrix(1,nrow=b.size,ncol=1)%*%network$b.h
  h.next = tanh(hb)
  return(h.next = h.next)
}

RNN.backward <- function(dh.next, network, x, h.next, h.prev){
  dt = dh.next * (1- h.next^2)
  db = colSums(dt)
  dW.h = t(h.prev)%*%dt
  dh.prev = dt%*%t(network$W.h)
  dW.x = t(x)%*%dt
  dx = dt%*%t(network$W.x)
  return(list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx=dx))
}

txt = "You say goodbye and I say hello.  you say goodbay and I say hello"

txt2corpus <- function(txt){
  txt = tolower(txt)
  txt = gsub('[.]', ' . sos',txt)
  words = unlist(strsplit(c('sos',txt), " "))
  uniq.words = unique(words)
  n.uniq = length(uniq.words)
  n.words = length(words)
  corpus = rep(0,n.words)
  corpus = match(words,uniq.words)
  return(corpus)
}

corp2contxt1SRNN = function(corpus){
  len.corp = length(corpus)
  # creating target matrix
  idxT = cbind(1:(len.corp-1), corpus[2:len.corp])
  targ1S = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  targ1S[idxT]=1
  # creating context matrices
  idxC = cbind(1:(len.corp-1),corpus[1:(len.corp-1)])
  contxt = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  contxt[idxC]=1
  return(list(y=targ1S,x=contxt))
}

corp = txt2corpus(txt)
dat = corp2contxt1SRNN(corp)
n.uniq = length(unique(corp))   # 8 unique tokens for the text above
corp.len = nrow(dat$y)          # number of (input, target) pairs

size.hidden = 7
network <- init.RNN(n.uniq, size.hidden)

n.rep = 100000; lambda = 0.01; time = 3
batch.size = corp.len/time      # each batch element is a contiguous subsequence of length `time`
h.prev = array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, time))
loss = rep(0,n.rep)
for (i.rep in 1:n.rep){
  for (i.t in 1:time){
    idx = seq(i.t, corp.len, time)
    h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat$x[idx,], network)
    if (i.t < time){
      h.prev[, , (i.t+1)] = h.next[, , i.t]
    }
  }
  dWHs = matrix(0,nrow=nrow(network$W.h),ncol=ncol(network$W.h))
  dWXs = matrix(0,nrow=nrow(network$W.x),ncol=ncol(network$W.x))
  dBHs = matrix(0,nrow=nrow(network$b.h),ncol=ncol(network$b.h))
  dWOs = matrix(0,nrow=nrow(network$W.o),ncol=ncol(network$W.o))
  dBOs = matrix(0,nrow=nrow(network$b.o),ncol=ncol(network$b.o))
  d.prev = matrix(0,nrow=batch.size,ncol=size.hidden)
  L = 0
  for (i.t in time:1){
    idx = seq(i.t, corp.len, time)
    O = affine.forwd(h.next[,,i.t], network$W.o, network$b.o)
    L = L + softmax.forwd(O, dat$y[idx,])
    ds = softmax.bckwd(O, dat$y[idx,], 1)
    dW.o = affine.bckwd(h.next[,,i.t], network$W.o, network$b.o, ds)
    dWOs = dWOs + dW.o$dW
    dBOs = dBOs + dW.o$db
    RNN.d = RNN.backward(dW.o$dx+d.prev, network, dat$x[idx,], h.next[,,i.t], h.prev[,,i.t])
    dWHs = dWHs + RNN.d$dW.h
    dWXs = dWXs + RNN.d$dW.x
    dBHs = dBHs + RNN.d$db
    d.prev = RNN.d$dh.prev
  }
  loss[i.rep] = L
  network$W.o = network$W.o - lambda*dWOs
  network$b.o = network$b.o - lambda*dBOs
  network$W.h = network$W.h - lambda*dWHs
  network$W.x = network$W.x - lambda*dWXs
  network$b.h = network$b.h - lambda*dBHs
}
plot(loss,type='l')
par(mfrow=c(9,1))
for (i.t in 1:time){
  idx = seq(i.t, corp.len, time)
  O = affine.forwd(h.next[,,i.t], network$W.o, network$b.o)
  print(softmax.pred(O, dat$y[idx,]))
  for (i in 1:3){
    barplot(softmax.pred(O, dat$y[idx,])[i,])
  }
}
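
As an added illustration (not part of the original exercise), the trained network can be used to generate text: starting from 'sos', the most probable next word is fed back in at each step. The tokenization below repeats the steps inside txt2corpus so that the word indices match the columns of dat$x; words, uniq.words, w.idx, and h are names introduced here.

# added illustration: greedy text generation from the trained RNN
words = unlist(strsplit(c('sos', gsub('[.]', ' . sos', tolower(txt))), " "))
uniq.words = unique(words)
h = matrix(0, nrow = 1, ncol = size.hidden)
w.idx = which(uniq.words == "sos")
for (i.gen in 1:8){
  x = matrix(0, nrow = 1, ncol = length(uniq.words))
  x[1, w.idx] = 1
  h = RNN.forward(h, x, network)
  p = softmax.pred(affine.forwd(h, network$W.o, network$b.o), NULL)
  w.idx = which.max(p)
  cat(uniq.words[w.idx], "")
}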

Cognitive Information Analysis Seminar: DL-2 ch. 5

txt = "You say goodbye and I say hello."

txt2corpus <- function(txt){
  txt = tolower(txt)
  txt = gsub('[.]', ' . sos',txt)
  words = unlist(strsplit(c('sos',txt), " "))
  uniq.words = unique(words)
  n.uniq = length(uniq.words)
  n.words = length(words)
  corpus = rep(0,n.words)
  corpus = match(words,uniq.words)
  return(corpus)
}

corp = txt2corpus(txt)

corp2contxt1SRNN = function(corpus){
  len.corp = length(corpus)
  # creating target matrix
  idxT = cbind(1:(len.corp-1), corpus[2:len.corp])
  targ1S = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  targ1S[idxT]=1
  # creating context matrices
  idxC = cbind(1:(len.corp-1),corpus[1:(len.corp-1)])
  contxt = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  contxt[idxC]=1
  return(list(y=targ1S,x=contxt))
}
dat<-corp2contxt1SRNN(corp)

init.RNN <- function(n.uniq,size.hidden){
  W.h = matrix(rnorm(size.hidden*size.hidden),nrow=size.hidden)*0.01
  W.x = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
  b = matrix(rnorm(size.hidden),nrow=1)*0.01
  return(list(W.h = W.h, W.x= W.x, b = b))
}

RNN.forward <- function(h.prev, x, network){
  b.size = nrow(x)
  h = h.prev%*%network$W.h + x%*%network$W.x
  hb = h+matrix(1,nrow=b.size,ncol=1)%*%network$b
  h.next = tanh(hb)
  return(h.next = h.next)
}

RNN.backward <- function(dh.next, network, x, h.next, h.prev){
  dt = dh.next * (1- h.next^2)
  db = colSums(dt)
  dW.h = t(h.prev)%*%dt
  dh.prev = dt%*%t(network$W.h)
  dW.x = t(x)%*%dt
  dx = dt%*%t(network$W.x)
  return(list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx=dx))
}

affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

# checking modules
batch.size = 3; time = 3

corp.len = nrow(dat$y) 
size.hidden = 5
network = init.RNN(length(unique(corp)), size.hidden)  # weights needed for the check below
h.prev = array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, time))

for (i.t in 1:time){
  idx = seq(i.t, corp.len, batch.size)
  h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat$x[idx,], network)
  if (i.t < time){
    h.prev[, , (i.t+1)] = h.next[, , i.t]
  }
}

W.out = matrix(rnorm(size.hidden * 8), nrow = size.hidden)
b.out = matrix(rnorm(8), nrow = 1)
O = affine.forwd(h.next[,,3], W.out, b.out)
L = softmax.forwd(O, dat$y[c(3,6,9),])
ds = softmax.bckwd(O, dat$y[c(3,6,9),], 1)
dW.o = affine.bckwd(h.next[,,3], W.out, b.out, ds)
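
Continuing the module check, the gradient can be pushed back through the last RNN step as well (an added line; str simply displays the shapes of the returned gradients).

# added check: backpropagate through the last RNN step
RNN.d = RNN.backward(dW.o$dx, network, dat$x[c(3,6,9), ], h.next[,,3], h.prev[,,3])
str(RNN.d)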