認知情報解析学演習 DL-2 ch.5

txt = "You say goodbye and I say hello."

txt2corpus <- function(txt){
  txt = tolower(txt)
  txt = gsub('[.]', ' . sos',txt)
  words = unlist(strsplit(c('sos',txt), " "))
  uniq.words = unique(words)
  n.uniq = length(uniq.words)
  n.words = length(words)
  corpus = rep(0,n.words)
  corpus = match(words,uniq.words)
  return(corpus)
}

corp = txt2corpus(txt)

corp2contxt1SRNN = function(corpus){
  len.corp = length(corpus)
  # creating target matrix
  idxT = cbind(1:(len.corp-1), corpus[2:len.corp])
  targ1S = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  targ1S[idxT]=1
  # creating context matrices
  idxC = cbind(1:(len.corp-1),corpus[1:(len.corp-1)])
  contxt = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  contxt[idxC]=1
  return(list(y=targ1S,x=contxt))
}
dat<-corp2contxt1SRNN(corp)

init.RNN <- function(n.uniq,size.hidden){
  W.h = matrix(rnorm(size.hidden*size.hidden),nrow=size.hidden)*0.01
  W.x = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
  b = matrix(rnorm(size.hidden),nrow=1)*0.01
  return(list(W.h = W.h, W.x= W.x, b = b))
}

RNN.forward <- function(h.prev, x, network){
  b.size = nrow(x)
  h = h.prev%*%network$W.h + x.temp%*%network$W.x
  hb = h+matrix(1,nrow=b.size,ncol=1)%*%network$b
  h.next = tanh(hb)
  return(h.next = h.next)
}

RNN.backward <- function(dh.next, network, x, h.next, h.prev){
  dt = dh.next * (1- h.next^2)
  db = colSums(dt)
  dW.h = t(h.prev)%*%dt
  dh.prev = dt%*%t(network$W.h)
  dW.x = t(x)%*%dt
  dx = dt%*%t(network$W.x)
  return(list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx=dx))
}

affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

# checking modules
batch.size = 3; time = 3

corp.len = nrow(dat$y) 
size.hidden = 5
h.prev =array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, time))

for (i.t in 1:time){
  idx = seq(i.t, corp.len, batch.size)
  h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat$x[idx,], network)
  if (i.t < time){
    h.prev[, , (i.t+1)] = h.next[, , i.t]
  }
}

W.out = matrix(rnorm(size.hidden * 8), nrow = size.hidden)
b.out = matrix(rnorm(8), nrow = 1)
O = affine.forwd(h.next[,,3], W.out, b.out)
L = softmax.forwd(O, dat$y[c(3,6,9),])
ds = softmax.bckwd(O, dat$y[c(3,6,9),], 1)
dW.o = affine.bckwd(O, W.out, ds)