認知情報解析学演習 RNN

txt = "You say goodbye and I say hello.  you say goodbay and I say hello"

txt2corpus <- function(txt){
  txt = tolower(txt)
  txt = gsub('[.]', ' . sos',txt)
  words = unlist(strsplit(c('sos',txt), " "))
  uniq.words = unique(words)
  n.uniq = length(uniq.words)
  n.words = length(words)
  corpus = rep(0,n.words)
  corpus = match(words,uniq.words)
  return(corpus)
}

corp = txt2corpus(txt)

corp2contxt1SRNN = function(corpus){
  len.corp = length(corpus)
  # creating target matrix
  idxT = cbind(1:(len.corp-1), corpus[2:len.corp])
  targ1S = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  targ1S[idxT]=1
  # creating context matrices
  idxC = cbind(1:(len.corp-1),corpus[1:(len.corp-1)])
  contxt = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
  contxt[idxC]=1
  return(list(y=targ1S,x=contxt))
}

init.RNN <- function(n.uniq,size.hidden){
  W.h = matrix(rnorm(size.hidden*size.hidden),nrow=size.hidden)*0.01
  W.x = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
  b = matrix(rnorm(size.hidden),nrow=1)*0.01
  return(list(W.h = W.h, W.x= W.x, b = b))
}
MatMult.forwd <- function(x, W){
  return(x%*%W)
}

MatMult.bckwd <- function(x, W, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  return(list(dx = dx, dW = dW))
}

affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}


softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.pred <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}


RNN.forward <- function(h.prev, x, network){
  b.size = nrow(x)
  h = h.prev%*%network$W.h + x%*%network$W.x
  hb = h+matrix(1,nrow=b.size,ncol=1)%*%network$b
  h.next = tanh(hb)
  return(h.next = h.next)
}

RNN.backward <- function(dh.next, network, x, h.next, h.prev){
  dt = dh.next * (1- h.next^2)
  db = colSums(dt)
  dW.h = t(h.prev)%*%dt
  dh.prev = dt%*%t(network$W.h)
  dW.x = t(x)%*%dt
  dx = dt%*%t(network$W.x)
  return(list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx=dx))
}


txt = "You say goodbye and I say hello."

batch.size = 3
time = 3

size.hidden = 7
corp = txt2corpus(txt)
dat<-corp2contxt1SRNN(corp)
network <- init.RNN(ncol(dat$y), size.hidden)
corp.len = nrow(dat$y)
h.prev =array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, (time+1)))
W.out = matrix(rnorm(size.hidden * ncol(dat$y)), nrow = size.hidden)
b.out = matrix(rnorm(ncol(dat$y)), nrow = 1)
n.rep = 100000;lambda = 0.25;

loss = rep(0,n.rep)
for (i.rep in 1:n.rep){
  for (i.t in 1:time){
    idx = seq(i.t, corp.len, time)
    h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat$x[idx,], network)
    if (i.t < time){
      h.prev[, , (i.t+1)] = h.next[, , i.t]
    }
  }
  dWHs = matrix(0,nrow=nrow(network$W.h),ncol=ncol(network$W.h))
  dWXs = matrix(0,nrow=nrow(network$W.x),ncol=ncol(network$W.x))
  dBs = matrix(0,nrow=nrow(network$b),ncol=ncol(network$b))
  dWOs = matrix(0,nrow=nrow(W.out),ncol=ncol(W.out))
  dBOs = matrix(0,nrow=nrow(b.out),ncol=ncol(b.out))
  d.prev = matrix(0,nrow=batch.size,ncol=size.hidden)
  L = 0
  for (i.t in time:1){
    idx = idx = seq(i.t, corp.len, time)
    O = affine.forwd(h.next[,,i.t], W.out, b.out)
    L = L + softmax.forwd(O, dat$y[idx,])
    ds = softmax.bckwd(O, dat$y[idx,], 1)
    dW.o = affine.bckwd(h.next[,,i.t], W.out, b.out, ds)
    dWOs = dWOs + dW.o$dW
    dBOs = dBOs + dW.o$db
    RNN.d = RNN.backward(dW.o$dx+d.prev, network, dat$x[idx,],h.next[,,(i.t+1)],h.prev[,,i.t])
    #RNN.d = RNN.backward(dW.o$dx, network, dat$x[idx,],h.next[,,i.t],h.prev[,,i.t])
    dWHs = dWHs + RNN.d$dW.h
    dWXs = dWXs + RNN.d$dW.x
    dBs = dBs + RNN.d$db
    d.prev = RNN.d$dh.prev
  }
  loss[i.rep] = L
  W.out = W.out - lambda*dWOs
  b.out = b.out - lambda*dBOs
  network$W.h - lambda*dWHs
  network$W.x - lambda*dWXs
  network$b - lambda*dBs
}
plot(loss, type='l')
for (i.t in 1:time){
  idx = idx = seq(i.t, corp.len, time)
  O = affine.forwd(h.next[,,i.t], W.out, b.out)
  print(softmax.pred(O, dat$y[idx,]))
}