init.RNN <- function(n.uniq,size.hidden){
W.h = matrix(rnorm(size.hidden*size.hidden), nrow = size.hidden)*0.01
W.x = matrix(rnorm(n.uniq*size.hidden), nrow = n.uniq)*0.01
b.h = matrix(rnorm(size.hidden), nrow = 1)*0.01
W.o = matrix(rnorm(n.uniq*size.hidden),nrow = size.hidden)*0.01
b.o = matrix(rnorm(n.uniq), nrow = 1)*0.01
return(list(W.h = W.h, W.x= W.x, b.h = b.h, W.o = W.o, b.o = b.o))
}
affine.forwd <- function(x, W, b){
return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}
affine.bckwd <- function(x, W, b, dout){
dx = dout%*%t(W)
dW = t(x)%*%dout
db = colSums(dout)
return(list(dx = dx, dW = dW, db = db))
}
softmax.forwd <- function(x, target){
max.x = apply(x,1,max)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
delta = 1e-7;
R = nrow(as.matrix(y))
return(-sum(target*log(y + delta))/R)
}
softmax.pred <- function(x, target){
max.x = apply(x,1,max)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
return(y)
}
softmax.bckwd <- function(x, target, dout = 1){
max.x = apply(x, 1, max)
R = nrow(x)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
return((y-target)/R)
}
RNN.forward <- function(h.prev, x, network){
b.size = nrow(x)
h = h.prev%*%network$W.h + x%*%network$W.x
hb = h+matrix(1,nrow=b.size,ncol=1)%*%network$b.h
h.next = tanh(hb)
return(h.next = h.next)
}
RNN.backward <- function(dh.next, network, x, h.next, h.prev){
dt = dh.next * (1- h.next^2)
db = colSums(dt)
dW.h = t(h.prev)%*%dt
dh.prev = dt%*%t(network$W.h)
dW.x = t(x)%*%dt
dx = dt%*%t(network$W.x)
return(list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx=dx))
}
txt = "You say goodbye and I say hello. you say goodbay and I say hello"
txt2corpus <- function(txt){
txt = tolower(txt)
txt = gsub('[.]', ' . sos',txt)
words = unlist(strsplit(c('sos',txt), " "))
uniq.words = unique(words)
n.uniq = length(uniq.words)
n.words = length(words)
corpus = rep(0,n.words)
corpus = match(words,uniq.words)
return(corpus)
}
corp2contxt1SRNN = function(corpus){
len.corp = length(corpus)
# creating target matrix
idxT = cbind(1:(len.corp-1), corpus[2:len.corp])
targ1S = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
targ1S[idxT]=1
# creating context matrices
idxC = cbind(1:(len.corp-1),corpus[1:(len.corp-1)])
contxt = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
contxt[idxC]=1
return(list(y=targ1S,x=contxt))
}
corp = txt2corpus(txt)
dat = corp2contxt1SRNN(corp)
size.hidden = 7
network <- init.RNN(8,size.hidden)
n.rep = 100000;lambda = 0.01;batch.size = 3; time = 3;
h.prev =array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, (time+1)))
loss = rep(0,n.rep)
for (i.rep in 1:n.rep){
for (i.t in 1:time){
idx = seq(i.t, corp.len, time)
h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat$x[idx,], network)
if (i.t < time){
h.prev[, , (i.t+1)] = h.next[, , i.t]
}
}
dWHs = matrix(0,nrow=nrow(network$W.h),ncol=ncol(network$W.h))
dWXs = matrix(0,nrow=nrow(network$W.x),ncol=ncol(network$W.x))
dBHs = matrix(0,nrow=nrow(network$b.h),ncol=ncol(network$b.h))
dWOs = matrix(0,nrow=nrow(network$W.o),ncol=ncol(network$W.o))
dBOs = matrix(0,nrow=nrow(network$b.o),ncol=ncol(network$b.o))
d.prev = matrix(0,nrow=batch.size,ncol=size.hidden)
L = 0
for (i.t in time:1){
idx = idx = seq(i.t, corp.len, time)
O = affine.forwd(h.next[,,i.t], network$W.o, network$b.o)
L = L + softmax.forwd(O, dat$y[idx,])
ds = softmax.bckwd(O, dat$y[idx,], 1)
dW.o = affine.bckwd(h.next[,,i.t], network$W.o, network$b.o, ds)
dWOs = dWOs + dW.o$dW
dBOs = dBOs + dW.o$db
RNN.d = RNN.backward(dW.o$dx+d.prev, network, dat$x[idx,],h.next[,,(i.t+1)],h.prev[,,i.t])
dWHs = dWHs + RNN.d$dW.h
dWXs = dWXs + RNN.d$dW.x
dBHs = dBHs + RNN.d$db
d.prev = RNN.d$dh.prev
}
loss[i.rep] = L
network$W.o = network$W.o - lambda*dWOs
network$b.o = network$b.o - lambda*dBOs
network$W.h = network$W.h - lambda*dWHs
network$W.x = network$W.x - lambda*dWXs
network$b.h = network$b.h - lambda*dBHs
}
plot(loss,type='l')
par(mfrow=c(9,1))
for (i.t in 1:time){
idx = idx = seq(i.t, corp.len, time)
O = affine.forwd(h.next[,,i.t], network$W.o, network$b.o)
print(softmax.pred(O, dat$y[idx,]))
for (i in 1:3){
barplot(softmax.pred(O, dat$y[idx,])[i,])
}
}
Related