## RNN language model (next-word prediction) trained on a toy text, with
## the output layer (W.o, b.o) stored inside the `network` list.
## Fixes vs. original: the gsub() replacement string contained a stray
## newline; the vocabulary size was hard-coded as 8 (this text has 9 unique
## tokens); `corp.len` was used but never defined; and RNN.backward was
## given h.next[,, i.t + 1] (a zero slice at the last step) instead of the
## same step's output h.next[,, i.t].

# Initialize all RNN parameters with small Gaussian noise.
# n.uniq: vocabulary size; size.hidden: hidden-layer width.
init.RNN <- function(n.uniq, size.hidden) {
  W.h <- matrix(rnorm(size.hidden * size.hidden), nrow = size.hidden) * 0.01  # hidden -> hidden
  W.x <- matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq) * 0.01            # input  -> hidden
  b.h <- matrix(rnorm(size.hidden), nrow = 1) * 0.01                          # hidden bias
  W.o <- matrix(rnorm(n.uniq * size.hidden), nrow = size.hidden) * 0.01       # hidden -> output
  b.o <- matrix(rnorm(n.uniq), nrow = 1) * 0.01                               # output bias
  list(W.h = W.h, W.x = W.x, b.h = b.h, W.o = W.o, b.o = b.o)
}

# Affine (fully connected) layer: y = xW + b, with b broadcast over rows.
affine.forwd <- function(x, W, b) {
  x %*% W + matrix(1, nrow = nrow(x), ncol = 1) %*% b
}

# Backward pass of the affine layer.
affine.bckwd <- function(x, W, b, dout) {
  dx <- dout %*% t(W)
  dW <- t(x) %*% dout
  db <- colSums(dout)
  list(dx = dx, dW = dW, db = db)
}

# Row-wise softmax + cross-entropy loss (max-subtracted for stability).
softmax.forwd <- function(x, target) {
  max.x <- apply(x, 1, max)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y <- exp(x) / rowSums(exp(x))
  delta <- 1e-7  # guards log(0)
  R <- nrow(as.matrix(y))
  -sum(target * log(y + delta)) / R
}

# Softmax probabilities only (for prediction / inspection).
softmax.pred <- function(x, target) {
  max.x <- apply(x, 1, max)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  exp(x) / rowSums(exp(x))
}

# Gradient of softmax + cross-entropy w.r.t. the logits.
softmax.bckwd <- function(x, target, dout = 1) {
  max.x <- apply(x, 1, max)
  R <- nrow(x)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y <- exp(x) / rowSums(exp(x))
  (y - target) / R
}

# One recurrent step: h.next = tanh(h.prev W.h + x W.x + b.h).
RNN.forward <- function(h.prev, x, network) {
  b.size <- nrow(x)
  h <- h.prev %*% network$W.h + x %*% network$W.x
  hb <- h + matrix(1, nrow = b.size, ncol = 1) %*% network$b.h
  tanh(hb)
}

# Backward through one recurrent step; h.next must be the tanh output of
# the SAME step (the tanh derivative is 1 - h.next^2).
RNN.backward <- function(dh.next, network, x, h.next, h.prev) {
  dt <- dh.next * (1 - h.next^2)
  db <- colSums(dt)
  dW.h <- t(h.prev) %*% dt
  dh.prev <- dt %*% t(network$W.h)
  dW.x <- t(x) %*% dt
  dx <- dt %*% t(network$W.x)
  list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx = dx)
}

txt <- "You say goodbye and I say hello. you say goodbay and I say hello"

# Tokenize: lower-case, mark each sentence end with " . sos", map words
# to integer ids in order of first appearance.
txt2corpus <- function(txt) {
  txt <- tolower(txt)
  txt <- gsub('[.]', ' . sos', txt)  # fixed: no stray newline in the replacement
  words <- unlist(strsplit(c('sos', txt), " "))
  uniq.words <- unique(words)
  match(words, uniq.words)
}

# Build one-step-ahead training pairs as one-hot matrices:
# x = word at position t, y = word at position t + 1.
corp2contxt1SRNN <- function(corpus) {
  len.corp <- length(corpus)
  n.uniq <- length(unique(corpus))
  # target matrix (next word)
  idxT <- cbind(1:(len.corp - 1), corpus[2:len.corp])
  targ1S <- matrix(0, nrow = len.corp - 1, ncol = n.uniq)
  targ1S[idxT] <- 1
  # context matrix (current word)
  idxC <- cbind(1:(len.corp - 1), corpus[1:(len.corp - 1)])
  contxt <- matrix(0, nrow = len.corp - 1, ncol = n.uniq)
  contxt[idxC] <- 1
  list(y = targ1S, x = contxt)
}

corp <- txt2corpus(txt)
dat <- corp2contxt1SRNN(corp)
size.hidden <- 7
# fixed: derive vocabulary size from the data (9 unique tokens here),
# not a hard-coded 8
network <- init.RNN(ncol(dat$y), size.hidden)
n.rep <- 100000; lambda <- 0.01; time <- 3
# fixed: corp.len was never defined; truncate to a multiple of `time` so
# every truncated-BPTT segment has the same batch size
corp.len <- floor(nrow(dat$y) / time) * time
batch.size <- corp.len / time
h.prev <- array(0, c(batch.size, size.hidden, time))
h.next <- array(0, c(batch.size, size.hidden, (time + 1)))
loss <- rep(0, n.rep)
for (i.rep in 1:n.rep) {
  # forward through `time` steps, carrying the hidden state
  for (i.t in 1:time) {
    idx <- seq(i.t, corp.len, time)
    h.next[, , i.t] <- RNN.forward(h.prev[, , i.t], dat$x[idx, ], network)
    if (i.t < time) {
      h.prev[, , (i.t + 1)] <- h.next[, , i.t]
    }
  }
  # gradient accumulators, one per parameter
  dWHs <- matrix(0, nrow = nrow(network$W.h), ncol = ncol(network$W.h))
  dWXs <- matrix(0, nrow = nrow(network$W.x), ncol = ncol(network$W.x))
  dBHs <- matrix(0, nrow = nrow(network$b.h), ncol = ncol(network$b.h))
  dWOs <- matrix(0, nrow = nrow(network$W.o), ncol = ncol(network$W.o))
  dBOs <- matrix(0, nrow = nrow(network$b.o), ncol = ncol(network$b.o))
  d.prev <- matrix(0, nrow = batch.size, ncol = size.hidden)
  L <- 0
  # backward through time
  for (i.t in time:1) {
    idx <- seq(i.t, corp.len, time)
    O <- affine.forwd(h.next[, , i.t], network$W.o, network$b.o)
    L <- L + softmax.forwd(O, dat$y[idx, ])
    ds <- softmax.bckwd(O, dat$y[idx, ], 1)
    dW.o <- affine.bckwd(h.next[, , i.t], network$W.o, network$b.o, ds)
    dWOs <- dWOs + dW.o$dW
    dBOs <- dBOs + dW.o$db
    # fixed: pass h.next of the SAME step (was i.t + 1, a zero slice at the
    # last step, which wipes out the tanh derivative)
    RNN.d <- RNN.backward(dW.o$dx + d.prev, network, dat$x[idx, ],
                          h.next[, , i.t], h.prev[, , i.t])
    dWHs <- dWHs + RNN.d$dW.h
    dWXs <- dWXs + RNN.d$dW.x
    dBHs <- dBHs + RNN.d$db
    d.prev <- RNN.d$dh.prev
  }
  loss[i.rep] <- L
  # vanilla gradient-descent update
  network$W.o <- network$W.o - lambda * dWOs
  network$b.o <- network$b.o - lambda * dBOs
  network$W.h <- network$W.h - lambda * dWHs
  network$W.x <- network$W.x - lambda * dWXs
  network$b.h <- network$b.h - lambda * dBHs
}
plot(loss, type = 'l')
par(mfrow = c(9, 1))  # time (3) x 3 barplots per step
for (i.t in 1:time) {
  idx <- seq(i.t, corp.len, time)
  O <- affine.forwd(h.next[, , i.t], network$W.o, network$b.o)
  print(softmax.pred(O, dat$y[idx, ]))
  for (i in 1:3) {
    barplot(softmax.pred(O, dat$y[idx, ])[i, ])
  }
}
Monthly Archives: January 2019
認知情報解析学演習 RNN (Cognitive Information Analysis Exercise: RNN)
## RNN language model: recurrent parameters live in `network` (W.h, W.x, b)
## and the output layer is kept separately (W.out, b.out).
## Fixes vs. original:
##  * network$W.h, network$W.x and network$b were "updated" via bare
##    expressions (`network$W.h - lambda*dWHs` etc.) whose results were
##    discarded, so the recurrent weights never learned;
##  * RNN.backward was given h.next[,, i.t + 1] — a zero slice at the final
##    step — instead of the same step's output h.next[,, i.t], which the
##    tanh derivative requires (the original's commented-out line shows the
##    author suspected this).

txt <- "You say goodbye and I say hello. you say goodbay and I say hello"

# Tokenize: lower-case, mark each sentence end with " . sos", map words
# to integer ids in order of first appearance.
txt2corpus <- function(txt) {
  txt <- tolower(txt)
  txt <- gsub('[.]', ' . sos', txt)
  words <- unlist(strsplit(c('sos', txt), " "))
  uniq.words <- unique(words)
  match(words, uniq.words)
}

corp <- txt2corpus(txt)

# Build one-step-ahead training pairs as one-hot matrices
# (x = word at t, y = word at t + 1).
corp2contxt1SRNN <- function(corpus) {
  len.corp <- length(corpus)
  n.uniq <- length(unique(corpus))
  # target matrix
  idxT <- cbind(1:(len.corp - 1), corpus[2:len.corp])
  targ1S <- matrix(0, nrow = len.corp - 1, ncol = n.uniq)
  targ1S[idxT] <- 1
  # context matrix
  idxC <- cbind(1:(len.corp - 1), corpus[1:(len.corp - 1)])
  contxt <- matrix(0, nrow = len.corp - 1, ncol = n.uniq)
  contxt[idxC] <- 1
  list(y = targ1S, x = contxt)
}

# Small random initialization of the recurrent parameters only.
init.RNN <- function(n.uniq, size.hidden) {
  W.h <- matrix(rnorm(size.hidden * size.hidden), nrow = size.hidden) * 0.01
  W.x <- matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq) * 0.01
  b <- matrix(rnorm(size.hidden), nrow = 1) * 0.01
  list(W.h = W.h, W.x = W.x, b = b)
}

# Plain matrix-product layer (defined for completeness; unused below).
MatMult.forwd <- function(x, W) {
  x %*% W
}

MatMult.bckwd <- function(x, W, dout) {
  dx <- dout %*% t(W)
  dW <- t(x) %*% dout
  list(dx = dx, dW = dW)
}

# Affine layer: y = xW + b, with b broadcast across rows.
affine.forwd <- function(x, W, b) {
  x %*% W + matrix(1, nrow = nrow(x), ncol = 1) %*% b
}

affine.bckwd <- function(x, W, b, dout) {
  dx <- dout %*% t(W)
  dW <- t(x) %*% dout
  db <- colSums(dout)
  list(dx = dx, dW = dW, db = db)
}

# Row-wise softmax + cross-entropy loss (max-subtracted for stability).
softmax.forwd <- function(x, target) {
  max.x <- apply(x, 1, max)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y <- exp(x) / rowSums(exp(x))
  delta <- 1e-7  # guards log(0)
  R <- nrow(as.matrix(y))
  -sum(target * log(y + delta)) / R
}

# Softmax probabilities only.
softmax.pred <- function(x, target) {
  max.x <- apply(x, 1, max)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  exp(x) / rowSums(exp(x))
}

# Gradient of softmax + cross-entropy w.r.t. the logits.
softmax.bckwd <- function(x, target, dout = 1) {
  max.x <- apply(x, 1, max)
  R <- nrow(x)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y <- exp(x) / rowSums(exp(x))
  (y - target) / R
}

# One recurrent step: h.next = tanh(h.prev W.h + x W.x + b).
RNN.forward <- function(h.prev, x, network) {
  b.size <- nrow(x)
  h <- h.prev %*% network$W.h + x %*% network$W.x
  hb <- h + matrix(1, nrow = b.size, ncol = 1) %*% network$b
  tanh(hb)
}

# Backward through one step; h.next must be this step's tanh output.
RNN.backward <- function(dh.next, network, x, h.next, h.prev) {
  dt <- dh.next * (1 - h.next^2)
  db <- colSums(dt)
  dW.h <- t(h.prev) %*% dt
  dh.prev <- dt %*% t(network$W.h)
  dW.x <- t(x) %*% dt
  dx <- dt %*% t(network$W.x)
  list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx = dx)
}

txt <- "You say goodbye and I say hello."  # 9 pairs = 3 steps x batch of 3
batch.size <- 3
time <- 3
size.hidden <- 7
corp <- txt2corpus(txt)
dat <- corp2contxt1SRNN(corp)
network <- init.RNN(ncol(dat$y), size.hidden)
corp.len <- nrow(dat$y)
h.prev <- array(0, c(batch.size, size.hidden, time))
h.next <- array(0, c(batch.size, size.hidden, (time + 1)))
W.out <- matrix(rnorm(size.hidden * ncol(dat$y)), nrow = size.hidden)
b.out <- matrix(rnorm(ncol(dat$y)), nrow = 1)
n.rep <- 100000; lambda <- 0.25
loss <- rep(0, n.rep)
for (i.rep in 1:n.rep) {
  # forward through `time` steps, carrying the hidden state
  for (i.t in 1:time) {
    idx <- seq(i.t, corp.len, time)
    h.next[, , i.t] <- RNN.forward(h.prev[, , i.t], dat$x[idx, ], network)
    if (i.t < time) {
      h.prev[, , (i.t + 1)] <- h.next[, , i.t]
    }
  }
  # gradient accumulators
  dWHs <- matrix(0, nrow = nrow(network$W.h), ncol = ncol(network$W.h))
  dWXs <- matrix(0, nrow = nrow(network$W.x), ncol = ncol(network$W.x))
  dBs <- matrix(0, nrow = nrow(network$b), ncol = ncol(network$b))
  dWOs <- matrix(0, nrow = nrow(W.out), ncol = ncol(W.out))
  dBOs <- matrix(0, nrow = nrow(b.out), ncol = ncol(b.out))
  d.prev <- matrix(0, nrow = batch.size, ncol = size.hidden)
  L <- 0
  # backward through time
  for (i.t in time:1) {
    idx <- seq(i.t, corp.len, time)
    O <- affine.forwd(h.next[, , i.t], W.out, b.out)
    L <- L + softmax.forwd(O, dat$y[idx, ])
    ds <- softmax.bckwd(O, dat$y[idx, ], 1)
    dW.o <- affine.bckwd(h.next[, , i.t], W.out, b.out, ds)
    dWOs <- dWOs + dW.o$dW
    dBOs <- dBOs + dW.o$db
    # fixed: h.next of the SAME step (i.t), not i.t + 1
    RNN.d <- RNN.backward(dW.o$dx + d.prev, network, dat$x[idx, ],
                          h.next[, , i.t], h.prev[, , i.t])
    dWHs <- dWHs + RNN.d$dW.h
    dWXs <- dWXs + RNN.d$dW.x
    dBs <- dBs + RNN.d$db
    d.prev <- RNN.d$dh.prev
  }
  loss[i.rep] <- L
  W.out <- W.out - lambda * dWOs
  b.out <- b.out - lambda * dBOs
  # fixed: the three updates below were computed but never assigned back
  network$W.h <- network$W.h - lambda * dWHs
  network$W.x <- network$W.x - lambda * dWXs
  network$b <- network$b - lambda * dBs
}
plot(loss, type = 'l')
for (i.t in 1:time) {
  idx <- seq(i.t, corp.len, time)
  O <- affine.forwd(h.next[, , i.t], W.out, b.out)
  print(softmax.pred(O, dat$y[idx, ]))
}
認知情報解析学演習 DL-2 ch.5 (Cognitive Information Analysis Exercise: Deep Learning 2, ch. 5)
## Module check for the RNN layers (Deep Learning 2, ch. 5 exercise):
## builds the toy dataset, runs `time` forward steps, then one output
## affine + softmax forward/backward pass.
## Fixes vs. original:
##  * RNN.forward referenced an undefined variable `x.temp` instead of its
##    parameter `x`;
##  * `network` was used without ever being created — init.RNN is now called;
##  * the step-index stride used batch.size where `time` is meant (both are
##    3 here, so the indices are unchanged);
##  * the final affine.bckwd call passed the wrong input (O instead of the
##    hidden state), omitted b.out, and omitted the required `dout` argument.

txt <- "You say goodbye and I say hello."

# Tokenize: lower-case, mark each sentence end with " . sos", map words
# to integer ids in order of first appearance.
txt2corpus <- function(txt) {
  txt <- tolower(txt)
  txt <- gsub('[.]', ' . sos', txt)
  words <- unlist(strsplit(c('sos', txt), " "))
  uniq.words <- unique(words)
  match(words, uniq.words)
}

corp <- txt2corpus(txt)

# Build one-step-ahead training pairs as one-hot matrices
# (x = word at t, y = word at t + 1).
corp2contxt1SRNN <- function(corpus) {
  len.corp <- length(corpus)
  n.uniq <- length(unique(corpus))
  # target matrix
  idxT <- cbind(1:(len.corp - 1), corpus[2:len.corp])
  targ1S <- matrix(0, nrow = len.corp - 1, ncol = n.uniq)
  targ1S[idxT] <- 1
  # context matrix
  idxC <- cbind(1:(len.corp - 1), corpus[1:(len.corp - 1)])
  contxt <- matrix(0, nrow = len.corp - 1, ncol = n.uniq)
  contxt[idxC] <- 1
  list(y = targ1S, x = contxt)
}

dat <- corp2contxt1SRNN(corp)

# Small random initialization of the recurrent parameters.
init.RNN <- function(n.uniq, size.hidden) {
  W.h <- matrix(rnorm(size.hidden * size.hidden), nrow = size.hidden) * 0.01
  W.x <- matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq) * 0.01
  b <- matrix(rnorm(size.hidden), nrow = 1) * 0.01
  list(W.h = W.h, W.x = W.x, b = b)
}

# One recurrent step: h.next = tanh(h.prev W.h + x W.x + b).
RNN.forward <- function(h.prev, x, network) {
  b.size <- nrow(x)
  # fixed: was `x.temp %*% network$W.x` with x.temp undefined
  h <- h.prev %*% network$W.h + x %*% network$W.x
  hb <- h + matrix(1, nrow = b.size, ncol = 1) %*% network$b
  tanh(hb)
}

# Backward through one step; h.next must be this step's tanh output.
RNN.backward <- function(dh.next, network, x, h.next, h.prev) {
  dt <- dh.next * (1 - h.next^2)
  db <- colSums(dt)
  dW.h <- t(h.prev) %*% dt
  dh.prev <- dt %*% t(network$W.h)
  dW.x <- t(x) %*% dt
  dx <- dt %*% t(network$W.x)
  list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx = dx)
}

# Affine layer: y = xW + b, with b broadcast across rows.
affine.forwd <- function(x, W, b) {
  x %*% W + matrix(1, nrow = nrow(x), ncol = 1) %*% b
}

affine.bckwd <- function(x, W, b, dout) {
  dx <- dout %*% t(W)
  dW <- t(x) %*% dout
  db <- colSums(dout)
  list(dx = dx, dW = dW, db = db)
}

# Row-wise softmax + cross-entropy loss (max-subtracted for stability).
softmax.forwd <- function(x, target) {
  max.x <- apply(x, 1, max)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y <- exp(x) / rowSums(exp(x))
  delta <- 1e-7  # guards log(0)
  R <- nrow(as.matrix(y))
  -sum(target * log(y + delta)) / R
}

# Gradient of softmax + cross-entropy w.r.t. the logits.
softmax.bckwd <- function(x, target, dout = 1) {
  max.x <- apply(x, 1, max)
  R <- nrow(x)
  C <- ncol(x)
  x <- x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y <- exp(x) / rowSums(exp(x))
  (y - target) / R
}

# checking modules
batch.size <- 3; time <- 3
corp.len <- nrow(dat$y)
size.hidden <- 5
# fixed: network was never initialized before use
network <- init.RNN(ncol(dat$y), size.hidden)
h.prev <- array(0, c(batch.size, size.hidden, time))
h.next <- array(0, c(batch.size, size.hidden, time))
for (i.t in 1:time) {
  idx <- seq(i.t, corp.len, time)  # fixed: stride is `time`, not batch.size
  h.next[, , i.t] <- RNN.forward(h.prev[, , i.t], dat$x[idx, ], network)
  if (i.t < time) {
    h.prev[, , (i.t + 1)] <- h.next[, , i.t]
  }
}
W.out <- matrix(rnorm(size.hidden * ncol(dat$y)), nrow = size.hidden)
b.out <- matrix(rnorm(ncol(dat$y)), nrow = 1)
O <- affine.forwd(h.next[, , 3], W.out, b.out)
L <- softmax.forwd(O, dat$y[c(3, 6, 9), ])
ds <- softmax.bckwd(O, dat$y[c(3, 6, 9), ], 1)
# fixed: x is the affine input h.next[,,3]; b.out and ds were missing
dW.o <- affine.bckwd(h.next[, , 3], W.out, b.out, ds)