# 認知情報解析学演習b

```r
init.RNN <- function(n.uniq, size.hidden) {
  # Initialize all RNN parameters with small (sd ~ 0.01) Gaussian noise.
  #   n.uniq      : vocabulary size (one-hot input/output width)
  #   size.hidden : number of hidden units
  #   W.h / b.h   : hidden-to-hidden weights and bias
  #   W.x         : input-to-hidden weights
  #   W.o / b.o   : hidden-to-output weights and bias
  W.h = matrix(rnorm(size.hidden * size.hidden), nrow = size.hidden) * 0.01
  W.x = matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq) * 0.01
  b.h = matrix(rnorm(size.hidden), nrow = 1) * 0.01
  W.o = matrix(rnorm(n.uniq * size.hidden), nrow = size.hidden) * 0.01
  b.o = matrix(rnorm(n.uniq), nrow = 1) * 0.01
  return(list(W.h = W.h, W.x = W.x, b.h = b.h, W.o = W.o, b.o = b.o))
}

affine.forwd <- function(x, W, b) {
  # Affine (fully connected) forward pass: x %*% W + b.
  # b is a 1-row bias matrix; sweep() adds it to every row of the product.
  sweep(x %*% W, 2, as.vector(b), "+")
}

affine.bckwd <- function(x, W, b, dout) {
  # Affine backward pass.
  #   dx: gradient w.r.t. the input   (dout %*% t(W))
  #   dW: gradient w.r.t. the weights (t(x) %*% dout)
  #   db: gradient w.r.t. the bias    (column sums of dout)
  # b is unused here; kept in the signature for symmetry with affine.forwd.
  list(
    dx = tcrossprod(dout, W),
    dW = crossprod(x, dout),
    db = colSums(dout)
  )
}

softmax.forwd <- function(x, target) {
  # Softmax + cross-entropy loss, averaged over the rows (batch).
  # The row-wise max is subtracted first for numerical stability,
  # and a small constant keeps log() away from log(0).
  shifted = sweep(x, 1, apply(x, 1, max))
  ex = exp(shifted)
  y = ex / rowSums(ex)
  n.row = nrow(as.matrix(y))
  -sum(target * log(y + 1e-7)) / n.row
}

softmax.pred <- function(x, target) {
  # Row-wise softmax probabilities. `target` is unused; it is kept so the
  # call signature matches softmax.forwd.
  ex = exp(sweep(x, 1, apply(x, 1, max)))
  ex / rowSums(ex)
}

softmax.bckwd <- function(x, target,  dout = 1) {
  # Gradient of softmax + cross-entropy w.r.t. the scores x:
  # (y - target) / batch_size.  dout is unused (assumed to be 1).
  ex = exp(sweep(x, 1, apply(x, 1, max)))
  y = ex / rowSums(ex)
  (y - target) / nrow(x)
}

RNN.forward <- function(h.prev, x, network) {
  # One RNN step: h.next = tanh(h.prev %*% W.h + x %*% W.x + b.h).
  #   h.prev  : batch x hidden matrix, previous hidden state
  #   x       : batch x vocab one-hot input rows
  #   network : list holding W.h, W.x and the hidden bias b.h
  # Fix: "$" was escaped as "\$" (not valid R); return() no longer uses a
  # spurious named argument.
  b.size = nrow(x)
  h = h.prev %*% network$W.h + x %*% network$W.x
  hb = h + matrix(1, nrow = b.size, ncol = 1) %*% network$b.h
  tanh(hb)
}

RNN.backward <- function(dh.next, network, x, h.next, h.prev) {
  # Backward pass of one tanh RNN step.
  #   dh.next : gradient flowing into this step's output
  #   h.next  : this step's output, needed for the tanh derivative
  # Returns gradients for the bias, both weight matrices, the previous
  # hidden state and the input.
  # Fix: "$" was escaped as "\$" (not valid R).
  dt = dh.next * (1 - h.next^2)  # tanh'(z) = 1 - tanh(z)^2
  db = colSums(dt)
  dW.h = t(h.prev) %*% dt
  dh.prev = dt %*% t(network$W.h)
  dW.x = t(x) %*% dt
  dx = dt %*% t(network$W.x)
  list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx = dx)
}

# Training text (the repeated sentence and the "goodbay" spelling are part
# of the original data and are kept as-is).
txt = "You say goodbye and I say hello.  you say goodbay and I say hello"

txt2corpus <- function(txt) {
  # Convert text to a sequence of integer word IDs.
  # A leading "sos" token is prepended, and every "." is rewritten as
  # " . sos" so each sentence ends with "." followed by a fresh "sos".
  # Fix: removed the dead pre-allocation of `corpus` (it was immediately
  # overwritten by match()) and the unused n.uniq/n.words locals.
  txt = tolower(txt)
  txt = gsub('[.]', ' . sos', txt)
  words = unlist(strsplit(c('sos', txt), " "))
  match(words, unique(words))
}

corp2contxt1SRNN = function(corpus) {
  # Build one-hot training pairs for the RNN:
  #   x[t, ] encodes corpus[t]; y[t, ] encodes the next word corpus[t + 1].
  n.pair = length(corpus) - 1
  n.vocab = length(unique(corpus))
  # one-hot encode a vector of word IDs, one row per pair
  one.hot <- function(ids) {
    m = matrix(0, nrow = n.pair, ncol = n.vocab)
    m[cbind(seq_len(n.pair), ids)] = 1
    m
  }
  list(y = one.hot(corpus[-1]), x = one.hot(corpus[-(n.pair + 1)]))
}

# Build the dataset once, then initialize the network.
corp = txt2corpus(txt)
dat = corp2contxt1SRNN(corp)
# Fix: corp.len was used by the training loop below
# (idx = seq(i.t, corp.len, time)) but was never defined in this chunk.
corp.len = nrow(dat$y)

size.hidden = 7
network <- init.RNN(8, size.hidden)

# ----- training loop (truncated BPTT) -----
# NOTE(review): corp.len is used below but is not defined in this chunk;
# presumably it should be nrow(dat\$y) as in the later chunk — confirm.
n.rep = 100000;lambda = 0.01;batch.size = 3; time = 3;
# hidden-state buffers: batch x hidden x time (h.next keeps one extra slot)
h.prev =array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, (time+1)))
loss = rep(0,n.rep)
for (i.rep in 1:n.rep){
# forward pass: run the RNN cell through all time steps,
# carrying the hidden state forward
for (i.t in 1:time){
idx = seq(i.t, corp.len, time)
h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat\$x[idx,], network)
if (i.t < time){
h.prev[, , (i.t+1)] = h.next[, , i.t]
}
}
# zero the gradient accumulators (summed over time steps)
dWHs = matrix(0,nrow=nrow(network\$W.h),ncol=ncol(network\$W.h))
dWXs = matrix(0,nrow=nrow(network\$W.x),ncol=ncol(network\$W.x))
dBHs = matrix(0,nrow=nrow(network\$b.h),ncol=ncol(network\$b.h))
dWOs = matrix(0,nrow=nrow(network\$W.o),ncol=ncol(network\$W.o))
dBOs = matrix(0,nrow=nrow(network\$b.o),ncol=ncol(network\$b.o))
d.prev = matrix(0,nrow=batch.size,ncol=size.hidden)
L = 0
# backward pass, from the last time step to the first
for (i.t in time:1){
# NOTE(review): the double assignment "idx = idx =" is redundant (harmless)
idx = idx = seq(i.t, corp.len, time)
O = affine.forwd(h.next[,,i.t], network\$W.o, network\$b.o)
L = L + softmax.forwd(O, dat\$y[idx,])
ds = softmax.bckwd(O, dat\$y[idx,], 1)
dW.o = affine.bckwd(h.next[,,i.t], network\$W.o, network\$b.o, ds)
dWOs = dWOs + dW.o\$dW
dBOs = dBOs + dW.o\$db
# NOTE(review): h.next[,,(i.t+1)] passes the NEXT step's state into the
# tanh derivative; the step's own output h.next[,,i.t] looks intended
# (for i.t == time this slice is all zeros) — confirm.
RNN.d = RNN.backward(dW.o\$dx+d.prev, network, dat\$x[idx,],h.next[,,(i.t+1)],h.prev[,,i.t])
dWHs = dWHs + RNN.d\$dW.h
dWXs = dWXs + RNN.d\$dW.x
dBHs = dBHs + RNN.d\$db
d.prev = RNN.d\$dh.prev
}
loss[i.rep] = L
# gradient-descent parameter updates
network\$W.o = network\$W.o - lambda*dWOs
network\$b.o = network\$b.o - lambda*dBOs
network\$W.h = network\$W.h - lambda*dWHs
network\$W.x = network\$W.x - lambda*dWXs
network\$b.h = network\$b.h - lambda*dBHs
}
# training curve, then the predicted next-word distribution per time step
# (3 time steps x 3 batch rows = 9 barplots)
plot(loss,type='l')
par(mfrow=c(9,1))
for (i.t in 1:time){
idx = idx = seq(i.t, corp.len, time)
O = affine.forwd(h.next[,,i.t], network\$W.o, network\$b.o)
print(softmax.pred(O, dat\$y[idx,]))
for (i in 1:3){
barplot(softmax.pred(O, dat\$y[idx,])[i,])
}
}
```

# 認知情報解析学演習　RNN

```r
txt = "You say goodbye and I say hello.  you say goodbay and I say hello"

# Convert text to a sequence of integer word IDs ("sos" marks sentence
# starts; "." is kept as its own token).
txt2corpus <- function(txt){
txt = tolower(txt)
txt = gsub('[.]', ' . sos',txt)
words = unlist(strsplit(c('sos',txt), " "))
uniq.words = unique(words)
n.uniq = length(uniq.words)
n.words = length(words)
# NOTE(review): this pre-allocation is dead — overwritten by match() below
corpus = rep(0,n.words)
corpus = match(words,uniq.words)
return(corpus)
}

corp = txt2corpus(txt)

# Build one-hot training pairs: x[t,] encodes corpus[t] and
# y[t,] encodes the following word corpus[t+1].
corp2contxt1SRNN = function(corpus){
len.corp = length(corpus)
# creating target matrix
idxT = cbind(1:(len.corp-1), corpus[2:len.corp])
targ1S = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
targ1S[idxT]=1
# creating context matrices
idxC = cbind(1:(len.corp-1),corpus[1:(len.corp-1)])
contxt = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
contxt[idxC]=1
return(list(y=targ1S,x=contxt))
}

init.RNN <- function(n.uniq, size.hidden) {
  # Draw all parameters as N(0, 1) noise, scaled down so the network
  # starts close to its linear regime. The rnorm() call order is kept
  # (W.h, W.x, b) so results match under a fixed seed.
  scale = 0.01
  W.h = scale * matrix(rnorm(size.hidden^2), nrow = size.hidden)
  W.x = scale * matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq)
  b = scale * matrix(rnorm(size.hidden), nrow = 1)
  list(W.h = W.h, W.x = W.x, b = b)
}
MatMult.forwd <- function(x, W) {
  # Forward pass of a plain matrix-multiply layer (no bias).
  x %*% W
}

MatMult.bckwd <- function(x, W, dout) {
  # Backward pass of the matrix-multiply layer:
  # dx = dout %*% t(W), dW = t(x) %*% dout.
  list(dx = tcrossprod(dout, W), dW = crossprod(x, dout))
}

# Affine (fully connected) forward pass: x %*% W plus the 1-row bias b
# replicated across all rows via a column of ones.
affine.forwd <- function(x, W, b){
return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

# Affine backward pass: dx = dout W^T, dW = x^T dout, db = column sums of
# dout. The bias b is unused; it is kept for symmetry with affine.forwd.
affine.bckwd <- function(x, W, b, dout){
dx = dout%*%t(W)
dW = t(x)%*%dout
db = colSums(dout)
return(list(dx = dx, dW = dW, db = db))
}

# Softmax + cross-entropy loss, averaged over rows. The row max is
# subtracted for numerical stability; delta keeps log() away from log(0).
softmax.forwd <- function(x, target){
max.x = apply(x,1,max)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
delta = 1e-7;
R = nrow(as.matrix(y))
return(-sum(target*log(y + delta))/R)
}

# Row-wise softmax probabilities. `target` is unused; the signature
# mirrors softmax.forwd.
softmax.pred <- function(x, target){
max.x = apply(x,1,max)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
return(y)
}

# Gradient of softmax + cross-entropy w.r.t. the scores x:
# (y - target) / batch_size. dout is unused (assumed to be 1).
softmax.bckwd <- function(x, target,  dout = 1){
max.x = apply(x, 1, max)
R = nrow(x)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
return((y-target)/R)
}

RNN.forward <- function(h.prev, x, network) {
  # One RNN step: h.next = tanh(h.prev %*% W.h + x %*% W.x + b).
  #   h.prev  : batch x hidden matrix, previous hidden state
  #   x       : batch x vocab one-hot input rows
  #   network : list holding W.h, W.x and the bias b
  # Fix: "$" was escaped as "\$" (not valid R); return() no longer uses a
  # spurious named argument.
  b.size = nrow(x)
  h = h.prev %*% network$W.h + x %*% network$W.x
  hb = h + matrix(1, nrow = b.size, ncol = 1) %*% network$b
  tanh(hb)
}

RNN.backward <- function(dh.next, network, x, h.next, h.prev) {
  # Backward pass of one tanh RNN step.
  #   dh.next : gradient flowing into this step's output
  #   h.next  : this step's output, needed for the tanh derivative
  # Returns gradients for the bias, both weight matrices, the previous
  # hidden state and the input.
  # Fix: "$" was escaped as "\$" (not valid R).
  dt = dh.next * (1 - h.next^2)  # tanh'(z) = 1 - tanh(z)^2
  db = colSums(dt)
  dW.h = t(h.prev) %*% dt
  dh.prev = dt %*% t(network$W.h)
  dW.x = t(x) %*% dt
  dx = dt %*% t(network$W.x)
  list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx = dx)
}

# ----- experiment setup -----
txt = "You say goodbye and I say hello."

batch.size = 3  # sequences processed in parallel
time = 3        # BPTT truncation length

size.hidden = 7
corp = txt2corpus(txt)
dat <- corp2contxt1SRNN(corp)
# Fix: "$" was escaped as "\$" throughout this section (not valid R).
network <- init.RNN(ncol(dat$y), size.hidden)
corp.len = nrow(dat$y)
# hidden-state buffers: batch x hidden x time (h.next keeps one extra slot)
h.prev = array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, (time + 1)))
# output layer, initialized separately from the recurrent cell
W.out = matrix(rnorm(size.hidden * ncol(dat$y)), nrow = size.hidden)
b.out = matrix(rnorm(ncol(dat$y)), nrow = 1)
n.rep = 100000; lambda = 0.25

loss = rep(0, n.rep)
for (i.rep in 1:n.rep) {
  # forward pass: run the RNN cell through all time steps,
  # carrying the hidden state forward
  for (i.t in 1:time) {
    idx = seq(i.t, corp.len, time)
    h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat$x[idx, ], network)
    if (i.t < time) {
      h.prev[, , (i.t + 1)] = h.next[, , i.t]
    }
  }
  # gradient accumulators, summed over time steps
  dWHs = matrix(0, nrow = nrow(network$W.h), ncol = ncol(network$W.h))
  dWXs = matrix(0, nrow = nrow(network$W.x), ncol = ncol(network$W.x))
  dBs = matrix(0, nrow = nrow(network$b), ncol = ncol(network$b))
  dWOs = matrix(0, nrow = nrow(W.out), ncol = ncol(W.out))
  dBOs = matrix(0, nrow = nrow(b.out), ncol = ncol(b.out))
  d.prev = matrix(0, nrow = batch.size, ncol = size.hidden)
  L = 0
  # backward pass (truncated BPTT), last time step first
  for (i.t in time:1) {
    idx = seq(i.t, corp.len, time)
    O = affine.forwd(h.next[, , i.t], W.out, b.out)
    L = L + softmax.forwd(O, dat$y[idx, ])
    ds = softmax.bckwd(O, dat$y[idx, ], 1)
    dW.o = affine.bckwd(h.next[, , i.t], W.out, b.out, ds)
    dWOs = dWOs + dW.o$dW
    dBOs = dBOs + dW.o$db
    # NOTE(review): h.next[,,(i.t+1)] feeds the NEXT step's state into the
    # tanh derivative; the commented alternative below uses the step's own
    # output h.next[,,i.t], which matches the tanh backward rule — confirm.
    RNN.d = RNN.backward(dW.o$dx + d.prev, network, dat$x[idx, ],
                         h.next[, , (i.t + 1)], h.prev[, , i.t])
    #RNN.d = RNN.backward(dW.o$dx, network, dat$x[idx,],h.next[,,i.t],h.prev[,,i.t])
    dWHs = dWHs + RNN.d$dW.h
    dWXs = dWXs + RNN.d$dW.x
    dBs = dBs + RNN.d$db
    d.prev = RNN.d$dh.prev
  }
  loss[i.rep] = L
  # Gradient-descent updates.
  # Fix: the recurrent parameters were computed but never assigned back
  # (e.g. "network$W.h - lambda*dWHs" discarded its result), so only the
  # output layer was actually being trained.
  W.out = W.out - lambda * dWOs
  b.out = b.out - lambda * dBOs
  network$W.h = network$W.h - lambda * dWHs
  network$W.x = network$W.x - lambda * dWXs
  network$b = network$b - lambda * dBs
}
# Training curve, then the predicted next-word distribution per time step.
# Fix: removed the redundant double assignment "idx = idx =" and the "\$"
# escaping (not valid R).
plot(loss, type = 'l')
for (i.t in 1:time) {
  idx = seq(i.t, corp.len, time)
  O = affine.forwd(h.next[, , i.t], W.out, b.out)
  print(softmax.pred(O, dat$y[idx, ]))
}
```

# 認知情報解析学演習　DL-2 ch.5

```r
txt = "You say goodbye and I say hello."

# Convert text to a sequence of integer word IDs ("sos" marks sentence
# starts; "." is kept as its own token).
txt2corpus <- function(txt){
txt = tolower(txt)
txt = gsub('[.]', ' . sos',txt)
words = unlist(strsplit(c('sos',txt), " "))
uniq.words = unique(words)
n.uniq = length(uniq.words)
n.words = length(words)
# NOTE(review): this pre-allocation is dead — overwritten by match() below
corpus = rep(0,n.words)
corpus = match(words,uniq.words)
return(corpus)
}

corp = txt2corpus(txt)

# Build one-hot training pairs: x[t,] encodes corpus[t] and
# y[t,] encodes the following word corpus[t+1].
corp2contxt1SRNN = function(corpus){
len.corp = length(corpus)
# creating target matrix
idxT = cbind(1:(len.corp-1), corpus[2:len.corp])
targ1S = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
targ1S[idxT]=1
# creating context matrices
idxC = cbind(1:(len.corp-1),corpus[1:(len.corp-1)])
contxt = matrix(0,nrow=len.corp-1,ncol=length(unique(corpus)))
contxt[idxC]=1
return(list(y=targ1S,x=contxt))
}
dat<-corp2contxt1SRNN(corp)

init.RNN <- function(n.uniq, size.hidden) {
  # Draw all parameters as N(0, 1) noise, scaled down so the network
  # starts close to its linear regime. The rnorm() call order is kept
  # (W.h, W.x, b) so results match under a fixed seed.
  scale = 0.01
  W.h = scale * matrix(rnorm(size.hidden^2), nrow = size.hidden)
  W.x = scale * matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq)
  b = scale * matrix(rnorm(size.hidden), nrow = 1)
  list(W.h = W.h, W.x = W.x, b = b)
}

RNN.forward <- function(h.prev, x, network) {
  # One RNN step: h.next = tanh(h.prev %*% W.h + x %*% W.x + b).
  # Fix: the body referred to an undefined `x.temp`; the parameter is `x`.
  # Also fixed "$" escaped as "\$" (not valid R).
  b.size = nrow(x)
  h = h.prev %*% network$W.h + x %*% network$W.x
  hb = h + matrix(1, nrow = b.size, ncol = 1) %*% network$b
  tanh(hb)
}

RNN.backward <- function(dh.next, network, x, h.next, h.prev) {
  # Backward pass of one tanh RNN step.
  #   dh.next : gradient flowing into this step's output
  #   h.next  : this step's output, needed for the tanh derivative
  # Returns gradients for the bias, both weight matrices, the previous
  # hidden state and the input.
  # Fix: "$" was escaped as "\$" (not valid R).
  dt = dh.next * (1 - h.next^2)  # tanh'(z) = 1 - tanh(z)^2
  db = colSums(dt)
  dW.h = t(h.prev) %*% dt
  dh.prev = dt %*% t(network$W.h)
  dW.x = t(x) %*% dt
  dx = dt %*% t(network$W.x)
  list(db = db, dW.h = dW.h, dh.prev = dh.prev, dW.x = dW.x, dx = dx)
}

# Affine (fully connected) forward pass: x %*% W plus the 1-row bias b
# replicated across all rows via a column of ones.
affine.forwd <- function(x, W, b){
return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

# Affine backward pass: dx = dout W^T, dW = x^T dout, db = column sums of
# dout. The bias b is unused; it is kept for symmetry with affine.forwd.
affine.bckwd <- function(x, W, b, dout){
dx = dout%*%t(W)
dW = t(x)%*%dout
db = colSums(dout)
return(list(dx = dx, dW = dW, db = db))
}

# Softmax + cross-entropy loss, averaged over rows. The row max is
# subtracted for numerical stability; delta keeps log() away from log(0).
softmax.forwd <- function(x, target){
max.x = apply(x,1,max)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
delta = 1e-7;
R = nrow(as.matrix(y))
return(-sum(target*log(y + delta))/R)
}

# Gradient of softmax + cross-entropy w.r.t. the scores x:
# (y - target) / batch_size. dout is unused (assumed to be 1).
softmax.bckwd <- function(x, target,  dout = 1){
max.x = apply(x, 1, max)
R = nrow(x)
C = ncol(x)
x = x - max.x%*%matrix(1,nrow=1,ncol=C)
y = exp(x)/rowSums(exp(x))
return((y-target)/R)
}

# checking modules: run one forward pass and one backward step by hand
batch.size = 3; time = 3

corp.len = nrow(dat$y)
size.hidden = 5
# Fix: `network` was used below but never created in this chunk.
network <- init.RNN(ncol(dat$y), size.hidden)
h.prev = array(0, c(batch.size, size.hidden, time))
h.next = array(0, c(batch.size, size.hidden, time))

for (i.t in 1:time) {
  # rows i.t, i.t+3, i.t+6 form the mini-batch for step i.t
  # (the step of 3 equals both batch.size and time here)
  idx = seq(i.t, corp.len, batch.size)
  h.next[, , i.t] = RNN.forward(h.prev[, , i.t], dat$x[idx, ], network)
  if (i.t < time) {
    h.prev[, , (i.t + 1)] = h.next[, , i.t]
  }
}

# output layer applied to the final hidden state
W.out = matrix(rnorm(size.hidden * 8), nrow = size.hidden)
b.out = matrix(rnorm(8), nrow = 1)
O = affine.forwd(h.next[, , 3], W.out, b.out)
L = softmax.forwd(O, dat$y[c(3, 6, 9), ])
ds = softmax.bckwd(O, dat$y[c(3, 6, 9), ], 1)
# Fix: affine.bckwd takes (x, W, b, dout); the original passed the layer
# OUTPUT O as x and omitted b entirely. The layer input is h.next[,,3].
dW.o = affine.bckwd(h.next[, , 3], W.out, b.out, ds)
```