認知情報解析演習B DL2 Ch 3

# p99
ctxt = c(1,rep(0,6))
W = matrix(rnorm(7*3),nrow=7)
h = ctxt%*%W

# p 100
MatMult.forwd <- function(x, W){
  return(x%*%W)
}
res <- MatMult.forwd(ctxt,W)

# p105
c1 = c(1,rep(0,6))
c2 = rep(0,7); c2[3]=1
W.in = matrix(rnorm(7*3),nrow=7)
W.out = matrix(rnorm(7*3),nrow=3)
h1 = MatMult.forwd(c1,W.in)
h2 = MatMult.forwd(c2,W.in)
h = 0.5*(h1+h2)
s = MatMult.forwd(h,W.out)
print(s)

# p107
softmax1.pred <- function(x){
  max.x = max(x)
  x = x - max.x
  y = exp(x)/sum(exp(x))
  return(y)
}

softmaxCE1.forwd <- function(x, target){
  max.x = max(x)
  x = x - max.x
  y = exp(x)/sum(exp(x))
  delta = 1e-7;
  return(-sum(target*log(y + delta)))
}

targ=rep(0,7); targ[2]=1

# p112
txt = "You say goodbye and I say hello."
txt = tolower(txt)
txt = gsub('[.]', ' .',txt)
words = unlist(strsplit(txt, " "))
uniq.words = unique(words)
n.uniq = length(uniq.words)
n.words = length(words)
corpus = rep(0,n.words)
corpus = match(words,uniq.words)

txt = "You say goodbye and I say hello.";
txt2corpus <- function(txt){
  txt = tolower(txt)
  txt = gsub('[.]', ' .',txt)
  words = unlist(strsplit(txt, " "))
  uniq.words = unique(words)
  n.uniq = length(uniq.words)
  n.words = length(words)
  corpus = rep(0,n.words)
  corpus = match(words,uniq.words)
  return(corpus)
}
corp = txt2corpus(txt)

len.corp = length(corp)
target = corp[2:(len.corp-1)]
col1 = corp[1:(len.corp-2)]
col2 = corp[3:len.corp]
context = cbind(col1,col2)

corp2contxt <- function(corpus){
  len.corp = length(corpus)
  target = corpus[2:(len.corp-1)]
  col1 = corpus[1:(len.corp-2)]
  col2 = corpus[3:len.corp]
  context = cbind(col1,col2)
  return(list(context=context,target = target))
}

# p114
len.corp = length(corpus)
idxT = cbind(1:(len.corp-2), corpus[2:(len.corp-1)])
targ1S = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
targ1S[idxT]=1

idxC1 = cbind(1:(len.corp-2),corpus[1:(len.corp-2)])
idxC2 = cbind(1:(len.corp-2),corpus[3:len.corp])

contxt1 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
contxt2 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
contxt1[idxC1]=1
contxt2[idxC2]=1

corp2contxt1S = function(corpus){
  len.corp = length(corpus)
  # creating target matrix
  idxT = cbind(1:(len.corp-2), corpus[2:(len.corp-1)])
  targ1S = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
  targ1S[idxT]=1
  
  # creating context matrices
  idxC1 = cbind(1:(len.corp-2),corpus[1:(len.corp-2)])
  idxC2 = cbind(1:(len.corp-2),corpus[3:len.corp])
  contxt1 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
  contxt2 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
  contxt1[idxC1]=1
  contxt2[idxC2]=1
  return(list(target=targ1S,contxt1=contxt1,contxt2=contxt2))
}

# p116~
init.W2V <- function(n.uniq,size.hidden){
  W.in = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
  W.out = matrix(rnorm(n.uniq*size.hidden),nrow=size.hidden)*0.01
  return(list(W.in = W.in, W.out= W.out))
}

softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

MatMult.bckwd <- function(x, W, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  return(list(dx = dx, dW = dW))
}

softmax.pred <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}

network<-init.W2V(7,5)
n.batch = 3;
n.data = nrow(dat$target)
n.iter = 1000;
lambda=0.1;
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  samp.idx = sample(1:n.data,n.batch)
  batch.c1 = dat$contxt1[samp.idx,]
  batch.c2 = dat$contxt2[samp.idx,]
  batch.y = dat$target[samp.idx,]
  h1 <- MatMult.forwd(batch.c1, network$W.in)
  h2 <- MatMult.forwd(batch.c2, network$W.in)
  h = 0.5 * (h1 + h2)
  s = MatMult.forwd(h,network$W.out)
  z = softmax.forwd(s,batch.y)
  loss[i.iter] = z
  ds = softmax.bckwd(s, batch.y, 1)
  da = MatMult.bckwd(h,network$W.out,ds)
  da$dW = da$dW*0.5
  dIn1 = MatMult.bckwd(batch.c1,network$W.in,da$dx)
  dIn2 = MatMult.bckwd(batch.c2,network$W.in,da$dx)
  network$W.out = network$W.out - lambda*da$dW
  network$W.in = network$W.in - lambda*dIn1$dW
  network$W.in = network$W.in - lambda*dIn2$dW
}
plot(loss, type='l')