# p136
embed.forwd <- function(W, idx){
return(W[idx,])
}
h1 <- embed.forwd(W,idx1)
h2 <- embed.forwd(W,idx2)
h = (h1 + h2)/2
embed.dot.forwd <- function(W, h, idx){
return(O=rowSums(W[idx,]*h))
}
s = embed.dot.forwd(network$W.out,h,batch.y)
sigmoid.forwd <- function(O){
return(1/(1+exp(-O)))
}
sigmoidWL.forwd <- function(O,target){
y = 1/(1+exp(-O))
loss=-sum(target*log(y)+(1-target)*log(1 - y))
return(loss)
}
sigmoidWL.backwd <- function(O,target,dout=1){
y = 1/(1+exp(-O))
dW = (y - target)*dout
return(dW)
}
embed.dot.backwd <- function(W,h,idx,dout){
dh = dout * W[idx,]
dW = dout * h
return(list(dh = dh, dW = dW))
}
embed.backwd <- function(W, idx, dout){
dW = matrix(0,nrow(W),ncol(W))
for (i.idx in 1:length(idx)){
dW[idx[i.idx], ] = dW[idx[i.idx], ] + dout[i.idx, ]
}
return(dW)
}
# 158
txt2corpus <- function(txt){
txt = tolower(txt)
txt = gsub('[.]', ' .',txt)
words = unlist(strsplit(txt, " "))
uniq.words = unique(words)
n.uniq = length(uniq.words)
n.words = length(words)
corpus = rep(0,n.words)
corpus = match(words,uniq.words)
return(corpus)
}
corp2contxt <- function(corpus){
len.corp = length(corpus)
target = corpus[2:(len.corp-1)]
col1 = corpus[1:(len.corp-2)]
col2 = corpus[3:len.corp]
context = cbind(col1,col2)
return(list(context=context,target = target))
}
init.W2V <- function(n.uniq,size.hidden){
W.in = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
W.out = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
return(list(W.in = W.in, W.out= W.out))
}
### running example
network<-init.W2V(8,5)
n.batch = 3;
txt = "You say goodbye and I say hello.."
corp = txt2corpus(txt)
corp = c(8,8,corp)
dat<-corp2contxt(corp)
n.data = length(dat$target)
n.iter = 3000;
lambda=0.1;
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
samp.idx = sample(1:n.data,n.batch)
batch.c1 = dat$context[samp.idx,1]
batch.c2 = dat$context[samp.idx,2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in,batch.c1)
h2 <- embed.forwd(network$W.in,batch.c2)
h = 0.5 * (h1 + h2)
# positive only
s = embed.dot.forwd(network$W.out,h,batch.y)
z = sigmoidWL.forwd(s,1)
loss[i.iter] = z
ds = sigmoidWL.backwd(s, 1, 1)
dE = embed.dot.backwd(network$W.out,h, batch.y, ds)
dh = dE$dh*0.5
dIn1 = embed.backwd(network$W.in,dat$context[samp.idx,1], dh)
dIn2 = embed.backwd(network$W.in,dat$context[samp.idx,2], dh)
network$W.out[batch.y,] = network$W.out[batch.y,] - lambda*dE$dW
network$W.in = network$W.in - lambda*dIn1
network$W.in = network$W.in - lambda*dIn2
}
plot(loss, type='l')
samp.idx = c(2:6,8,9,1)
batch.c1 = dat$context[samp.idx,1]
batch.c2 = dat$context[samp.idx,2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in,batch.c1)
h2 <- embed.forwd(network$W.in,batch.c2)
h = 0.5 * (h1 + h2)
s = MatMult.forwd(h,t(network$W.out))
z = sigmoid.forwd(s)
res=cbind(z,batch.y)
par(mfrow=c(8,1))
for (i in 1:8){
col.spec = rep("black",8)
col.spec[i]="orange"
barplot(res[i, 1:8],col=col.spec)
}
# with negative sampling
# p158
unigram.sampler <- function(corpus, target, power, sample.size){
neg.corpus <- corpus[-which(corpus == target)]
uniq.word = unique(neg.corpus)
n.word = length(neg.corpus)
prob = (as.vector(table(neg.corpus))/n.word)^power
prob = prob/sum(prob)
sample.idx = sample(uniq.word, sample.size, prob = prob)
return(sort(sample.idx))
}
get.neg.sample <- function(corpus, target, power, sample.size){
sample.id = matrix(0, nrow = length(target), ncol = sample.size)
for (i.targ in 1:length(target)){
sample.id[i.targ,] = unigram.sampler(corpus, target[i.targ], power, sample.size)
}
return(sample.id)
}
init.W2V <- function(n.uniq,size.hidden){
W.in = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
W.out = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
return(list(W.in = W.in, W.out= W.out))
}
network<-init.W2V(8,5)
n.batch = 3;
txt = "You say goodbye and I say hello.."
corp = txt2corpus(txt)
corp = c(8,8,corp)
dat<-corp2contxt(corp)
n.data = length(dat$target)
n.iter = 3000;
lambda=0.1;
loss = rep(0, n.iter)
sample.size = 2
for (i.iter in 1:n.iter){
samp.idx = sample(1:n.data,n.batch)
batch.c1 = dat$context[samp.idx,1]
batch.c2 = dat$context[samp.idx,2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in,batch.c1)
h2 <- embed.forwd(network$W.in,batch.c2)
h = 0.5 * (h1 + h2)
# positive
s = embed.dot.forwd(network$W.out,h,batch.y)
z = sigmoidWL.forwd(s,1)
loss[i.iter] = z
ds = sigmoidWL.backwd(s, 1, 1)
dE = embed.dot.backwd(network$W.out,h, batch.y, ds)
dh = dE$dh*0.5
dIn1 = embed.backwd(network$W.in,dat$context[samp.idx,1], dh)
dIn2 = embed.backwd(network$W.in,dat$context[samp.idx,2], dh)
network$W.out[batch.y,] = network$W.out[batch.y,] - lambda*dE$dW
network$W.in = network$W.in - lambda*dIn1
network$W.in = network$W.in - lambda*dIn2
# negative
neg.set <- get.neg.sample(corp,batch.y, 0.75, sample.size)
for (i.neg in 1:sample.size){
s = embed.dot.forwd(network$W.out,h,neg.set[,i.neg])
z = sigmoidWL.forwd(s,0)
loss[i.iter] = loss[i.iter] + z
ds = sigmoidWL.backwd(s, 0, dout=1)
dE = embed.dot.backwd(network$W.out,h,neg.set[,i.neg], ds)
dh = dE$dh*0.5
dIn1 = embed.backwd(network$W.in, dat$context[samp.idx,1],dh)
dIn2 = embed.backwd(network$W.in, dat$context[samp.idx,2],dh)
network$W.out[neg.set[,i.neg],] = network$W.out[neg.set[,i.neg],] - lambda/sample.size*dE$dW
network$W.in = network$W.in - lambda*dIn1
network$W.in = network$W.in - lambda*dIn2
}
}
plot(loss, type='l')
samp.idx = c(2:6,8,9,1)
batch.c1 = dat$context[samp.idx,1]
batch.c2 = dat$context[samp.idx,2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in,batch.c1)
h2 <- embed.forwd(network$W.in,batch.c2)
h = 0.5 * (h1 + h2)
s = MatMult.forwd(h,t(network$W.out))
z = sigmoid.forwd(s)
res=cbind(z,batch.y)
par(mfrow=c(8,1))
for (i in 1:8){
col.spec = rep("black",8)
col.spec[i]="orange"
barplot(res[i, 1:8],col=col.spec)
}
par(mfrow=c(4,1))
for (i in 1:4){
col.spec = rep("black",8)
col.spec[i]="orange"
barplot(res[i, 1:8],col=col.spec)
}
Related