# 認知情報解析　DL1の復習

```
# Sigmoid activation (forward pass): element-wise logistic function 1/(1+e^-x).
sigmoid.forwd <- function(x){
  1 / (1 + exp(-x))
}

# Sigmoid backward pass: dL/dx = dout * (1 - y) * y with y = sigmoid(x).
# The sigmoid is recomputed inline from the pre-activation x.
sigmoid.bckwd <- function(x, dout){
  y <- 1 / (1 + exp(-x))
  dout * (1 - y) * y
}

# Affine (fully connected) layer forward pass: x %*% W with the bias row b
# (a 1 x n matrix) added to every row of the product.
affine.forwd <- function(x, W, b){
  sweep(x %*% W, 2, as.vector(b), "+")
}

# Affine layer backward pass.
# x: the layer's input; W, b: parameters; dout: upstream gradient.
# Returns gradients w.r.t. input (dx), weights (dW) and bias (db);
# b itself is not needed for the gradients but kept for API symmetry.
affine.bckwd <- function(x, W, b, dout){
  list(
    dx = dout %*% t(W),
    dW = t(x) %*% dout,
    db = colSums(dout)
  )
}

# Softmax + cross-entropy loss (forward pass).
# The row-wise max is subtracted before exponentiating for numerical
# stability; delta keeps log() away from zero. Returns the mean
# cross-entropy over the rows (observations) of x.
softmax.forwd <- function(x, target){
  shifted <- sweep(x, 1, apply(x, 1, max), "-")
  expx <- exp(shifted)
  y <- expx / rowSums(expx)
  delta <- 1e-7
  n.obs <- nrow(as.matrix(y))
  -sum(target * log(y + delta)) / n.obs
}

# Softmax + cross-entropy backward pass: the gradient w.r.t. the logits x
# simplifies to (y - target) / batch_size. dout is kept in the signature
# for API symmetry with the other backward functions (it is unused).
softmax.bckwd <- function(x, target,  dout = 1){
  n.obs <- nrow(x)
  shifted <- sweep(x, 1, apply(x, 1, max), "-")
  expx <- exp(shifted)
  y <- expx / rowSums(expx)
  (y - target) / n.obs
}

# Softmax prediction: row-wise class probabilities for the logits x.
# FIX: `target` was a required-but-unused parameter, and the script calls
# softmax.pred(a3) with one argument (working only via lazy evaluation).
# It now defaults to NULL, making the one-argument call explicit and safe.
softmax.pred <- function(x, target = NULL){
  C = ncol(x)
  max.x = apply(x, 1, max)
  # subtract the row max before exponentiating for numerical stability
  x = x - max.x%*%matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}
# Generate the 3-class spiral toy dataset: N points per class in 2-D,
# with one-hot labels in t.
# BUG FIX: the original loop only filled the label matrix t and left x
# as all zeros, so there was no spiral data at all (every point plotted
# at the origin and the classes were indistinguishable).
N = 100; dim = 2; n.class = 3;
x = matrix(0, nrow = N*n.class, ncol = dim)
t = matrix(0, nrow = N*n.class, ncol = n.class)

for (i.cl in 1:n.class){
  for (i.N in 1:N){
    idx = N*(i.cl-1) + i.N
    # radius grows linearly along each arm; the angle advances with the
    # class index plus a little Gaussian noise so the arms overlap slightly
    radius = i.N/N
    theta = 4*(i.cl-1) + 4*radius + rnorm(1, sd = 0.2)
    x[idx, ] = c(radius*sin(theta), radius*cos(theta))
    t[idx, i.cl] = 1   # one-hot class label
  }
}

# spiral data
# Scatter plot of the training points, one color per class
# (rows 1-100 / 101-200 / 201-300 are classes 1-3).
# NOTE(review): this assumes x holds the spiral coordinates; if x is
# still the zero matrix it was initialized to above, every point is
# drawn at the origin — confirm the spiral generation fills x first.
plot(x[1:100,1],x[1:100,2],pch=20,col='black',cex=2,xlim=c(-1,1),ylim=c(-1,1))
points(x[101:200,1],x[101:200,2],pch=20,col='blue',cex=2)
points(x[201:300,1],x[201:300,2],pch=20,col='green',cex=2)

# Build a fully connected network with layer sizes n.neurons
# (e.g. c(2, 30, 30, 3)). Weights and biases are drawn from N(0, 0.01^2).
# Returns list(W = <list of weight matrices, in_size x out_size>,
#              b = <list of 1-row bias matrices>).
init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  # preallocate; seq_len() (not 1:(n.layer-1)) so a one-element spec
  # yields zero iterations instead of the bogus sequence c(1, 0)
  W = vector("list", max(n.layer - 1, 0))
  b = vector("list", max(n.layer - 1, 0))
  for (i.layer in seq_len(n.layer - 1)){
    W[[i.layer]] = matrix(
      rnorm(n.neurons[i.layer]*n.neurons[i.layer + 1], sd = 0.01),
      nrow = n.neurons[i.layer])
    b[[i.layer]] = matrix(rnorm(n.neurons[i.layer + 1], sd = 0.01), nrow = 1)
  }
  return(list(W = W, b = b))
}

# Train the 2-30-30-3 network on the spiral data with minibatch SGD.
train.x = x
train.y = t
params = init.network(c(2, 30, 30, 3))
batch_size = 50; n.iter = 50000; lambda = 0.3   # lambda = learning rate
n.train = nrow(train.x)
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  # BUG FIX: x.batch / t.batch were never defined in the original loop,
  # so the script could not run. Sample a fresh random minibatch here.
  batch.idx = sample(n.train, batch_size)
  x.batch = train.x[batch.idx, , drop = FALSE]
  t.batch = train.y[batch.idx, , drop = FALSE]

  # forward pass
  a1 = affine.forwd(x.batch, params$W[[1]], params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
  z2 = sigmoid.forwd(a2)
  a3 = affine.forwd(z2, params$W[[3]], params$b[[3]])
  loss[i.iter] = softmax.forwd(a3, t.batch)

  # backward pass
  dwSM = softmax.bckwd(a3, t.batch, 1)
  # BUG FIX: affine.bckwd needs each layer's *input* for dW — the
  # post-sigmoid activations z2/z1, not the pre-activations a2/a1
  # that the original passed.
  dwA3 = affine.bckwd(z2, params$W[[3]], params$b[[3]], dwSM)
  dwSG2 = sigmoid.bckwd(a2, dwA3$dx)
  dwA2 = affine.bckwd(z1, params$W[[2]], params$b[[2]], dwSG2)
  dwSG = sigmoid.bckwd(a1, dwA2$dx)
  dwA1 = affine.bckwd(x.batch, params$W[[1]], params$b[[1]], dwSG)

  # gradient-descent parameter update
  params$W[[3]] = params$W[[3]] - lambda*dwA3$dW
  params$b[[3]] = params$b[[3]] - lambda*dwA3$db
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
}

# plotting results
# FIX: the escaped "\$" sequences (markdown-escaping artifacts) are
# restored to plain "$" so this code actually runs.
plot(loss, type = 'l', xlab = "trial")

# Visualize the learned decision regions on a 300 x 300 grid over
# [-1, 1]^2, then overlay the (rescaled) training points.
library(plot3D)
M <- mesh(seq(-1, 1, length.out = 300), seq(-1, 1, length.out = 300))
temp.x = cbind(as.vector(M$x), as.vector(M$y))
a1 = affine.forwd(temp.x, params$W[[1]], params$b[[1]])
z1 = sigmoid.forwd(a1)
a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
z2 = sigmoid.forwd(a2)
a3 = affine.forwd(z2, params$W[[3]], params$b[[3]])
cl = softmax.pred(a3)   # target argument is unused, so it may be omitted
cl.pred = apply(cl, 1, which.max)   # predicted class per grid point

image(matrix(cl.pred, 300, 300))
# image() draws on [0, 1]^2, so remap the data from [-1, 1] to [0, 1]
x2 = x*0.5 + 0.5
points(x2[1:100, 1], x2[1:100, 2], pch = 20, col = 'black', cex = 2)
points(x2[101:200, 1], x2[101:200, 2], pch = 20, col = 'blue', cex = 2)
points(x2[201:300, 1], x2[201:300, 2], pch = 20, col = 'green', cex = 2)

```