# ch03 neural networks

# Step activation: 1 where the input is strictly greater than 0, otherwise 0
# (an input of exactly 0 maps to 0). Works elementwise and returns a plain
# numeric vector.
step.func <- function(x) {
  is.positive <- x > 0
  as.numeric(is.positive)
}
# Evaluate the step function on a grid over [-5, 5] (spacing 0.1) and draw it
# as a line plot.
x <- seq(from = -5, to = 5, by = 0.1)
y <- step.func(x)
plot(x, y, type = "l", lwd = 2, xlab = "a", ylab = "y")

# Logistic sigmoid activation, 1 / (1 + exp(-x)), applied elementwise.
sigmoid.func <- function(x) {
  denominator <- 1 + exp(-x)
  1 / denominator
}

# Draw the sigmoid on the same grid of x values.
y <- sigmoid.func(x)
plot(x, y, type = "l", lwd = 2, xlab = "a", ylab = "y")

# Compare both activations in one figure: the step function as a solid line
# and the sigmoid overlaid as a dashed line (lty = 2).
y.step <- step.func(x)
y.sigm <- sigmoid.func(x)
plot(x, y.step, type = "l", lwd = 2, xlab = "a", ylab = "y")
lines(x, y.sigm, lty = 2, lwd = 2)

# Rectified linear unit: the elementwise maximum of 0 and the input, so
# negative entries become 0 and non-negative entries pass through unchanged.
relu.func <- function(x) {
  pmax(0, x)
}

# Draw the ReLU activation over the current grid of x values.
y.relu <- relu.func(x)
plot(x, y.relu, type = "l", lwd = 2, xlab = "a", ylab = "y")

# Example matrices, filled row by row.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the fill order.

# Two 2 x 2 matrices.
A <- matrix(1:4, nrow = 2, byrow = TRUE)
B <- matrix(5:8, nrow = 2, byrow = TRUE)

# A 3 x 2 matrix and a 2 x 1 matrix (these overwrite the pair above).
A <- matrix(1:6, nrow = 3, byrow = TRUE)
B <- matrix(7:8, nrow = 2, byrow = TRUE)

# Step-by-step forward pass of one two-element input through three layers.

# Layer 1: 2 inputs -> 3 units, sigmoid activation.
x <- c(1, 0.5)
W1 <- matrix((1:6) * 0.1, nrow = 2)
B1 <- (1:3) * 0.1
A1 <- x %*% W1 + B1
Z1 <- sigmoid.func(A1)

# Layer 2: 3 units -> 2 units, sigmoid activation.
W2 <- matrix((1:6) * 0.1, nrow = 3)
B2 <- c(0.1, 0.2)
A2 <- Z1 %*% W2 + B2
Z2 <- sigmoid.func(A2)

# Layer 3: 2 units -> 2 outputs; the weighted sum is used as the output
# without any further activation.
W3 <- matrix((1:4) * 0.1, nrow = 2)
B3 <- c(0.1, 0.2)
A3 <- Z2 %*% W3 + B3
Z3 <- A3

# function to initialize 3L network
#
# Builds the fixed example parameters of a 2 -> 3 -> 2 -> 2 network and
# returns them as a named list with elements W1, B1, W2, B2, W3, B3.
# Weight matrices are filled column-wise with 0.1, 0.2, ...
init.3L.network <- function() {
  # `count` consecutive multiples of 0.1 arranged into `rows` rows.
  weights <- function(count, rows) {
    matrix(seq_len(count) * 0.1, nrow = rows)
  }
  list(
    W1 = weights(6, 2),
    B1 = seq_len(3) * 0.1,
    W2 = weights(6, 3),
    B2 = c(0.1, 0.2),
    W3 = weights(4, 2),
    B3 = c(0.1, 0.2)
  )
}
# feedforward process
#
# Propagates an input through the three layers stored in `network`.
#
# network: list with weight matrices W1, W2, W3 and bias vectors B1, B2, B3.
# x:       a single sample (numeric vector) or a batch given as a matrix
#          with one sample per row.
# Returns a matrix with one row per sample holding the sigmoid-activated
# output of the third layer.
#
# NOTE(review): the step-by-step walk-through elsewhere in this file leaves
# the last layer without an activation (Z3 = A3), whereas this function
# applies the sigmoid to it. That behavior is kept here unchanged.
forward.3L <- function(network, x){
  # Add bias vector B to every row of A. A bare `A + B` recycles B down the
  # columns of A, which matches the units only when A has a single row, so
  # the bias is expanded explicitly to follow A's column-major layout.
  add.bias <- function(A, B){
    A + rep(as.vector(B), each = nrow(A))
  }
  A1 <- add.bias(x %*% network$W1, network$B1)
  Z1 <- sigmoid.func(A1)
  A2 <- add.bias(Z1 %*% network$W2, network$B2)
  Z2 <- sigmoid.func(A2)
  A3 <- add.bias(Z2 %*% network$W3, network$B3)
  Z3 <- sigmoid.func(A3)
  return(Z3)
}

# Build the example network and run one two-element input through it.
network <- init.3L.network()
y <- forward.3L(network, c(1, 0.5))

# Naive softmax on large inputs: exp() of values this large overflows to Inf,
# so the ratio below evaluates to NaN.
a <- c(1010, 1000, 990)
exp(a) / sum(exp(a))

# Softmax over all elements of x. The largest element is subtracted before
# exponentiating so that exp() cannot overflow; the shift cancels in the
# ratio.
softmax.func <- function(x) {
  shifted.exp <- exp(x - max(x))
  shifted.exp / sum(shifted.exp)
}
  

# Read the training data. The first column is used as the label and the
# remaining columns as features.
train <- read.csv('http://peach.l.chiba-u.ac.jp/course_folder/MNSTtrain.csv',
  header = TRUE)
train <- data.matrix(train)
train.x <- train[, -1]
train.y <- train[, 1]
# Scale the features by 1/255 (presumably 8-bit pixel intensities; confirm
# against the data) and transpose so that each column holds one sample.
train.x <- t(train.x / 255)

# Fetch the stored network parameters. The file is binary, so it must be
# transferred with mode = "wb": on Windows the default text mode corrupts
# it, and the lowercase ".Rdata" suffix is not among the extensions that
# download.file switches to binary mode automatically.
# NOTE(review): load() deserializes a file fetched over plain HTTP; only run
# this against a source you trust.
download.file("http://peach.l.chiba-u.ac.jp/course_folder/trNetwork.Rdata",
  "trNetwork.Rdata", mode = "wb")
load("trNetwork.Rdata")
# load() is expected to define `trNetwork` in the workspace.
network <- trNetwork

# Classify every training sample one at a time and accumulate a 10 x 10
# confusion matrix: rows index the predicted class and columns the true
# class. Labels in train.y are shifted by 1 to form 1-based column indices.
n.train <- ncol(train.x)
correct.cl <- 0
conf.matrix <- matrix(0, 10, 10)
# seq_len() yields an empty loop when there are no samples, whereas
# 1:n.train would iterate over c(1, 0).
for (i.loop in seq_len(n.train)) {
  y <- forward.3L(network, train.x[, i.loop])
  # max.col() breaks ties at random by default; taking the first maximum
  # keeps the result reproducible from run to run.
  max.y <- max.col(y, ties.method = "first")
  true.y <- train.y[i.loop] + 1
  conf.matrix[max.y, true.y] <- conf.matrix[max.y, true.y] + 1
}
# Correct predictions lie on the diagonal.
accuracy <- sum(diag(conf.matrix)) / n.train

# batch
#
# Same evaluation as the per-sample loop, but classifying batch_size samples
# per forward pass. Rows of conf.matrix index the predicted class and columns
# the true class.
#
# NOTE(review): this relies on forward.3L adding each bias to every row of a
# multi-row input; confirm forward.3L handles matrix input that way.
batch_size <- 200
class.levels <- 1:10
conf.matrix <- matrix(0, 10, 10)
for (i.batch in seq(1, n.train, by = batch_size)) {
  # Clamp the final batch so it never indexes past the last sample when
  # n.train is not a multiple of batch_size.
  batch.idx <- i.batch:min(i.batch + batch_size - 1, n.train)
  # train.x holds one sample per column, while forward.3L computes
  # x %*% W1 and therefore needs one sample per row: transpose the slice.
  # drop = FALSE keeps a one-sample batch as a matrix.
  y <- forward.3L(network, t(train.x[, batch.idx, drop = FALSE]))
  # Deterministic tie-breaking (the default picks at random among ties).
  pred <- max.col(y, ties.method = "first")
  # Fixing the factor levels guarantees a 10 x 10 table even when some
  # class does not occur in this batch, so the addition always conforms.
  conf.matrix <- conf.matrix +
    table(factor(pred, levels = class.levels),
          factor(train.y[batch.idx] + 1, levels = class.levels))
}
accuracy <- sum(diag(conf.matrix)) / n.train