Cognitive Information Analysis Ch05

# forward pass of a multiply node: z = x * y
multi.forwd <- function(x,y){
  return(x*y)
}
# backward pass of a multiply node: each input's gradient is dout times the other input
multi.bckwd <- function(x, y, dout){
  dx = dout * y
  dy = dout * x
  return(list(dx = dx, dy = dy))
}

apple = 100; n.apple = 2; tax = 1.1
apple.pre.tax = multi.forwd(apple, n.apple)
apple.post.tax = multi.forwd(apple.pre.tax, tax)

dprice = 1
d.apple.post.tax = multi.bckwd(apple.pre.tax, tax, dprice)
d.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dy

# forward pass of an add node: z = x + y
add.forwd <- function(x,y){
  return(x + y)
}
# backward pass of an add node: the upstream gradient passes through unchanged
add.bckwd <- function(x, y, dout){
  dx = dout
  dy = dout
  return(list(dx = dx, dy = dy))
}

apple = 100; n.apple = 2; tax = 1.1
orange = 150; n.orange = 3;

apple.price = multi.forwd(apple, n.apple)
orange.price = multi.forwd(orange, n.orange)
all.price = add.forwd(apple.price, orange.price)
price = multi.forwd(all.price, tax)

dprice = 1
d.all.price = multi.bckwd(all.price, tax, dprice)
d.apple.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dx
d.orange.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dy
d.apple = multi.bckwd(apple, n.apple, d.apple.price)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.price)$dy
d.orange = multi.bckwd(orange, n.orange, d.orange.price)$dx
d.n.orange = multi.bckwd(orange, n.orange, d.orange.price)$dy
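
As a quick sanity check (not part of the original handout), the analytic gradient for the apple price can be compared against a finite-difference approximation; both should be close to 2.2 (= n.apple * tax):

# finite-difference check of d.apple
h = 1e-4
price.plus  = multi.forwd(add.forwd(multi.forwd(apple + h, n.apple), orange.price), tax)
price.minus = multi.forwd(add.forwd(multi.forwd(apple - h, n.apple), orange.price), tax)
(price.plus - price.minus)/(2*h)   # approx. 2.2, matching d.apple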

relu.forwd <- function(x){
  return(pmax(x,0))
}

# ReLU gradient: pass dout through where x > 0, zero elsewhere
relu.bckwd <- function(x, dout){
  dout[which(x <= 0)] = 0
  return(dout)
}

sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}

# sigmoid gradient: dout * y * (1 - y), with y recomputed from x
sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x)
  return(dout*(1-y)*y)
}

# affine (fully connected) layer: y = xW + b, with b broadcast over rows
affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

# affine gradients: dx = dout W', dW = x' dout, db = column sums of dout
affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}
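
A quick shape check (a sketch with made-up toy sizes) confirms that each gradient matches its corresponding parameter:

x.tmp = matrix(rnorm(6), nrow = 2)      # 2 samples, 3 inputs
W.tmp = matrix(rnorm(12), nrow = 3)     # 3 inputs, 4 outputs
b.tmp = matrix(rnorm(4), nrow = 1)
dout.tmp = matrix(1, 2, 4)              # pretend upstream gradient
bk = affine.bckwd(x.tmp, W.tmp, b.tmp, dout.tmp)
dim(bk$dx); dim(bk$dW); length(bk$db)   # 2x3, 3x4, and 4, as expected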

# softmax-with-loss forward pass: returns the mean cross-entropy loss, not the softmax output
softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)   # subtract row max for numerical stability
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;                             # guards against log(0)
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

# softmax-with-loss backward pass: (y - target) divided by the batch size
softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}
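
Because softmax.forwd returns a scalar loss, softmax.bckwd can be verified numerically (a sketch; x.tmp and t.tmp are toy values):

x.tmp = matrix(rnorm(6), nrow = 2)
t.tmp = matrix(c(1,0,0,0,1,0), nrow = 2, byrow = TRUE)
h = 1e-4; num.grad = matrix(0, 2, 3)
for (i.row in 1:2){
  for (i.col in 1:3){
    xp = x.tmp; xp[i.row, i.col] = xp[i.row, i.col] + h
    xm = x.tmp; xm[i.row, i.col] = xm[i.row, i.col] - h
    num.grad[i.row, i.col] = (softmax.forwd(xp, t.tmp) - softmax.forwd(xm, t.tmp))/(2*h)
  }
}
max(abs(num.grad - softmax.bckwd(x.tmp, t.tmp)))   # should be near 0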

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),
      nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1)
  }
  return(list(W = W,b = b))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

relu.func <- function(x){
  y = apply(x,2,function(x) pmax(x,0))
  return(y)
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
  if (actFun == "softmax"){
    return(softmax(A))
  }
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    # the "+" must end the first line; otherwise R evaluates the bias term as a
    # separate expression and silently drops it
    A = x%*%network$W[[i.layer]] +
      matrix(1,nrow=batch.size,ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}

cross.entropy = function(y, target){
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

loss.network = function(params, x, t, actFun){
  y = feedforward(params,x,actFun)
  return(cross.entropy(y, t))
}
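
A minimal usage sketch with toy values (the "softmax" branch of activation() assumes the softmax() helper defined in the ch04 solutions further below): with small random weights, the loss of an untrained 4-15-3 network should sit near log(3), i.e. about 1.1.

net.tmp = init.network(c(4,15,3))
x.tmp = matrix(rnorm(20), nrow = 5)
t.tmp = diag(3)[sample(1:3, 5, replace = TRUE), ]   # 5 random one-hot targets
loss.network(net.tmp, x.tmp, t.tmp, c("sigmoid","softmax"))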

Please try not to look at the following until you have finished the assignment.
########################################################################
########################################################################
########################################################################
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1

params = init.network(c(4,15,3))
batch_size = 10; n.iter =5000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1,params$W[[2]],params$b[[2]])
  z2 = softmax.forwd(a2,t.batch)
  dwSM = softmax.bckwd(a2, t.batch, 1)
  dwA2 = affine.bckwd(z1,params$W[[2]],params$b[[2]],dwSM)  # input to this layer was z1, not a1
  dwSG = sigmoid.bckwd(a1,dwA2$dx)
  dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG)
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
  loss[i.iter] = loss.network(params,x.batch,t.batch,c("sigmoid","softmax"))
}
plot(loss,type='l', xlab = "trial")


# MNIST
train <- read.csv('~/courses/CogMod/CMA2017/MNSTtrain.csv', header=TRUE)
test <- read.csv('~/courses/CogMod/CMA2017/MNSTtest.csv', header=TRUE)
train <- data.matrix(train)
test <- data.matrix(test)
train.x <- as.matrix(train[,-1]/255)
train.y.temp <- train[,1]
train.y = matrix(0,nrow = nrow(train.x), ncol = 10)
for (i in 1:nrow(train.x)){
  train.y[i,(train.y.temp[i]+1)]=1
}

params = init.network(c(784,50,10))
batch_size = 100; n.iter =5000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1,params$W[[2]],params$b[[2]])
  z2 = softmax.forwd(a2,t.batch)
  dwSM = softmax.bckwd(a2, t.batch, 1)
  dwA2 = affine.bckwd(z1,params$W[[2]],params$b[[2]],dwSM)  # input to this layer was z1, not a1
  dwSG = sigmoid.bckwd(a1,dwA2$dx)
  dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG)
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
  loss[i.iter] = loss.network(params,x.batch,t.batch,c("sigmoid","softmax"))
}
plot(loss,type='l', xlab = "trial")
apply((feedforward(params, train.x[1:10,], c("sigmoid", "softmax"))),1,which.max)
train.y.temp[1:10]+1
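
Beyond the first 10 samples, the overall training accuracy can be checked the same way (a quick sketch):

pred = apply(feedforward(params, train.x, c("sigmoid","softmax")), 1, which.max)
mean(pred == (train.y.temp + 1))   # MNIST labels are 0-9, so shift by 1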

### MNIST 3-layer
params = init.network(c(784,50,50,10))
batch_size = 100; n.iter =5000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1,params$W[[2]],params$b[[2]])
  z2 = sigmoid.forwd(a2)
  a3 = affine.forwd(z2,params$W[[3]],params$b[[3]])
  z3 = softmax.forwd(a3,t.batch)
  dwSM = softmax.bckwd(a3, t.batch, 1)
  dwA3 = affine.bckwd(z2,params$W[[3]],params$b[[3]],dwSM)   # layer input was z2, not a2
  dwSG2 = sigmoid.bckwd(a2,dwA3$dx)
  dwA2 = affine.bckwd(z1,params$W[[2]],params$b[[2]],dwSG2)  # layer input was z1, not a1
  dwSG1 = sigmoid.bckwd(a1,dwA2$dx)
  dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG1)
  params$W[[3]] = params$W[[3]] - lambda*dwA3$dW
  params$b[[3]] = params$b[[3]] - lambda*dwA3$db
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
  loss[i.iter] = loss.network(params,x.batch,t.batch,c("sigmoid","sigmoid","softmax"))
}
plot(loss,type='l', xlab = "trial")
pred<-apply((feedforward(params, train.x, c("sigmoid","sigmoid", "softmax"))),
  1, which.max)
table(pred,train.y.temp+1)   # compare against the integer labels, not the one-hot matrix
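
The confusion table can be summarized as a single training-accuracy figure:

mean(pred == (train.y.temp + 1))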

Fundamentals of Data Analysis A, Week06

# 2017 week 06 
# 
# regression
dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(ani~otouto,data=dat,xlab="Score of Younger Brother", 
  ylab = "Score of Elder Brother", pch=20,cex=2,
  xlim=c(5,27),ylim = c(5,27))
dat.lm <- lm(ani~otouto, data=dat)
summary(dat.lm)
abline(dat.lm, col = 'red',lwd = 2.5)

# two sample t-test
boxplot(dat,col=c('skyblue','coral'),ylab = "score")
t.test(dat$ani, dat$otouto, var.equal=T)
dat2<-data.frame(score = c(dat$ani, dat$otouto),order=c(rep("ani",10),rep("otouto",10)))
plot(dat2$score~as.numeric(dat2$order),pch=20,xlab="order",
  ylab="score",xlim=c(0.5,2.5),cex=2,xaxt="n")
axis(1,c(1,2),c("ani","otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

# one sample t-test
dat.D = dat$ani - dat$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)
dat.D.lm<-lm(dat.D~1)
plot(dat.D~rep(1,10),pch=20,xlab="",ylab="Difference",cex=3)
summary(dat.D.lm)

# plotting errors
plot(ani~otouto,data=dat,xlab="Score of Younger Brother", 
  ylab = "Score of Elder Brother", pch=20,cex=2,
   xlim=c(5,27),ylim = c(5,27))
dat.lm <- lm(ani~otouto, data=dat)
abline(h=mean(dat$ani),lty=2,col="green",lwd=3)
abline(dat.lm,col='red',lwd=3)
pred.lm<-predict(dat.lm)
for (i.ani in 1:10){
  lines(rep(dat$otouto[i.ani],2),c(dat$ani[i.ani],pred.lm[i.ani]),
    col='blue',lwd=3)
}

# multiple regression
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.regALL<-lm(sales~price+design+material,data=dat)
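
The fitted multiple-regression model can then be inspected in the usual way:

summary(dat.regALL)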

# ANCOVA
dat<-read.csv("http://www.matsuka.info/data_folder/ancova01.csv")
dat$pretest=dat$pretest*0.1

Cognitive Information Analysis ch04: Example Solutions

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),
      nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1)
  }
  return(list(W = W,b = b))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

relu.func <- function(x){
  y = apply(x,2,function(x) pmax(x,0))
  return(y)
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
  if (actFun == "softmax"){
    return(softmax(A))
  }
}

softmax<- function(x){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  return(exp(x)/rowSums(exp(x)))
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x%*%network$W[[i.layer]] +
      matrix(1,nrow=batch.size,ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}


cross.entropy = function(y, target){
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

loss.network = function(params, x, t, actFun){
  y = feedforward(params,x,actFun)
  return(cross.entropy(y, t))
}

numerical.grad <- function(func, params, x, t, actFun) {
  # input args
  # func:   name of objective function
  # params: list of parameters (W & b)
  # x   :   input
  # t   :   target output
  # actFun: activation function
  ##############################################
  h = 1e-4
  n.list = length(params)
  grad = params
  for (i.list in 1:n.list) {
    R = nrow(params$W[[i.list]])
    C = ncol(params$W[[i.list]])
    grad$W[[i.list]] = matrix(0, R, C)
    grad$b[[i.list]] = matrix(0, nrow = 1, C)
    for (i.col in 1:C) {
      for (i.row in 1:R) {
        temp.w = params$W[[i.list]][i.row, i.col]
        params$W[[i.list]][i.row, i.col] = temp.w + h
        plusH = do.call(func, list(params, x, t, actFun))
        params$W[[i.list]][i.row, i.col] = temp.w - h
        minusH = do.call(func, list(params, x, t, actFun))
        grad$W[[i.list]][i.row, i.col] = (plusH - minusH) / (2 * h)
        params$W[[i.list]][i.row, i.col] = temp.w
      }
      temp.b = params$b[[i.list]][i.col]
      params$b[[i.list]][i.col] = temp.b + h
      plusH = do.call(func, list(params, x, t, actFun))
      params$b[[i.list]][i.col] = temp.b - h
      minusH = do.call(func, list(params, x, t, actFun))
      grad$b[[i.list]][i.col] = (plusH - minusH) / (2 * h)
      params$b[[i.list]][i.col] = temp.b
    }
  }
  return(grad)
}

train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1
n.neurons = c(4,15,3)
params = init.network(n.neurons)
batch_size = 50; n.iter =2000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
n.layer = length(params$W)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  dW = numerical.grad("loss.network",params,x.batch,t.batch,c("sigmoid","softmax"))
  for (i.layer in 1:n.layer){
    params$W[[i.layer]] = params$W[[i.layer]]  - lambda*dW$W[[i.layer]]
    params$b[[i.layer]] = params$b[[i.layer]]  - lambda*dW$b[[i.layer]] 
  }
  loss[i.iter] = loss.network(params,x.batch,t.batch,c("sigmoid","softmax"))
}
plot(loss,type='l')

Fundamentals of Data Analysis A, Week05: t-tests

dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
mean.M <-mean(dat$h[dat$gender=="M"])
sigma = 10
n.M = length(dat$h[dat$gender=="M"])
z.value=(mean.M-171)/(sqrt(sigma/n.M))
(1-pnorm(abs(z.value)))*2

ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)
ssize.mean = mean(ssize)
ssize.var = var(ssize)
N = 10
t.value=(ssize.mean-24)/(sqrt(ssize.var/N))
(1-pt(abs(t.value),df=9))*2

h.mean.M <-mean(dat$h[dat$gender=="M"])
h.var.M <- var(dat$h[dat$gender=="M"])
n.M = length(dat$h[dat$gender=="M"])
t.value=(h.mean.M-171)/(sqrt(h.var.M/n.M))
(1-pt(abs(t.value),df = (n.M-1)))*2
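
The built-in t.test() should reproduce the same t value and p value:

t.test(dat$h[dat$gender=="M"], mu = 171)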

# RCMD
A=c(12,19,10,10,14,18,15,11,16)
B=c(15,20,16,14,17,16,12,12,19)
d=A-B
tValue<-mean(d)/sqrt(var(d)/length(d))
(1-pt(abs(tValue), df=8))*2

t.test(d,mu=0)


X1=c(78,70,66,76,78,76,88,76)
X2=c(76,72,60,72,70,72,84,70)
t.value=(mean(X1)-mean(X2))/sqrt((var(X1)+var(X2))/8)
2*(1-pt(abs(t.value),14))
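
Again, this hand computation can be confirmed with the built-in function:

t.test(X1, X2, var.equal = TRUE)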

Cognitive Information Analysis ch04

MSE <- function(target, y){
  return(0.5*sum((target-y)^2))
}

t = rep(0,10)
t[3]=1
y = c(0.1, 0.05, 0.6, 0, 0.05, 0.1, 0, 0.1, 0, 0)

x = seq(0,1,0.01)
plot(x,-log(x),lwd = 2)
cross.entropy = function(y, target){
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}
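
Applying both loss functions to the t and y defined above: note that cross.entropy() divides by nrow(as.matrix(y)), so a plain vector is treated as a 10x1 column and the sum is divided by 10; passing 1x10 matrices gives the usual -log(0.6).

MSE(t, y)                                                 # 0.5*sum((t-y)^2) = 0.0975
cross.entropy(matrix(y, nrow = 1), matrix(t, nrow = 1))   # about -log(0.6) = 0.51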

numerical.diff = function(func, x){
  h = 1e-4
  plusH = do.call(func,list(x+h))
  minusH = do.call(func,list(x-h))
  num.diff = (plusH - minusH)/(2*h)
  return(num.diff)
}

func01 = function(x){
  return(0.01*x^2+0.1*x)
}

x = seq(0,20,0.1)
y = func01(x)
plot(x,y,xlab ="x", ylab = "f(x)",type = "l",lwd =2)
ND.5 = numerical.diff('func01',5)
abline(a = func01(5)-ND.5*5, b = ND.5, col = 'red',  lwd =2)
abline(v = 5, lty = 2, col = 'red')
ND.10 = numerical.diff('func01',10)
abline(a = func01(10)-ND.10*10, b = ND.10, col = 'blue',lwd = 2)
abline(v = 10, lty = 2, col = 'blue')


func02 = function(x0, x1){
  return(x0^2 + x1^2)
}
func02.x0 = function(x0){
  return(x0^2)
}
func02.x1 = function(x1){
  return(x1^2)
}

func02R = function(x){
  return(x[1]^2 + x[2]^2)
}
numerical.grad <- function(func, x){
  h = 1e-4
  R = nrow(x)
  C = ncol(x)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.x = x[i.row,i.col]
      x[i.row, i.col] = temp.x + h
      plusH = do.call(func, list(x))
      x[i.row, i.col] = temp.x - h
      minusH = do.call(func,list(x))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      x[i.row, i.col] = temp.x
    }
  }
  return(grad)
}

numerical.grad("func02R",matrix(c(3,4),nrow=1))
numerical.grad("func02R",matrix(c(0,4),nrow=1))
numerical.grad("func02R",matrix(c(3,0),nrow=1))

require(plot3D)
x = seq(-2,2,0.2)
y = seq(-2,2,0.2)
M = mesh(x,y)
R = nrow(M$x)
C = ncol(M$x)
scaling = 0.05
plot(c(),c(),xlim = c(-2,2),ylim=c(-2,2))
for (i.col in 1:C){
  for (i.row in 1:R){
    ng = numerical.grad("func02R",matrix(c(M$x[i.row,i.col],M$y[i.row,i.col]),nrow=1))
    arrows(M$x[i.row,i.col],M$y[i.row,i.col],
           (M$x[i.row,i.col]-ng[1]*scaling),(M$y[i.row,i.col]-ng[2]*scaling),
           length = 0.05)
  }
}

grad.desc <- function(func, init.x, lr, n.iter){
  x = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
  }
  return(x)
}
x.init = matrix(c(-3,4),nrow = 1)
grad.desc("func02R",x.init,0.1,100)

x = seq(-4,4,0.2)
y = seq(-4,4,0.2)
M = mesh(x,y)
Z = as.vector(M$x^2)+as.vector(M$y^2) 
Z.mesh = matrix(Z,nrow(M$x))
contour(x,y,Z.mesh,drawlabels = F)
grad.desc2 <- function(func, init.x, lr, n.iter){
  x = init.x
  x.hist = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
    x.hist = rbind(x.hist,x)
  }
  return(x.hist)
}
gd = grad.desc2("func02R",x.init,0.1,100)
points(gd,col = 'green',pch=20)

# manual implementation
w = matrix(c(0.47355232,0.85557411,0.9977393,0.03563661,0.84668094,0.69422093),nrow=2)
x = matrix(c(0.6, 0.9), nrow=1)
t = c(0,0,1)
nn.predict <- function(w,x){
  return(x%*%w)
}

# softmax.func is not defined elsewhere in these notes; a minimal row-wise
# version (equivalent to softmax() in the ch04 solutions) is assumed here:
softmax.func <- function(x){
  max.x = apply(x,1,max)
  x = x - max.x%*%matrix(1,nrow=1,ncol=ncol(x))
  return(exp(x)/rowSums(exp(x)))
}

loss.func = function(w, x, t){
  pred = nn.predict(w,x)
  y = softmax.func(pred)
  return(cross.entropy(y, t))
}


numerical.gradCE <- function(func, w, x, t){
# input args
# func: name of function
# w   : weight
# x   : input 
# t   : target output
##############################################
  h = 1e-4
  R = nrow(w)
  C = ncol(w)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.w = w[i.row,i.col]
      w[i.row, i.col] = temp.w + h
      plusH = do.call(func, list(w,x,t))
      w[i.row, i.col] = temp.w - h
      minusH = do.call(func,list(w,x,t))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      w[i.row, i.col] = temp.w
    }
  }
  return(grad)
}

dW = numerical.gradCE("loss.func",w,x,t)

### ch 4.5 2-layer NN  ###
init.2LN <- function(n.input, n.hidden, n.output, w.std = 0.01){
  W1 = matrix(rnorm(n.input*n.hidden,0,w.std),nrow = n.input)
  B1 = matrix(rnorm(n.hidden,0,w.std),nrow =1)
  W2 = matrix(rnorm(n.hidden*n.output,0,w.std),nrow = n.hidden)
  B2 = matrix(rnorm(n.output,0,w.std),nrow =1)
  return(list(W1 = W1, B1 = B1, W2 = W2, B2 = B2))
}

softmax.2LN <- function(x){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  return(exp(x)/rowSums(exp(x)))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

pred.2LN <- function(params, x){
  NR = nrow(x)
  a1 = x%*%params$W1 + matrix(1,nrow = NR)%*%params$B1
  z1 = sigmoid.func(a1)
  a2 = z1%*%params$W2 + matrix(1,nrow = NR)%*%params$B2
  y = softmax.2LN(a2)
  return(y)
}

loss.2LN = function(params, x, t){
  y = pred.2LN(params,x)
  return(cross.entropy(y, t))
}

numerical.grad2LN <- function(func, params, x, t) {
  # input args
  # func: name of function
  # w   : weight
  # x   : input
  # t   : target output
  ##############################################
  h = 1e-4; n.list = length(params); grad = params
  for (i.list in 1:n.list) {
    R = nrow(params[[i.list]])
    C = ncol(params[[i.list]])
    grad[[i.list]] = matrix(0, R, C)
    for (i.col in 1:C) {
      for (i.row in 1:R) {
        temp.w = params[[i.list]][i.row, i.col]
        params[[i.list]][i.row, i.col] = temp.w + h
        plusH = do.call(func, list(params, x, t))
        params[[i.list]][i.row, i.col] = temp.w - h
        minusH = do.call(func, list(params, x, t))
        grad[[i.list]][i.row, i.col] = (plusH - minusH) / (2 * h)
        params[[i.list]][i.row, i.col] = temp.w
      }
    }
  }
  return(grad)
}

## example using IRIS data set
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1

params = init.2LN(4,15,3,0.01)
batch_size = 7; n.iter =2000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  dW = numerical.grad2LN("loss.2LN",params,x.batch,t.batch)
  params$W1 = params$W1 - lambda*dW$W1
  params$B1 = params$B1 - lambda*dW$B1
  params$W2 = params$W2 - lambda*dW$W2
  params$B2 = params$B2 - lambda*dW$B2
  loss[i.iter] = loss.2LN(params,x.batch,t.batch)
}
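
As elsewhere in these notes, the loss history and training accuracy can be inspected afterwards (a short usage sketch):

plot(loss, type = 'l', xlab = "trial")
pred = apply(pred.2LN(params, train.x), 1, which.max)
mean(pred == train.y.temp)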

Cognitive Information Analysis, Assignment 1: Example Solution

relu.func <- function(x){
  y = apply(x,2,function(x) pmax(x,0))
  return(y)
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = 
      matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)]),nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)]), nrow = 1)
  }
  return(list(W = W,b = b))
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x%*%network$W[[i.layer]] + 
      matrix(1,nrow=batch.size,ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}

Fundamentals of Data Analysis A, Week04

nSample=10;nRep=10^5;
CLT.unif <- function(nSample, nRep) {
  x=matrix(runif(nSample*nRep),nrow=nSample);
  x.means<-colMeans(x)
  hist(x.means,50,main='Distribution of Means of Uniform Distribution', 
    xlab='Means', probability=T)
  x.means.dens<-density(x.means)
  lines(x.means.dens,lwd=3,lty=1,col='red')
  x2=seq(0,1,0.001);CLT=dnorm(x2,mean=0.5,sd=(sqrt(1/12))/(sqrt(nSample)))
  lines(x2,CLT,lwd=3,lty=3,col='cyan')
  legend("topright",c("Density of Actual Means","Normal Distribution"), 
   lty=c(1,3), col=c('red','cyan'),lwd=3)
}

CLT.unif(10,100000)
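
Since the normal approximation has sd = sqrt(1/12)/sqrt(nSample), increasing the sample size tightens the distribution of means around 0.5, which can be seen by rerunning with a larger nSample:

CLT.unif(100, 100000)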