Cognitive Information Analysis ch04: Solution Examples

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),
      nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1)
  }
  return(list(W = W,b = b))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

relu.func <- function(x){
  y = apply(x,2,function(x) pmax(x,0))
  return(y)
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
  if (actFun == "softmax"){
    return(softmax(A))
  }
}

softmax<- function(x){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  return(exp(x)/rowSums(exp(x)))
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x%*%network$W[[i.layer]] +
      matrix(1,nrow=batch.size,ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}
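A quick usage sketch (sizes and inputs are illustrative): a 2-4-3 network on random data.

network.demo = init.network(c(2, 4, 3))
x.demo = matrix(runif(10), ncol = 2)   # 5 samples, 2 features
y.demo = feedforward(network.demo, x.demo, c("sigmoid", "softmax"))
rowSums(y.demo)   # softmax rows should each sum to 1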


cross.entropy = function(y, target){
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

loss.network = function(params, x, t, actFun){
  y = feedforward(params,x,actFun)
  return(cross.entropy(y, t))
}

numerical.grad <- function(func, params, x, t, actFun) {
  # input args
  # func:   name of objective function
  # params: list of parameters (W & b)
  # x   :   input
  # t   :   target output
  # actFun: activation function
  ##############################################
  h = 1e-4
  n.list = length(params)
  grad = params
  for (i.list in 1:n.list) {
    R = nrow(params$W[[i.list]])
    C = ncol(params$W[[i.list]])
    grad$W[[i.list]] = matrix(0, R, C)
    grad$b[[i.list]] = matrix(0, nrow = 1, C)
    for (i.col in 1:C) {
      for (i.row in 1:R) {
        temp.w = params$W[[i.list]][i.row, i.col]
        params$W[[i.list]][i.row, i.col] = temp.w + h
        plusH = do.call(func, list(params, x, t, actFun))
        params$W[[i.list]][i.row, i.col] = temp.w - h
        minusH = do.call(func, list(params, x, t, actFun))
        grad$W[[i.list]][i.row, i.col] = (plusH - minusH) / (2 * h)
        params$W[[i.list]][i.row, i.col] = temp.w
      }
      temp.b = params$b[[i.list]][i.col]
      params$b[[i.list]][i.col] = temp.b + h
      plusH = do.call(func, list(params, x, t, actFun))
      params$b[[i.list]][i.col] = temp.b - h
      minusH = do.call(func, list(params, x, t, actFun))
      grad$b[[i.list]][i.col] = (plusH - minusH) / (2 * h)
      params$b[[i.list]][i.col] = temp.b
    }
  }
  return(grad)
}

train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1
n.neurons = c(4,15,3)
params = init.network(n.neurons)
batch_size = 50; n.iter =2000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
n.layer = length(params$W)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  dW = numerical.grad("loss.network",params,x.batch,t.batch,c("sigmoid","softmax"))
  for (i.layer in 1:n.layer){
    params$W[[i.layer]] = params$W[[i.layer]]  - lambda*dW$W[[i.layer]]
    params$b[[i.layer]] = params$b[[i.layer]]  - lambda*dW$b[[i.layer]] 
  }
  loss[i.iter] = loss.network(params,x.batch,t.batch,c("sigmoid","softmax"))
}
plot(loss,type='l')
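The script ends with the loss curve; as a quick sanity check (a sketch reusing the objects above), the training-set accuracy can be computed from the fitted network:

pred = max.col(feedforward(params, train.x, c("sigmoid","softmax")))
mean(pred == train.y.temp)   # proportion of correctly classified iris samples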

Cognitive Information Analysis ch04

MSE <- function(target, y){
  return(0.5*sum((target-y)^2))
}

t = rep(0,10)
t[3]=1
y = c(0.1, 0.05, 0.6, 0, 0.05, 0.1, 0, 0.1, 0, 0)

x = seq(0,1,0.01)
plot(x,-log(x),type = 'l',lwd = 2)
cross.entropy = function(y, target){
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}
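An illustrative check with the t and y defined above (the target class, element 3, has the largest predicted value):

MSE(t, y)             # 0.5*0.195 = 0.0975
cross.entropy(y, t)   # -log(0.6 + 1e-7)/10, about 0.051 (a plain vector counts as 10 rows here)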

numerical.diff = function(func, x){
  h = 1e-4
  plusH = do.call(func,list(x+h))
  minusH = do.call(func,list(x-h))
  num.diff = (plusH - minusH)/(2*h)
  return(num.diff)
}

func01 = function(x){
  return(0.01*x^2+0.1*x)
}

x = seq(0,20,0.1)
y = func01(x)
plot(x,y,xlab ="x", ylab = "f(x)",type = "l",lwd =2)
ND.5 = numerical.diff('func01',5)
abline(a = func01(5)-ND.5*5, b = ND.5, col = 'red',  lwd =2)
abline(v = 5, lty = 2, col = 'red')
ND.10 = numerical.diff('func01',10)
abline(a = func01(10)-ND.10*10, b = ND.10, col = 'blue',lwd = 2)
abline(v = 10, lty = 2, col = 'blue')
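Since func01 is differentiable by hand, the numerical estimate can be checked against the analytic derivative f'(x) = 0.02*x + 0.1:

numerical.diff('func01', 5)    # about 0.2
0.02*5 + 0.1                   # exactly 0.2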


func02 = function(x0, x1){
  return(x0^2 + x1^2)
}
func02.x0 = function(x0){
  return(x0^2)
}
func02.x1 = function(x1){
  return(x1^2)
}

func02R = function(x){
  return(x[1]^2 + x[2]^2)
}
numerical.grad <- function(func, x){
  h = 1e-4
  R = nrow(x)
  C = ncol(x)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.x = x[i.row,i.col]
      x[i.row, i.col] = temp.x + h
      plusH = do.call(func, list(x))
      x[i.row, i.col] = temp.x - h
      minusH = do.call(func,list(x))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      x[i.row, i.col] = temp.x
    }
  }
  return(grad)
}

numerical.grad("func02R",matrix(c(3,4),nrow=1))
numerical.grad("func02R",matrix(c(0,4),nrow=1))
numerical.grad("func02R",matrix(c(3,0),nrow=1))

require(plot3D)
x = seq(-2,2,0.2)
y = seq(-2,2,0.2)
M = mesh(x,y)
R = nrow(M$x)
C = ncol(M$x)
scaling = 0.05
plot(c(),c(),xlim = c(-2,2),ylim=c(-2,2))
for (i.col in 1:C){
  for (i.row in 1:R){
    ng = numerical.grad("func02R",matrix(c(M$x[i.row,i.col],M$y[i.row,i.col]),nrow=1))
    arrows(M$x[i.row,i.col],M$y[i.row,i.col],
           (M$x[i.row,i.col]-ng[1]*scaling),(M$y[i.row,i.col]-ng[2]*scaling),
           length = 0.05)
  }
}

grad.desc <- function(func, init.x, lr, n.iter){
  x = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
  }
  return(x)
}
x.init = matrix(c(-3,4),nrow = 1)
grad.desc("func02R",x.init,0.1,100)

x = seq(-4,4,0.2)
y = seq(-4,4,0.2)
M = mesh(x,y)
Z = as.vector(M$x^2)+as.vector(M$y^2) 
Z.mesh = matrix(Z,nrow(M$x))
contour(x,y,Z.mesh,drawlabels = F)
grad.desc2 <- function(func, init.x, lr, n.iter){
  x = init.x
  x.hist = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
    x.hist = rbind(x.hist,x)
  }
  return(x.hist)
}
gd = grad.desc2("func02R",x.init,0.1,100)
points(gd,col = 'green',pch=20)

# manual implementation
w = matrix(c(0.47355232,0.85557411,0.9977393,0.03563661,0.84668094,0.69422093),nrow=2)
x = matrix(c(0.6, 0.9), nrow=1)
t = c(0,0,1)
nn.predict <- function(w,x){
  return(x%*%w)
}

loss.func = function(w, x, t){
  pred = nn.predict(w,x)
  y = softmax.func(pred)   # softmax.func is defined in the ch03 section below
  return(cross.entropy(y, t))
}


numerical.gradCE <- function(func, w, x, t){
# input args
# func: name of function
# w   : weight
# x   : input 
# t   : target output
##############################################
  h = 1e-4
  R = nrow(w)
  C = ncol(w)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.w = w[i.row,i.col]
      w[i.row, i.col] = temp.w + h
      plusH = do.call(func, list(w,x,t))
      w[i.row, i.col] = temp.w - h
      minusH = do.call(func,list(w,x,t))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      w[i.row, i.col] = temp.w
    }
  }
  return(grad)
}

dW = numerical.gradCE("loss.func",w,x,t)
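For softmax followed by cross-entropy, the analytic gradient is t(x) %*% (y - t), which gives a check on the numerical estimate (a sketch; softmax.func is the version defined in the ch03 section below):

y.pred = softmax.func(nn.predict(w, x))
dW.analytic = t(x) %*% (y.pred - t)
max(abs(dW - dW.analytic))   # should be tiny, around 1e-7 or smaller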

### ch 4.5 2-layer NN  ###
init.2LN <- function(n.input, n.hidden, n.output, w.std = 0.01){
  W1 = matrix(rnorm(n.input*n.hidden,0,w.std),nrow = n.input)
  B1 = matrix(rnorm(n.hidden,0,w.std),nrow =1)
  W2 = matrix(rnorm(n.hidden*n.output,0,w.std),nrow = n.hidden)
  B2 = matrix(rnorm(n.output,0,w.std),nrow =1)
  return(list(W1 = W1, B1 = B1, W2 = W2, B2 = B2))
}

softmax.2LN <- function(x){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  return(exp(x)/rowSums(exp(x)))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

pred.2LN <- function(params, x){
  NR = nrow(x)
  a1 = x%*%params$W1 + matrix(1,nrow = NR)%*%params$B1
  z1 = sigmoid.func(a1)
  a2 = z1%*%params$W2 + matrix(1,nrow = NR)%*%params$B2
  y = softmax.2LN(a2)
  return(y)
}

loss.2LN = function(params, x, t){
  y = pred.2LN(params,x)
  return(cross.entropy(y, t))
}

numerical.grad2LN <- function(func, params, x, t) {
  # input args
  # func: name of objective function
  # params: list of parameters (W1, B1, W2, B2)
  # x   : input
  # t   : target output
  ##############################################
  h = 1e-4; n.list = length(params); grad = params
  for (i.list in 1:n.list) {
    R = nrow(params[[i.list]])
    C = ncol(params[[i.list]])
    grad[[i.list]] = matrix(0, R, C)
    for (i.col in 1:C) {
      for (i.row in 1:R) {
        temp.w = params[[i.list]][i.row, i.col]
        params[[i.list]][i.row, i.col] = temp.w + h
        plusH = do.call(func, list(params, x, t))
        params[[i.list]][i.row, i.col] = temp.w - h
        minusH = do.call(func, list(params, x, t))
        grad[[i.list]][i.row, i.col] = (plusH - minusH) / (2 * h)
        params[[i.list]][i.row, i.col] = temp.w
      }
    }
  }
  return(grad)
}

## example using IRIS data set
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1

params = init.2LN(4,15,3,0.01)
batch_size = 7; n.iter =2000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  dW = numerical.grad2LN("loss.2LN",params,x.batch,t.batch)
  params$W1 = params$W1 - lambda*dW$W1
  params$B1 = params$B1 - lambda*dW$B1
  params$W2 = params$W2 - lambda*dW$W2
  params$B2 = params$B2 - lambda*dW$B2
  loss[i.iter] = loss.2LN(params,x.batch,t.batch)
}
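The loop above stops without visualizing or evaluating the result; a minimal check (a sketch reusing the objects above):

plot(loss, type = 'l')
pred = max.col(pred.2LN(params, train.x))
mean(pred == train.y.temp)   # training-set accuracy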

Cognitive Information Analysis, Assignment 1: Solution Example

relu.func <- function(x){
  y = apply(x,2,function(x) pmax(x,0))
  return(y)
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = 
      matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)]),nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)]), nrow = 1)
  }
  return(list(W = W,b = b))
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x%*%network$W[[i.layer]] + 
      matrix(1,nrow=batch.size,ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}
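A usage sketch with illustrative sizes (3 inputs, two hidden layers, 2 outputs); note that actFun needs one entry per weight layer:

network.demo = init.network(c(3, 5, 4, 2))
x.demo = matrix(runif(12), ncol = 3)   # 4 samples, 3 features
feedforward(network.demo, x.demo, c("relu", "relu", "sigmoid"))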

Cognitive Information Analysis ch03: Neural Networks

step.func <- function(x){
  return(as.numeric(x > 0))
}
x = seq(-5, 5, 0.1)
y = step.func(x)
plot(x,y, ylab = 'y', xlab = 'a', type ="l", lwd =2)

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

y = sigmoid.func(x)
plot(x,y, ylab = 'y', xlab = 'a', type ="l", lwd =2)

y.step = step.func(x)
y.sigm = sigmoid.func(x)
plot(x,y.step, ylab = 'y', xlab = 'a', type ="l", lwd =2)
lines(x,y.sigm, lwd =2, lty = 2)

relu.func <- function(x){
  return(pmax(0,x))
}

y.relu = relu.func(x)
plot(x,y.relu, ylab = 'y', xlab = 'a', type ="l", lwd =2)

# matrix product examples
A = matrix(1:4, nrow = 2, byrow = T)
B = matrix(5:8, nrow = 2, byrow = T)
A%*%B

A = matrix(1:6, nrow = 3, byrow = T)
B = matrix(7:8, nrow = 2, byrow = T)
A%*%B

x = c(1,0.5)
W1 = matrix((1:6)*0.1, nrow = 2)
B1 = (1:3)*0.1
A1 = x%*%W1 + B1
Z1 = sigmoid.func(A1)

W2 = matrix((1:6)*0.1, nrow = 3)
B2 = c(0.1, 0.2)
A2 = Z1%*%W2 + B2
Z2 = sigmoid.func(A2)

W3 = matrix((1:4)*0.1, nrow = 2)
B3 = c(0.1, 0.2)
A3 = Z2%*%W3+ B3
Z3 = A3

# function to initialize 3L network
init.3L.network <- function(){
  W1 = matrix((1:6)*0.1, nrow = 2)
  B1 = (1:3)*0.1
  W2 = matrix((1:6)*0.1, nrow = 3)
  B2 = c(0.1, 0.2)
  W3 = matrix((1:4)*0.1, nrow = 2)
  B3 = c(0.1, 0.2)
  return(list(W1 = W1, B1 = B1, W2 = W2, B2 = B2, W3 = W3, B3 = B3))
}
# feedforward process
forward.3L <- function(network, x){
  A1 = x%*%network$W1 + network$B1
  Z1 = sigmoid.func(A1)
  A2 = Z1%*%network$W2 + network$B2
  Z2 = sigmoid.func(A2)
  A3 = Z2%*%network$W3 + network$B3
  Z3 = sigmoid.func(A3)
  return(Z3)
}

network<-init.3L.network()
y = forward.3L(network, c(1, 0.5))

# naive softmax overflows: exp(1010) is Inf, so this yields NaN
a = c(1010,1000,990)
exp(a)/sum(exp(a))

softmax.func <- function(x){
  max.x = max(x)
  return(exp(x-max.x)/sum(exp(x-max.x)))
}
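With the maximum subtracted, the overflow example above now evaluates correctly:

softmax.func(a)   # approx. 1.0, 4.5e-5, 2.1e-9 instead of NaN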

train <- read.csv('http://peach.l.chiba-u.ac.jp/course_folder/MNSTtrain.csv', 
  header=TRUE)
train <- data.matrix(train)
train.x <- train[,-1]
train.y <- train[,1]
train.x <- t(train.x/255)
download.file("http://peach.l.chiba-u.ac.jp/course_folder/trNetwork.Rdata",
  "trNetwork.Rdata")
load("trNetwork.Rdata")
network=trNetwork

n.train = ncol(train.x)
conf.matrix = matrix(0,10,10)
for (i.loop in 1:n.train){
  y = forward.3L(network,train.x[,i.loop])
  max.y = max.col(y)
  conf.matrix[max.y, (train.y[i.loop]+1)] = conf.matrix[max.y, (train.y[i.loop]+1)] + 1
}
accuracy = sum(diag(conf.matrix))/n.train

# batch 
batch_size = 200
conf.matrix = matrix(0,10,10)
for (i.batch in seq(1,n.train, batch_size)){
  # train.x holds one example per column, so transpose the batch for forward.3L;
  # fixed factor levels keep the table 10x10 even if a digit is absent from a batch
  y = forward.3L(network, t(train.x[,(i.batch:(i.batch+batch_size-1))]))
  pred = max.col(y)
  conf.matrix = conf.matrix + table(factor(pred, levels = 1:10),
       factor(train.y[i.batch:(i.batch+batch_size-1)]+1, levels = 1:10))
}
accuracy = sum(diag(conf.matrix))/n.train

Cognitive Information Analysis 2017 – Ch 02

Chapter 2: Perceptron

# with THRESHOLD (theta)
AND.gate <- function(x1, x2){
  w1 = 0.5
  w2 = 0.5
  theta = 0.7
  y.temp = w1*x1 + w2*x2
  if (y.temp <= theta){
    y = 0
  } else {
    y = 1
  }
  return(y)
}

AND.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; theta = 0.7
  return(as.numeric(w1*x1 + w2*x2 > theta))
}

NAND.gate <- function(x1, x2){
  w1 = -0.5; w2 = -0.5; theta = -0.7
  return(as.numeric(w1*x1 + w2*x2 > theta))
}

OR.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; theta = 0.3
  return(as.numeric(w1*x1 + w2*x2 > theta))
}

# with BIAS (b)
AND.gate <- function(x1, x2){
  w1 = 0.5
  w2 = 0.5
  b = -0.7
  y.temp = w1*x1 + w2*x2 + b
  if (y.temp <= 0){
    y = 0
  } else {
    y = 1
  }
  return(y)
}

AND.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; b = -0.7
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

NAND.gate <- function(x1, x2){
  w1 = -0.5; w2 = -0.5; b = 0.7
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

OR.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; b = -0.3
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

NOR.gate <- function(x1, x2){
  w1 = -0.5; w2 = -0.5; b = 0.3
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

plot.logic <- function(logic.oper){
  x1 = c(0,0,1,1);
  x2 = c(0,1,0,1);
  if (logic.oper == "and") {
    w1 = 0.5; w2 = 0.5; theta = 0.7;
    true.point = AND.gate(x1,x2)
  } else if (logic.oper == "or") {
    w1 = 0.5; w2 = 0.5; theta = 0.3;
    true.point = OR.gate(x1,x2)
  } else if (logic.oper == "nand") {
    w1 = -0.5; w2 = -0.5; theta = -0.7;
    true.point = NAND.gate(x1,x2)
  } else if (logic.oper == "nor"){
    w1 = -0.5; w2 = -0.5; theta = -0.3;
    true.point = NOR.gate(x1,x2)
  } else {warning("incompatible operator");stop() }
  plot(c(0,0,1,1),c(0,1,0,1),xlim = c(-0.5, 1.5), ylim = c(-0.5, 1.5), 
       pch = 20, cex= 2, col = true.point+1)
  abline(a = theta/w2, b = -w1/w2, lwd = 3)  # boundary: x2 = theta/w2 - (w1/w2)*x1
}   
 
XOR.gate <- function(x1, x2){
  gate1 <- NAND.gate(x1,x2)
  gate2 <- OR.gate(x1,x2)
  y <- AND.gate(gate1,gate2)
  return(y)
}
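A truth-table check (the gate functions are already vectorized):

XOR.gate(c(0,0,1,1), c(0,1,0,1))   # expect 0 1 1 0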

plot.XOR <- function(){
  x1 = c(0,0,1,1);
  x2 = c(0,1,0,1);
  w11 = -0.5; w21 = -0.5; theta1 = -0.7
  w12 = 0.5; w22 = 0.5; theta2 = 0.3
  true.point = XOR.gate(x1, x2)
  plot(c(0,0,1,1),c(0,1,0,1),xlim = c(-0.5, 1.5), ylim = c(-0.5, 1.5), 
       pch = 20, cex= 2, col = true.point+1)
  abline(a = theta1/w21, b = -w11/w21, lwd = 3)
  abline(a = theta2/w22, b = -w12/w22, lwd = 3)
}   

Cognitive Information Analysis: A Casino Experience

There is said to be a gamble in which three dice are thrown and you bet on their sum: a sum of 4 to 10 counts as "small" and 11 to 17 as "large", so it is roughly a two-way bet (we will call a sum of 3 "X" and a sum of 18 "Y").
What K experienced was this: the display showed that "large" had come up 70% of the time over the past 50 rounds, so he reasoned that "small" was due, bet on small, and small in fact came up. Let us use simulation to examine how often this sequence of events occurs.
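Before simulating, the exact per-roll probabilities can be obtained by enumerating all 6^3 = 216 outcomes (a quick sketch):

sums = rowSums(expand.grid(1:6, 1:6, 1:6))
table(cut(sums, breaks = c(2, 3, 10, 17, 18),
          labels = c("X", "small", "large", "Y"))) / length(sums)
# X and Y: 1/216 each; small and large: 107/216 each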

# inefficient, but intuitively easy-to-follow version

# single-trial function
die <- function() {
  die1 <- sample(1:6,1)
  die2 <- sample(1:6,1)
  die3 <- sample(1:6,1)
  die.sum = die1 + die2 + die3
  if (die.sum == 3) {
    result = "X"
  } else if (die.sum == 18) {
    result = "Y"
  } else if (die.sum < 11) {
    result = "small"
  } else {
    result = "large"
  }
  return(result)
}

# function returning the outcome of one 51-trial block
SL51=function( ) {
  n.rep=51
  res=rep("N",51)
  for (i_rep in 1:n.rep) {
    res[i_rep]=die()
  }
  p.large=sum(res[1:50]=="large")/50
  return(list(p.large=p.large,last.result=res[51]))
}
  
# function repeating the 51-trial block n.rep times
ck.kurimoto <- function(n.rep) {
  p.hist = rep(0,n.rep)
  lr.hist = rep("N",n.rep)
  for (i_rep in 1:n.rep) {
     result=SL51()
     p.hist[i_rep] = result$p.large
     lr.hist[i_rep] = result$last.result
  }
  return(list(p.hist=p.hist,lr.hist=lr.hist))
}

# run the simulation, then keep only the runs where "large" appeared in at least
# 70% of the first 50 trials (the number of repetitions is illustrative)
m.result = ck.kurimoto(1e5)
index70=which(m.result$p.hist>=0.7)
sum(m.result$lr.hist[index70]=="small")/length(index70)

# vectorized version combining the three functions above; the exact three-dice
# probabilities are P(X)=P(Y)=1/216 and P(small)=P(large)=107/216
n.rep=1e6
die.mat = matrix(sample(c("X","Y","S","L"),
 n.rep*51,prob=c(1/216,1/216,107/216,107/216),replace=T),nrow=n.rep)
p.large=rowSums(die.mat[,1:50]=="L")/50
index70=which(p.large>=0.7)
ck51L70=die.mat[index70,51]
table(ck51L70)/length(index70)

Cognitive Information Analysis Exercises: Gambling

# intuitive but inefficient version
bet.example<-function(n.trial,p.win,max.bet) {
  rew.history=rep(0,n.trial)
  bet.history=rep(0,n.trial)
  WL.history=rep("W",n.trial)
  loss.counter=0
  i_trial=0;
  WL="L"
  loss.counter.reset=FALSE
  while (i_trial < n.trial | WL=="L" ) {
    i_trial=i_trial+1
    bet=2^loss.counter
    if (bet > max.bet) {
      bet = max.bet
      loss.counter.reset = TRUE
    }
    bet.history[i_trial]=bet
    WL=sample(c("W","L"),1,prob=c(p.win, 1-p.win))
    WL.history[i_trial]=WL
    if (WL=="L") {
      loss.counter=loss.counter+1
      reward=-bet
    } else {
      loss.counter=0
      reward=bet
    }
    if (loss.counter.reset){loss.counter=0; loss.counter.reset=FALSE}
    rew.history[i_trial]=reward
  }
  return(list(reward=sum(rew.history),money.required=max(bet.history)))
}

# more efficient version (max.bet not yet implemented)
bet.example02<-function(n.trial,p.win) {
  WL=sample(c("W","L"),n.trial,replace=T,prob=c(p.win, 1-p.win))
  if (WL[n.trial]!="W") {
    WL.extra=sample(c("W","L"),1,prob=c(p.win,1-p.win))
    while (tail(WL.extra,1)=="L") {
      WL.extra=c(WL.extra,sample(c("W","L"),1,prob=c(p.win,1-p.win)))
     }
    WL=c(WL,WL.extra)
  }
  reward=sum(WL=="W")
  result=rle(WL)
  loss.index=result$values=="L"
  money.required=2^max(result$lengths[loss.index])
  return(list(reward=reward,money.required=money.required))
}
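A usage sketch (parameters are illustrative; p.win = 18/38 mimics an even-money roulette bet). In a martingale each win nets one base unit, hence reward equals the number of wins:

set.seed(1)
bet.example02(1000, 18/38)   # total reward and the bankroll the longest losing streak required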

Cognitive Information Analysis Exercises: TSP

traveling salesman problem
Before next week, write the functions that perform inversion and translation.

# initialisation, etc.
n.city=20
location=matrix(runif(n.city*2,min=0,max=100),nrow=n.city)
route=sample(n.city) 
calc.Dist<-function(location,route) {
  n.city=length(route)
  total.Dist=0
  route=c(route,route[1])
  for (i_city in 1:n.city){
    total.Dist=total.Dist+dist(location[c(route[i_city],route[i_city+1]),])
  }
  return(total.Dist)
}

# plotting results (best.route is an optimized route, e.g. the first n.city
# entries of the last row of the optHist matrix returned by TSP.demo below)
plot(rbind(location[route,],location[route[1],]),type='o',pch=20,cex=2.5, col='red',
  xlab='location @X',ylab='location @Y',main='Initial route')
plot(rbind(location[best.route,],location[best.route[1],]),type='o',pch=20, 
  col='magenta',cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
# switching
TSP.switch<-function(route) {
  two.cities=sample(route,2,replace=F)
  route[two.cities]=route[rev(two.cities)]
  return(route)
}

# inversion
TSP.inv<-function(route,inv.length) { 
  inv.begin=sample(route,1)
  inv.end=min(inv.begin+inv.length-1,length(route))
  route[inv.begin:inv.end]=rev(route[inv.begin:inv.end])
  return(route)
}

# translation
TSP.trans<-function(route,tr.length) {
  trP1=sample(route,1)
  tr.vec=route[trP1:min(length(route),trP1+tr.length-1)]
  temp.vec=route[-(trP1:min(length(route),trP1+tr.length-1))]
  trP2=sample(1:length(temp.vec),1)
  if (trP2==length(temp.vec)) {
    new=c(temp.vec,tr.vec)
  } else {
    new=c(temp.vec[1:trP2],tr.vec,temp.vec[(trP2+1):length(temp.vec)])
  }
return(new)
}
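Quick checks of the three operators on a toy route (output varies because the cut points are random):

TSP.switch(1:10)    # two cities swapped
TSP.inv(1:10, 4)    # a stretch of up to 4 cities reversed
TSP.trans(1:10, 3)  # a stretch of up to 3 cities moved elsewhere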

# demo
TSP.demo<-function(n.city=20, maxItr=1000) {
  location=matrix(runif(n.city*2,min=0,max=100),nrow=n.city)
  route=sample(n.city) 
  ## param. for simulated annealing - change values if necessary 
  C=1;eta=0.99;TEMP=50; 
  ##
  optDist=calc.Dist(location,route)
  optHist=matrix(0,nrow=maxItr,ncol=(length(route)+1))
  optHist[1,]=c(route,optDist)
  for (i_loop in 2:maxItr) {
    rand.op=sample(c('inv','sw','trans'),1,prob=c(0.75,0.125,0.125))
    if (rand.op=='inv') {
      new.route=TSP.inv(route,sample(2:n.city,1))
    } else if (rand.op=='sw') {
      new.route=TSP.switch(route)
    } else { new.route=TSP.trans(route,sample(2:(round(n.city/2)),1))}
    new.dist=calc.Dist(location,new.route)
    delta=new.dist-optDist
    prob=1/(1+exp(delta/(C*TEMP)))
    if (runif(1) < prob) {
      route=new.route;optDist=new.dist;
    } 
    optHist[i_loop,]=c(route,optDist);
    TEMP=TEMP*eta
  }
  par(mfrow=c(1,2)) 
  plot(location[optHist[1,c(1:n.city,1)],],type='o',pch=20,cex=2.5, col='red',
    xlab='location @X',ylab='location @Y',main='Initial route')
  plot(location[optHist[maxItr,c(1:n.city,1)],],type='o',pch=20, col='magenta',
    cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
  return(optHist)
}
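Running the demo (a sketch): it draws the initial and optimized routes side by side and returns the history of routes and total distances.

optHist = TSP.demo(n.city = 20, maxItr = 1000)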

Cognitive Information Analysis Exercises A

Optimization problems

#Objective Function 
funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
xmin=-5;xmax=5;n_len=100;
xs=ys<-seq(xmin, xmax,length.out=n_len)
z<-outer(xs,ys,funZ)
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))

Example implementation: the gradient method

# initialization
x=x.hist=runif(1,min=-5,max=5) # initial value of x
y=y.hist=runif(1,min=-5,max=5) # initial value of y
lambda=0.01                    # step size
zeta=1e-8                      # tol (stopping criterion)
t=0                            # time
g=100                          # maximum gradient
# end initialization

### note ###
# obj fun: z=x^4-16*x^2-5*x+y^4-16*y^2-5*y
# part. deriv. x: 4*x^3-32*x-5
# part. deriv. y: 4*y^3-32*y-5
#############

# main 
while (g>zeta) {
  t=t+1
  g.x=4*x^3-32*x-5
  g.y=4*y^3-32*y-5
  x=x-lambda*g.x
  y=y-lambda*g.y
  g=max(abs(g.x),abs(g.y))
  x.hist=c(x.hist,x)
  y.hist=c(y.hist,y)
}

# plotting results
par(mfrow=c(1,3))
par(mar=c(4.0,4.1,4.1,1.1))
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))
lines(x.hist,y.hist,type='o',pch=20,col='red')
plot(x.hist,type='l',xlab="time",ylab="value of x",lwd=2,
 main="how value of x changes over time")
plot(y.hist,type='l',xlab="time",ylab="value of y",lwd=2,
 main="how value of y changes over time")

Example implementation: simple stochastic optimization

# initialization
x=runif(1,min=-5,max=5)             # initial value of x
y=runif(1,min=-5,max=5)             # initial value of y
f.xy=x^4-16*x^2-5*x+y^4-16*y^2-5*y  # initial value of obj. fun.
w=0.1                               # search width
t=0                                 # time
max.t=1000                          # max iter (stopping criterion)
stop.crit=F                         # stop criterion
x.hist=rep(0,max.t)                 # history of x
y.hist=rep(0,max.t)                 # history of y
x.hist[1]=x
y.hist[1]=y
# end initialization

# main
while (stop.crit==F) {
  t=t+1
  x.temp=x+rnorm(1,mean=0,sd=w)
  y.temp=y+rnorm(1,mean=0,sd=w)
  f.temp=funZ(x.temp,y.temp)
  if (f.temp < f.xy) {
    x=x.temp
    y=y.temp
    f.xy=f.temp
  }
  x.hist[t]=x
  y.hist[t]=y
  if (t==max.t){stop.crit=T}
}

# plotting results
par(mfrow=c(1,3))
par(mar=c(4.0,4.1,4.1,1.1))
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))
lines(x.hist,y.hist,type='o',pch=20,col='red')
plot(x.hist,type='l',xlab="time",ylab="value of x",lwd=2,
 main="how value of x changes over time")
plot(y.hist,type='l',xlab="time",ylab="value of y",lwd=2,
 main="how value of y changes over time")

Example implementation: simulated annealing

# initialization
x=runif(1,min=-5,max=5)             # initial value of x
y=runif(1,min=-5,max=5)             # initial value of y
f.xy=x^4-16*x^2-5*x+y^4-16*y^2-5*y  # initial value of obj. fun.
t=0                                 # time
max.t=1000                          # max iter (stopping criterion)
stop.crit=F                         # stop criterion
x.hist=rep(0,max.t)                 # history of x
y.hist=rep(0,max.t)                 # history of y
x.hist[1]=x
y.hist[1]=y
tau=1
c=1
g=0.999
# end initialization
 
# main
while (stop.crit==F) {
  t=t+1
  x.temp=x+rnorm(1,mean=0,sd=tau)
  y.temp=y+rnorm(1,mean=0,sd=tau)
  f.temp=funZ(x.temp,y.temp)
  deltaE=f.temp-f.xy
  p=1/(1+exp(deltaE/(c*tau)))
  if (runif(1) < p) {
    x=x.temp
    y=y.temp
    f.xy=f.temp
  }
  x.hist[t]=x
  y.hist[t]=y
  tau=tau*g
  if (t==max.t){stop.crit=T}
}
 
# plotting results
par(mfrow=c(1,3))
par(mar=c(4.0,4.1,4.1,1.1))
contour(x=xs,y=ys,z=z,nlevels=30,drawlabels=F,col="darkgreen",
 main=expression(z=x^4-16*x^2-5*x+y^4-16*y^2-5*y))
lines(x.hist,y.hist,type='o',pch=20,col='red')
plot(x.hist,type='l',xlab="time",ylab="value of x",lwd=2,
 main="how value of x changes over time")
plot(y.hist,type='l',xlab="time",ylab="value of y",lwd=2,
 main="how value of y changes over time")

Comparison of the three methods


GD<-function(x.init,y.init,n.iter,lambda){
x=x.init;y=y.init
z.hist=rep(0,n.iter);z.hist[1]=funZ(x,y)
# main
for (i_iter in 2:n.iter) {
  g.x=4*x^3-32*x-5
  g.y=4*y^3-32*y-5
  x=x-lambda*g.x
  y=y-lambda*g.y
  z.hist[i_iter]=funZ(x,y)
}
return(z.hist)
}

stochOpt<-function(x.init,y.init,n.iter,w) {
x=x.init;y=y.init;f.xy=funZ(x,y)
z.hist=rep(0,n.iter);z.hist[1]=f.xy
# main
for (i_iter in 2:n.iter) {
  x.temp=x+rnorm(1,mean=0,sd=w)
  y.temp=y+rnorm(1,mean=0,sd=w)
  f.temp=funZ(x.temp,y.temp)
  if (f.temp < f.xy) {
    x=x.temp;y=y.temp;f.xy=f.temp
  }
  z.hist[i_iter]=f.xy
}
return(z.hist)
}
 
simAnn<-function(x.init,y.init,n.iter,tau,c,g) {
x=x.init;y=y.init;f.xy=funZ(x,y)
z.hist=rep(0,n.iter);z.hist[1]=f.xy
# main
for (i_iter in 2:n.iter) {
  x.temp=x+rnorm(1,mean=0,sd=tau)
  y.temp=y+rnorm(1,mean=0,sd=tau)
  f.temp=funZ(x.temp,y.temp)
  deltaE=f.temp-f.xy
  p=1/(1+exp(deltaE/(c*tau)))
  if (runif(1) < p) {
    x=x.temp;y=y.temp;f.xy=f.temp
  }
  z.hist[i_iter]=f.xy
  tau=tau*g
}
return(z.hist)
}

n.rep=500
n.iter=500
xs=runif(n.rep,-3,3);ys=runif(n.rep,-3,3)
GD.hist=SA.hist=SO.hist=matrix(0,n.iter,n.rep)
for (i.rep in 1:n.rep) {
  GD.hist[,i.rep]=GD(xs[i.rep],ys[i.rep],n.iter,0.025)
  SO.hist[,i.rep]=stochOpt(xs[i.rep],ys[i.rep],n.iter,2)
  SA.hist[,i.rep]=simAnn(xs[i.rep],ys[i.rep],n.iter,3,1,0.999)
}

plot(rowMeans(GD.hist),type='l',ylim=c(-160,0),lwd=3)
lines(rowMeans(SO.hist),col='red',lwd=3)
lines(rowMeans(SA.hist),col='blue',lwd=3)
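A legend helps tell the three curves apart (colors as in the calls above):

legend("topright", legend = c("gradient descent","stochastic opt.","simulated annealing"),
       col = c("black","red","blue"), lwd = 3)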

Cognitive Information Analysis A: Why do foreign residents' districts form?

『社会を〈モデル〉でみる:数理社会学への招待』 (Viewing Society through Models: An Invitation to Mathematical Sociology)
Chapter 29: Why foreign residents' districts form even without discrimination
There are two groups, ○ and ♯, and an agent moves to another cell under the following conditions:
○: when 2 or fewer of its neighbors are ○
♯: when fewer than 1/3 of its neighbors are ♯

Example 1
#
# version in which an agent may move more than once within one epoch
# (next time, write a version in which each agent moves at most once per epoch)
#

# function to check circles (note: the 3x3 block includes the agent itself)
ck.circle <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  circles=length(which(neighbor==2))
  act=ifelse(circles<3,"move","stay")
  return(act)
}

# function to check sharps
ck.sharp <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  all.neighb=length(which(neighbor!=0))-1
  if (all.neighb==0){
    return("move")
  } else {
    sharps=length(which(neighbor==1))-1
    act=ifelse(sharps/all.neighb<(1/3),"move","stay")
    return(act)
  }
}

FUNmigration<-function(N.row=10, N.col=10, N.sharp=10, N.circle=10) {
   
# initialization
world=matrix(0,nrow=N.row,ncol=N.col)
temp.vec=c(rep(1,N.sharp),rep(2,N.circle))
world[sample(1:(N.row*N.col),N.sharp+N.circle,replace=F)]=temp.vec
moved=1;counter=0

# plotting initial config.
par(mfrow=c(1,2))
idx1<-which(world==2,arr.ind=T);idx2<-which(world==1,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),
  ylim=c(0.5,N.col+0.5),main="Initial Arrangement",
  xlab="location @ x", ylab="location @ y")
  text(idx1[,1],idx1[,2],"o",cex=4,col="red");
  text(idx2[,1],idx2[,2],"#",cex=4,col="blue")

# main loop, max.iter=1000
while (moved>0 & counter<1000) {
  moved=0
  counter=counter+1
  for (i_row in 1:N.row) {
    for (i_col in 1:N.col) {
      if (world[i_row,i_col]==1) {
        act=ck.sharp(world,i_row,i_col,N.row,N.col)
        if (act=="move") {   
          dest=sample(which(world==0),1)
          world[dest]=1
          world[i_row,i_col]=0
          moved=1  
        }
      }
      if (world[i_row,i_col]==2) {
        act=ck.circle(world,i_row,i_col,N.row,N.col)
        if (act == "move") {
         dest=sample(which(world==0),1)
         world[dest]=2
         world[i_row,i_col]=0
         moved=1
        }
}}}}

# plotting result
idx1<-which(world==1,arr.ind=T)
idx2<-which(world==2,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),ylim=c(0.5,N.col+0.5),
  main="Arrangement After Migration",xlab="location @ x", ylab="location @ y")
text(idx1[,1],idx1[,2],"#",cex=4,col="blue")
text(idx2[,1],idx2[,2],"o",cex=4,col="red")
return(world)
}
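A usage sketch with the default arguments (10x10 world, 10 agents per group):

set.seed(123)
world = FUNmigration()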

Example 2
#
# version in which each agent moves at most once per epoch
#

FUNmigration2<-function(N.row=10, N.col=10, N.sharp=10, N.circle=10) {
   
# initialization: each sharp gets a unique positive ID, each circle a unique
# negative ID (the per-agent loop below looks agents up by ID)
world=matrix(0,nrow=N.row,ncol=N.col)
temp.vec=c(1:N.sharp, -(1:N.circle))
world[sample(1:(N.row*N.col),N.sharp+N.circle,replace=F)]=temp.vec
moved=1;counter=0

# plotting initial config.
par(mfrow=c(1,2))
idx1<-which(world<0,arr.ind=T);idx2<-which(world>0,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),
  ylim=c(0.5,N.col+0.5),main="Initial Arrangement",
  xlab="location @ x", ylab="location @ y")
  text(idx1[,1],idx1[,2],"o",cex=4,col="red");
  text(idx2[,1],idx2[,2],"#",cex=4,col="blue")

ck.circle <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  circles=length(which(neighbor<0))
  act=ifelse(circles<3,"move","stay")
  return(act)
}
 
ck.sharp <- function(world,i_row,i_col,max.row,max.col) {
  neighbor=world[max(i_row-1,1):min(i_row+1,max.row),
                 max(i_col-1,1):min(i_col+1,max.col)]
  all.neighb=length(which(neighbor!=0))-1
  if (all.neighb==0){
    return("move")
  } else {
    sharps=length(which(neighbor>0))-1
    act=ifelse(sharps/all.neighb<(1/3),"move","stay")
    return(act)
  }
}

# the main loop differs from Example 1: agents are visited by ID, at most once per epoch
moved=1;counter=0
while (moved>0 & counter<1000) {
  moved=0
  counter=counter+1
  for (i_sharp in 1:N.sharp) {
    idx=which(world==i_sharp,arr.ind=T)
    act=ck.sharp(world,idx[1],idx[2],N.row,N.col);
    if (act=="move") {   
      dest=sample(which(world==0),1)
      world[dest]=i_sharp
      world[idx[1],idx[2]]=0
      moved=1  
    }
  }
  for (i_circle in -1:-N.circle) {
    idx=which(world==i_circle,arr.ind=T)
    act=ck.circle(world,idx[1],idx[2],N.row,N.col);
    if (act=="move") {   
      dest=sample(which(world==0),1)
      world[dest]=i_circle
      world[idx[1],idx[2]]=0
      moved=1  
    }  
  }
} 

# plotting result
idx1<-which(world>0,arr.ind=T)
idx2<-which(world<0,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,N.row+0.5),ylim=c(0.5,N.col+0.5),
  main="Arrangement After Migration",xlab="location @ x", ylab="location @ y")
text(idx1[,1],idx1[,2],"#",cex=4,col="blue")
text(idx2[,1],idx2[,2],"o",cex=4,col="red")
return(world)
}