Disclaimer

The code in this courselog is mainly what I present as examples in lectures, seminars, and practical sessions for undergraduate and master's students, and as a rule it is written to be as easy to follow as possible. Some examples are quite inefficient, and some were written in a quick-and-dirty way. Also, since I normally use MATLAB, some odd habits may show through. Please keep this in mind when using or referring to these examples. Unfortunately, a backup failure during the server migration in September 2013 destroyed everything up to the first semester of 2013…

A Brief Guide to R (a guide to R commands, with examples) is available here.

広域システム特別講義II w07

ALCOVE w/ GD

ALC.init<-function(size) {
  # size[1] = input dimension
  # size[2] = number of exemplars
  # size[3] = number of categories
  alpha=matrix(1,nrow=1,ncol=size[1])/size[1]
  w=matrix(rnorm(size[3]*size[2])*0.1,ncol=size[2])
  c=2            # gain
  phi=3          # decision strength
  lrA=0.2        # learning rate for attentions
  lrW=0.1        # learning rate for associations
  return(list(alpha = alpha,
              w = w,
              lrA = lrA,
              lrW = lrW,
              c = c,
              phi = phi))
}

# alcove forward process
alcove.forward <- function(parmSet, inp, exemplar){
  diff= matrix(abs(matrix(1,nrow(exemplar),1)%*%inp-exemplar),nrow=nrow(exemplar))
  exemp=exp(-parmSet$c*(diff%*%t(parmSet$alpha)))
  out=parmSet$w%*%exemp
  return(list(diff = diff, exemp = exemp, out = out))
}

# alcove backward process
alcove.backward <- function(res.forward,parmSet, target){
  err=target-res.forward$out
  dW=parmSet$lrW*res.forward$exemp%*%t(err)
  dA=-parmSet$lrA*(t(err)%*%parmSet$w*t(res.forward$exemp))%*%res.forward$diff*parmSet$c
  return(list(dW = dW, dA = dA))
}

# main function
alcove<-function(parmSet, inp, exemplar, targ,iter) {
  # ----ALCOVE for R ---
  # input arguments
  #  parmSet - parameter set
  # inp - input matrix (n_sample x n_dimension)
  # exemplar - exemplar matrix (n_exemplar x n_dimension)
  # targ - target matrix (n_sample x n_category)
  # iter - # of training epochs
  # ----ALCOVE for R ---
  # initialization
  inpDim = ncol(inp)
  n.inp = nrow(inp)
  n.exemp = nrow(exemplar)
  n.cat = ncol(targ)
  accu=rep(0,iter+1)
  attn=matrix(0,nrow=iter+1,ncol=inpDim)
  attn[1,]=parmSet$alpha
  # end initialization

  # main loop
  for (i_iter in 1:iter) {
    ro=sample(1:n.inp, n.inp)
    prob_temp=0;
    for (i_tr in 1:n.inp) {
      res.forward <- alcove.forward(parmSet, inp[ro[i_tr],], exemplar)
      res.backward <- alcove.backward(res.forward, parmSet, targ[ro[i_tr],])
      parmSet$w = parmSet$w+t(res.backward$dW)
      parmSet$alpha = parmSet$alpha+res.backward$dA
      parmSet$alpha[which(parmSet$alpha<0)]=0
      pT=(exp(parmSet$phi*res.forward$out)/sum(exp(parmSet$phi*res.forward$out)))*targ[ro[i_tr],]
      prob_temp=prob_temp+pT[which(!pT==0)]
    }
    accu[i_iter+1]=prob_temp/n.inp
    attn[i_iter+1,]=parmSet$alpha
  }
  attnN=attn/apply(attn,1, sum)
  out=matrix(0,nrow=n.cat,ncol=n.inp)
  for (i_tr in 1:n.inp) {
    res.forward<-alcove.forward(parmSet, inp[i_tr,], exemplar)
    out[,i_tr]=res.forward$out
  }
  return(list(attn=attn,attnN=attnN,accu=accu,out=out,ps=parmSet))
}
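For reference, a minimal usage sketch (assuming the three-dimensional binary stimuli and the type-I category structure from the W06 exercise below; the particular settings are illustrative, not part of the original post):

exemplar = matrix(c(0,0,0, 0,0,1, 0,1,0, 0,1,1,
                    1,0,0, 1,0,1, 1,1,0, 1,1,1), byrow=T, nrow=8)
inp = exemplar
targ = matrix(c(0,0,0,0,1,1,1,1,
                1,1,1,1,0,0,0,0), nrow=8)    # type-I category structure
parmSet <- ALC.init(c(3,8,2))                # 3 dims, 8 exemplars, 2 categories
res <- alcove(parmSet, inp, exemplar, targ, 50)
plot(res$accu, type='o', pch=20, xlab="epoch", ylab="P(correct)")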

An example of simulated annealing

minSA<-function(func, initXY=c(0,0), maxItr=1000, C=1, eta=0.99, width=10) {
  x=initXY
  z=do.call(func,list(x))
  n.var = length(x)
  opHist=matrix(0,nrow=maxItr,ncol=(n.var+1))
  opHist[1,]=c(x,z)
  for (i_loop in 2:maxItr) {
    xTemp = x + rnorm(n.var, mean = 0, sd=width)
    zTemp= do.call(func,list(xTemp))
    delta=zTemp-z;
    prob=1/(1+exp(delta/(C*width)))
    if (runif(1) < prob) {
      x=xTemp;
      z=zTemp;
    } 
    opHist[i_loop,]=c(x,z);
    width=width*eta
  }
  return(opHist)
}
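A minimal usage sketch (the two-variable test function and the settings below are illustrative assumptions):

fun <- function(x) { x[1]^4 - 16*x[1]^2 - 5*x[1] + x[2]^4 - 16*x[2]^2 - 5*x[2] }
res <- minSA("fun", initXY = c(0,0), maxItr = 1000, C = 1, eta = 0.99, width = 5)
res[nrow(res), ]   # final x1, x2, and objective value; the global minimum is near x1 = x2 = 2.9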

An example of an Evolutionary Strategy (ES)

ES_recomb<-function(G) {
  nParent=nrow(G$x);nVar=ncol(G$x)
  child = G;
  for (i_child in 1:nParent) {
    parentID=sample(1:nParent,2)
    coID=sample(c(0,1),nVar,replace=T)
    child$x[i_child,]=G$x[parentID[1],]
    child$x[i_child,which(coID==1)]=G$x[parentID[2],which(coID==1)]
    child$sigma[i_child,]=0.5*(G$sigma[parentID[1],]+G$sigma[parentID[2],])
  }
  return(child)
}
 
ES_mutate<-function(child,tau){
  nChild=nrow(child$x);nVar=ncol(child$x)
  child$sigma<-child$sigma*exp(matrix(rnorm(nChild*nVar)*tau,nrow=nChild))
  child$x=child$x+child$sigma*matrix(rnorm(nChild*nVar),nrow=nChild,ncol=nVar)
  return(child)
}
 
ES_survive<-function(G, child, fitG, fitC){
  nParent=nrow(G$x);
  fitT=c(fitG,fitC);
  fitMin=sort(fitT,index.return=T)
  tempX=rbind(G$x,child$x);
  tempS=rbind(G$sigma,child$sigma)
  G$x=tempX[fitMin$ix[1:nParent],]
  G$sigma=tempS[fitMin$ix[1:nParent],]
  return(G)
}
 
ES<-function(G,func, nGen, tau,X,y){
  optHist=matrix(0,nrow=nGen,ncol=1)
  G.hist = matrix(0,nrow=nGen,ncol = ncol(G$x))
  fitG = apply(G$x,1,func,X,y)
  optHist[1] = min(fitG)
  G.hist[1,] = G$x[which.min(fitG),]
  child = G;
  for (i_gen in 2:nGen) {
    child<-ES_recomb(G)
    child<-ES_mutate(child,tau)
    fitG = apply(G$x,1,func,X,y)
    fitC = apply(child$x,1,func,X,y)
    G<-ES_survive(G, child, fitG, fitC)
    optHist[i_gen]=min(c(fitG,fitC))
    G.hist[i_gen, ] = G$x[1,]
  }
  return(list(G = G, optHist = optHist, G.hist = G.hist))
}
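A minimal usage sketch: G is a list whose $x holds candidate solutions (one per row) and whose $sigma holds the corresponding mutation step sizes, and the objective function takes (b, X, y). The toy regression data and population size below are illustrative assumptions:

set.seed(1)
X = cbind(1, matrix(rnorm(2*50), ncol=2))        # 50 observations, intercept + 2 predictors
y = X %*% c(1, 2, -1) + rnorm(50)
sse <- function(b, X, y) sum((y - X %*% b)^2)    # objective to minimize
nParent = 20; nVar = ncol(X)
G = list(x = matrix(rnorm(nParent*nVar), nrow=nParent),
         sigma = matrix(runif(nParent*nVar), nrow=nParent))
res = ES(G, sse, nGen = 500, tau = 1, X, y)
res$G$x[1, ]   # best coefficient vector found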

データ解析基礎論B W06

# PCA
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/AMSA_track_mens.csv",row.names=1)
plot(dat,pch=20)
dat.pca<-princomp(dat)
summary(dat.pca)
screeplot(dat.pca,type='l',lwd=2,pch=20,cex=4,col='red')
abline(h=sum(summary(dat.pca)[[1]])/8,lty=2,lwd=2)

# factor analysis
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/FA01.csv")
dat.fa<-factanal(dat,1)
dat.fa$uniquenesses
1-dat.fa$loadings[1:3]^2

dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)

dat.fa<-factanal(dat,1,score="regression")
plot(dat.fa$score~dat.pca$score[,1],pch=20,cex=2,xlab="Component Score", ylab="Factor Score")
cor(dat.fa$score,dat.pca$score)
cor(rowSums(dat),dat.fa$score)

# plotting results
dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.fa<-factanal(dat,2,score="regression")
plot(dat.fa$loadings[,1],dat.fa$loadings[,2],pch=20,cex=2,col="red",
     xlab="Factor 1", ylab="Factor 2")
abline(h=0,v=0)
n.var = length(dat.fa$loadings[,1])
for (i.var  in 1:n.var){
  lines(c(0,dat.fa$loadings[i.var,1]),c(0,dat.fa$loadings[i.var,2]),
        lty=2,lwd=2,col='skyblue')
}
text(dat.fa$loadings[,1],dat.fa$loadings[,2],colnames(dat),cex=1.4)

# model comparison
dat.model1<-factanal(dat,1)
dat.model2<-factanal(dat,2)
dat.model3<-factanal(dat,3)
dat.model4<-factanal(dat,4)
print(m1res<-c(dat.model1$dof,dat.model1$PVAL))
print(m2res<-c(dat.model2$dof,dat.model2$PVAL))
print(m3res<-c(dat.model3$dof,dat.model3$PVAL))
print(m4res<-c(dat.model4$dof,dat.model4$PVAL))
1-pchisq(dat.model3$STATISTIC-dat.model4$STATISTIC,dat.model3$dof-dat.model4$dof)
dat.model2$STATISTIC
dat.model2$dof

# CFA with SEM
install.packages('sem')
library(sem)

# cfa() reads the factor specification from standard input; finish the input with a blank line
model01=cfa(reference.indicator=FALSE)
F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity

mod1<-sem(model01, cov(dat), 100)

model02=cfa(reference.indicator=FALSE)
F1: extrovert, leadership, motivated, hesitance 
F2: cheerful, antisocial, talkative, popularity

mod2<-sem(model02, cov(dat), 100)

opt <- options(fit.indices = c("RMSEA"))
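With the fit-index option set as above, calling summary() on each fitted model should print the chi-square statistic along with the requested RMSEA, which can then be used to compare the one-factor and two-factor specifications:

summary(mod1)
summary(mod2)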

広域システム特別講義 II W06

Solution example for Exercise 2: category learning with ALCOVE trained by gradient descent (GD)
As in Exercise 1, the forward and backward processes are implemented as separate functions.

ALC.init<-function(size) {
  # size[1] = input dimension
  # size[2] = number of exemplars
  # size[3] = number of categories
  alpha=matrix(1,nrow=1,ncol=size[1])/size[1]
  w=matrix(rnorm(size[3]*size[2])*0.1,ncol=size[2])
  c=2            # gain
  phi=3          # decision strength
  lrA=0.2        # learning rate for attentions
  lrW=0.1        # learning rate for associations
  return(list(alpha = alpha,
              w = w,
              lrA = lrA,
              lrW = lrW,
              c = c,
              phi = phi))
}

# alcove forward process
alcove.forward <- function(parmSet, inp, exemplar){
  diff= matrix(abs(matrix(1,nrow(exemplar),1)%*%inp-exemplar),nrow=nrow(exemplar))
  exemp=exp(-parmSet$c*(diff%*%t(parmSet$alpha)))
  out=parmSet$w%*%exemp
  return(list(diff = diff, exemp = exemp, out = out))
}

# alcove backward process
alcove.backward <- function(res.forward,parmSet, target){
  err=target-res.forward$out
  dW=parmSet$lrW*res.forward$exemp%*%t(err)
  dA=-parmSet$lrA*(t(err)%*%parmSet$w*t(res.forward$exemp))%*%res.forward$diff*parmSet$c
  return(list(dW = dW, dA = dA))
}

# main function
alcove<-function(parmSet, inp, exemplar, targ,iter) {
  # ----ALCOVE for R ---
  # input arguments
  #  parmSet - parameter set
  # inp - input matrix (n_sample x n_dimension)
  # exemplar - exemplar matrix (n_exemplar x n_dimension)
  # targ - target matrix (n_sample x n_category)
  # iter - # of training epochs
  # ----ALCOVE for R ---
  # initialization
  inpDim = ncol(inp)
  n.inp = nrow(inp)
  n.exemp = nrow(exemplar)
  n.cat = ncol(targ)
  accu=rep(0,iter+1)
  attn=matrix(0,nrow=iter+1,ncol=inpDim)
  attn[1,]=parmSet$alpha
  # end initialization

  # main loop
  for (i_iter in 1:iter) {
    ro=sample(1:n.inp, n.inp)
    prob_temp=0;
    for (i_tr in 1:n.inp) {
      res.forward <- alcove.forward(parmSet, inp[ro[i_tr],], exemplar)
      res.backward <- alcove.backward(res.forward, parmSet, targ[ro[i_tr],])
      parmSet$w = parmSet$w+t(res.backward$dW)
      parmSet$alpha = parmSet$alpha+res.backward$dA
      parmSet$alpha[which(parmSet$alpha<0)]=0
      pT=(exp(parmSet$phi*res.forward$out)/sum(exp(parmSet$phi*res.forward$out)))*targ[ro[i_tr],]
      prob_temp=prob_temp+pT[which(!pT==0)]
    }
    accu[i_iter+1]=prob_temp/n.inp
    attn[i_iter+1,]=parmSet$alpha
  }
  attnN=attn/apply(attn,1, sum)
  out=matrix(0,nrow=n.cat,ncol=n.inp)
  for (i_tr in 1:n.inp) {
    res.forward<-alcove.forward(parmSet, inp[i_tr,], exemplar)
    out[,i_tr]=res.forward$out
  }
  return(list(attn=attn,attnN=attnN,accu=accu,out=out,ps=parmSet))
}


exemplar = matrix(c(0,0,0,
                    0,0,1,
                    0,1,0,
                    0,1,1,
                    1,0,0,
                    1,0,1,
                    1,1,0,
                    1,1,1),byrow=T,nrow=8)
inp = exemplar

target1 = matrix(c(0,0,0,0,1,1,1,1,
                   1,1,1,1,0,0,0,0),nrow=8)

target2 = matrix(c(1,1,0,0,0,0,1,1,
                   0,0,1,1,1,1,0,0),nrow=8)

target3 = matrix(c(1,1,1,0,0,1,0,0,
                   0,0,0,1,1,0,1,1),nrow=8)

target4 = matrix(c(1,1,1,0,1,0,0,0,
                   0,0,0,1,0,1,1,1),nrow=8)

target5 = matrix(c(1,1,1,0,0,0,0,1,
                   0,0,0,1,1,1,1,0),nrow=8)

target6 = matrix(c(1,0,0,1,0,1,1,0,
                   0,1,1,0,1,0,0,1),nrow=8)

seed.num = 1;n.train = 50
set.seed(seed.num)
parmSet<-ALC.init(c(3,8,2))
result1<-alcove(parmSet,inp,exemplar,target1,n.train)
plot(result1$accu,type='o',lwd=2,pch=20,col=1)
set.seed(seed.num)
parmSet<-ALC.init(c(3,8,2))
result2<-alcove(parmSet,inp,exemplar,target2,n.train)
lines(result2$accu,type='o',lwd=2,pch=20,col=2)
set.seed(seed.num)
parmSet<-ALC.init(c(3,8,2))
result3<-alcove(parmSet,inp,exemplar,target3,n.train)
lines(result3$accu,type='o',lwd=2,pch=20,col=3)
set.seed(seed.num)
parmSet<-ALC.init(c(3,8,2))
result4<-alcove(parmSet,inp,exemplar,target4,n.train)
lines(result4$accu,type='o',lwd=2,pch=20,col=4)
set.seed(seed.num)
parmSet<-ALC.init(c(3,8,2))
result5<-alcove(parmSet,inp,exemplar,target5,n.train)
lines(result5$accu,type='o',lwd=2,pch=20,col=5)
set.seed(seed.num)
parmSet<-ALC.init(c(3,8,2))
result6<-alcove(parmSet,inp,exemplar,target6,n.train)
lines(result6$accu,type='o',lwd=2,pch=20,col=6)
legend("bottomright",paste("type",1:6,sep=''),col=1:6,lwd=2,pch=20)

Exercise 1: making learning more efficient
An example that incorporates momentum and AdaGrad.
When the parameters are initialized, the matrices and vectors used by momentum, AdaGrad, and Adam are initialized at the same time.

init.network <- function(n.neurons, actFun, Opt, sdv){
  n.layer = length(n.neurons)
  W = list(); b = list(); 
  mW = list(); mb = list();    # momentum
  hW = list(); hb = list();    # adaGrad
  aMW = list(); aMb = list();  # adam M
  aVW = list(); aVb = list();  # adam V
  for (i.layer in 1:(n.layer-1)){
    if (nargs() == 3) {
      if (actFun[i.layer]=="sigmoid"){
        # Xavier
        sdv = 1/sqrt(n.neurons[i.layer])
      } else {
        # He - assumes ReLU
        sdv = sqrt(2/n.neurons[i.layer])
      }
    }
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)], sd = sdv),
                          nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)], sd = sdv), nrow = 1)
    if (Opt == "momentum"){
      mW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      mb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
    } 
    if (Opt == "adagrad"){
      hW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      hb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
    }
    if (Opt == "adam"){
      aMW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      aMb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
      aVW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      aVb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
    }
  }
  return(list(W = W, b = b, actFun = actFun, optimizer = Opt,
              mW=mW, mb=mb,
              hW=hW, hb=hb,
              aMW=aMW,aMb=aMb,aVW=aVW,aVb=aVb))
}

OPT<-function(net, Daff, HP){
  if (net$optimizer == "momentum"){
    return(Opt.mom(net, Daff, HP))
  }
  if (net$optimizer == "adagrad"){
    return(Opt.adagrad(net, Daff, HP))
  }
  if (net$optimizer == "adam"){
    return(Opt.adam(net, Daff, HP))
  }
}

Opt.mom <- function(net, Daff, HP){
  # HP[3] = learning rate
  # HP[4] = weight decay
  # HP[5] = momentum
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer){
    net$mW[[i.layer]] = HP[5]*net$mW[[i.layer]] - HP[3]*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]]
    net$mb[[i.layer]] = HP[5]*net$mb[[i.layer]] - HP[3]*Daff[[i.layer]]$db-HP[4]*net$b[[i.layer]]
    net$W[[i.layer]] = net$W[[i.layer]] + net$mW[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] + net$mb[[i.layer]]
  }
  return(net=net)
}

Opt.adagrad <-function(net, Daff, HP){
  # HP[3] = learning rate
  # HP[4] = weight decay
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer){
    net$hW[[i.layer]] = net$hW[[i.layer]] + Daff[[i.layer]]$dW*Daff[[i.layer]]$dW 
    net$hb[[i.layer]] = net$hb[[i.layer]] + Daff[[i.layer]]$db*Daff[[i.layer]]$db
    net$W[[i.layer]] = net$W[[i.layer]] - HP[3]*(1/(sqrt(net$hW[[i.layer]])+1e-7))*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] - HP[3]*(1/(sqrt(net$hb[[i.layer]])+1e-7))*Daff[[i.layer]]$db - HP[4]*net$b[[i.layer]]
  }
  return(net=net)
}

cu.nnet = function(train.x, train.y, net, HP = c(10,200,0.002,0.01,0.8,0.999,0.9)){
  # HP: Hyperparameters
  # HP[1] = batch size
  # HP[2] = n iteration
  # HP[3] = learning rate
  # HP[4] = weight decay
  # HP[5] = momentum
  # HP[6] = beta1 (adam)
  # HP[7] = beta2 (adam)
  n.layer <- length(net$W)
  loss = rep(0,HP[2])
  A = list(); z = list(); Dact = list(); Daff = list()
  for (i.iter in 1:HP[2]){
    batch_mask = sample(1:nrow(train.x), HP[1])
    x.batch = train.x[batch_mask,]
    t.batch = train.y[batch_mask,]
    for (i.layer in 1:n.layer){
      if (i.layer == 1){
        x = x.batch
      } else {
        x = z[[(i.layer - 1)]]
      }
      A[[i.layer]] = affine.forwd(x, net$W[[i.layer]], net$b[[i.layer]])
      z[[i.layer]] = activation(A[[i.layer]], t.batch, net$actFun[i.layer])
    }
    loss[i.iter] = z[[i.layer]]$ce
    smx = z[[i.layer]]$smx
    for (i.layerR in n.layer:1){
      if (i.layerR == n.layer){
        dout = 1
      } else {
        dout = Daff[[(i.layerR+1)]]$dx
      }
      Dact[[i.layerR]] = Dact(z[[i.layerR]], smx, t.batch, dout, net$actFun[i.layerR])
      if (i.layerR==1){
        x = x.batch
      } else {
        x = z[[(i.layerR-1)]]
      }
      Daff[[i.layerR]] = affine.bckwd(x, net$W[[i.layerR]], Dact[[i.layerR]])
    }
    net = OPT(net, Daff, HP)
  }
  return(list(loss = loss, net = net))
}

# iris
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1
actF = c("relu","relu","softmax")
network = init.network(c(4,10,10,3), actF, "adagrad")
res = cu.nnet(train.x, train.y, network, HP=c(15,500,0.1,0,0.8,0.999,0.9))
plot(res$loss,type='l',ylim=c(0,max(res$loss)))

Refinements to the learning process (for efficiency)

func = function(x){
  return(1/20*x[1]^2 + x[2]^2)
}
numerical.grad <- function(func, x){
  h = 1e-4
  R = nrow(x)
  C = ncol(x)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.x = x[i.row,i.col]
      x[i.row, i.col] = temp.x + h
      plusH = do.call(func, list(x))
      x[i.row, i.col] = temp.x - h
      minusH = do.call(func,list(x))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      x[i.row, i.col] = temp.x
    }
  }
  return(grad)
}
grad.desc <- function(func, init.x, lr, n.iter){
  x = init.x
  x.hist = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
    x.hist = rbind(x.hist,x)
  }
  return(x.hist)
}
#
gd.moment <- function(func, init.x, lr, moment, n.iter){
  x = init.x
  x.hist = init.x
  v = rep(0,length(x))
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    v = moment*v-lr*grad
    x = x + v
    x.hist = rbind(x.hist,x)
    grad.hist = -lr*grad
  }
  return(x.hist)
}

# experiment
library(plot3D)   # mesh() comes from the plot3D package
x = seq(-10,10,0.2)
y = seq(-5,5,0.2)
M = mesh(x,y)
Z = as.vector(1/20*M$x^2)+as.vector(M$y^2) 
Z.mesh = matrix(Z,nrow(M$x))
contour(x,y,Z.mesh,drawlabels = F,nlevels=40)
# simple GD
x.init = matrix(c(-7,2),nrow = 1)
gd = grad.desc("func",x.init,0.9,100)
lines(gd,type='o',col = 'green',pch=20)
# GD with momentum
x.init = matrix(c(-7,2),nrow = 1)
gdm = gd.moment("func",x.init,0.2,0.8,100)
lines(gdm,type='o',col = 'blue',pch=20)

General Multi-layer Perceptron

# sigmoid
sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}
sigmoid.bckwd <- function(z, dout){
  return(dout*(1-z)*z)
}
# Affine
affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}
affine.bckwd <- function(x, W, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}
# softmax with CE
softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(list(smx = y, ce = -sum(target*log(y + delta))/R))
}
softmax.bckwd <- function(smx, target){
  R = nrow(smx)
  return((smx-target)/R)
}

# activation - forward
activation <- function(A, target, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.forwd(A))
  }
  if (actFun == "relu"){
    return(relu.forwd(A))
  }
  if (actFun == "softmax"){
    return(softmax.forwd(A, target))
  }
}

# activation - backward
Dact <- function(z, smx, target, dout, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.bckwd(z, dout))
  }
  if (actFun == "relu"){
    return(relu.bckwd(z, dout))
  }
  if (actFun == "softmax"){
    return(softmax.bckwd(smx, target))
  }
}
# function for initialization 
init.network <- function(n.neurons, actFun){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),
                          nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1)
  }
  return(list(W = W, b = b, actFun = actFun))
}

# main function GENERAL version
ut.nnet = function(train.x, train.y, net, HP = c(10,1000,0.05)){
  # HP: Hyperparameters
  # HP[1] = batch size
  # HP[2] = n iteration
  # HP[3] = learning rate
  n.layer <- length(net$W)
  loss = rep(0,HP[2])
  A = list(); z = list(); Dact = list(); Daff = list()
  for (i.iter in 1:HP[2]){
    batch_mask = sample(1:nrow(train.x), HP[1])
    x.batch = train.x[batch_mask,]
    t.batch = train.y[batch_mask,]  
    for (i.layer in 1:n.layer){
      if (i.layer == 1){
        x = x.batch
      } else {
        x = z[[(i.layer - 1)]]
      }
      A[[i.layer]] = affine.forwd(x, net$W[[i.layer]], net$b[[i.layer]])
      z[[i.layer]] = activation(A[[i.layer]], t.batch, net$actFun[i.layer])
    }
    loss[i.iter] = z[[i.layer]]$ce
    smx = z[[i.layer]]$smx
    for (i.layerR in n.layer:1){
      if (i.layerR == n.layer){
        dout = 1
      } else {
        dout = Daff[[(i.layerR+1)]]$dx
      }
      Dact[[i.layerR]] = Dact(z[[i.layerR]], smx, t.batch, dout, net$actFun[i.layerR])
      if (i.layerR==1){
        x = x.batch
      } else {
        x = z[[(i.layerR-1)]]
      }
      Daff[[i.layerR]] = affine.bckwd(x, net$W[[i.layerR]], Dact[[i.layerR]])
    }
    for (i.layer in 1:n.layer){
      net$W[[i.layer]] = net$W[[i.layer]] - HP[3]*Daff[[i.layer]]$dW
      net$b[[i.layer]] = net$b[[i.layer]] - HP[3]*Daff[[i.layer]]$db
    }
  }
  return(list(loss = loss, net = net))
}

# learning iris data
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1
actF = c("relu","softmax")
network = init.network(c(4,15,3), actF)
res = ut.nnet(train.x, train.y, network, HP=c(15,2000,0.1))
plot(res$loss,type='l',xlab = "Trial", ylab = "loss")

ALCOVE

ALC.init<-function(size) {
  # size[1] = input dimension
  # size[2] = number of exemplars
  # size[3] = number of categories
  alpha=matrix(1,nrow=1,ncol=size[1])/size[1]
  w=matrix(rnorm(size[3]*size[2])*0.1,ncol=size[2]) 
  stimType='nom' # stimulus coding: 'nom' = nominal (binary) dimensions; otherwise continuous
  c=2            # gain
  phi=5          # decision strength
  lrA=0.1        # learning rate for attentions
  lrW=0.1        # learning rate for associations
  return(list(alpha = alpha,
              w = w,
              stimType = stimType,
              lrA = lrA,
              lrW = lrW,
              c = c,
              phi = phi))
}

# main function
alcove<-function(parmSet, inp, exemplar, targ,iter) {
  # ----ALCOVE for R ---
  # input arguments
  #  parmSet - parameter set
  # inp - input matrix (n_sample x n_dimension)
  # targ - target matrix (n_sample x n_category)
  # iter - # of training epochs 
  # ----ALCOVE for R ---
  # initialization
  inpDim = ncol(inp)
  n.inp = nrow(inp)
  n.exemp = nrow(exemplar)
  n.cat = ncol(targ)
  alpha=parmSet$alpha
  w=parmSet$w            
  accu=rep(0,iter+1)
  attn=matrix(0,nrow=iter+1,ncol=inpDim)
  attn[1,]=alpha
  # end initialization

  for (i_iter in 1:iter) {
    ro=sample(1:n.inp, n.inp)
    prob_temp=0;
    for (i_tr in 1:n.inp) {
      if (parmSet$stimType=='nom') {
        diff= matrix(as.numeric(matrix(1,n.exemp,1)%*%inp[ro[i_tr],]!=exemplar),nrow=n.exemp)
      } else { diff=abs(matrix(1,n.exemp,1)%*%inp[ro[i_tr],]-exemplar) }
      exemp=exp(-parmSet$c*(diff%*%t(alpha)))
      out=w%*%exemp
      err=targ[ro[i_tr],]-out
      delta_W=parmSet$lrW*exemp%*%t(err)
      delta_A=-parmSet$lrA*(t(err)%*%w*t(exemp))%*%diff*parmSet$c
      w=w+t(delta_W)
      alpha=alpha+delta_A
      alpha[which(alpha<0)]=0
      pT=(exp(parmSet$phi*out)/sum(exp(parmSet$phi*out)))*targ[ro[i_tr],]
      prob_temp=prob_temp+pT[which(!pT==0)]
    }
    accu[i_iter+1]=prob_temp/n.inp
    attn[i_iter+1,]=alpha
  }
  attnN=attn/apply(attn,1, sum)
  out=matrix(0,nrow=n.cat,ncol=n.inp)
  for (i_tr in 1:n.inp) {
    if (parmSet$stimType=='nom') {
      diff= matrix(as.numeric(matrix(1,n.exemp,1)%*%inp[i_tr,]!=exemplar),nrow=n.exemp)
    } else { diff=abs(matrix(1,n.exemp,1)%*%inp[i_tr,]-exemplar) }
    exemp=exp(-parmSet$c*(diff%*%t(alpha)))
    out[,i_tr]=w%*%exemp
  }
  return(list(alpha=alpha,w=w,attn=attn,attnN=attnN,accu=accu,out=out,ps=parmSet))
}

##################################################
#  example
# category learning model: ALCOVE
# a simulation of Medin & Schaffer (1978)
##################################################
inp=matrix(c(1, 1, 1, 0,
             1, 0, 1, 0,
             1, 0, 1, 1,
             1, 1, 0, 1,
             0, 1, 1, 1,
             1, 1, 0, 0,
             0, 1, 1 ,0,
             0, 0, 0, 1,
             0, 0, 0, 0),nrow=9,byrow=T)
targ=matrix(c(1, 0,
              1, 0,
              1, 0,
              1, 0,
              1, 0,
              0, 1,
              0, 1,
              0, 1,
              0, 1),nrow=9,byrow=T)  
parmSet<-ALC.init(c(4,9,2))
result<-alcove(parmSet,inp,inp,targ,30)
plot(result$accu, type="o", col="black",ylim=c(0,1.1), 
     xlab="training", ylab="Proportion", cex.lab=1.4)
lines(result$attnN[,1], type="o", pch=22, lty=2, col="blue")
lines(result$attnN[,2], type="o", pch=23, lty=3, col="red")
lines(result$attnN[,3], type="o", pch=24, lty=4, col="green")
lines(result$attnN[,4], type="o", pch=25, lty=5, col="cyan")
title(main="ALOCVE: Learning MS (1978) 5-4 Stimulus Set ")
legend("topleft", c("Accuracy","AttnD1","AttnD2","AttnD3","AttnD4"),
       cex=1,col=c("black","blue","red","green","cyan"), pch=c(21:25), lty=c(1:5));


広域システム特別講義 II w05

Solution example for Exercise 2 (classifying the iris data with a two-layer network):

# preparing the training data
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1

# function to initialize the network
init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = 
      matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),
        nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1)
  }
  return(list(W = W,b = b))
}

# sigmoid function (forward and backward)
sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}
sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x) 
  return(dout*(1-y)*y)
}

# affine layer functions (forward and backward)
affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}
affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

# softmax with cross-entropy (forward and backward)
softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}
softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

# main script
params = init.network(c(4,15,3))
batch_size = 50; n.iter =5000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1,params$W[[2]],params$b[[2]])
  z2 = softmax.forwd(a2,t.batch)
  loss[i.iter] = z2
  dwSM = softmax.bckwd(a2, t.batch, 1)
  dwA2 = affine.bckwd(z1,params$W[[2]],params$b[[2]],dwSM)
  dwSG = sigmoid.bckwd(a1,dwA2$dx)
  dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG)
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
}
# visualizing the results
plot(loss,type='l', xlab = "trial")

Solution example for Exercise 1 (learning XOR with a two-layer network):
The input values are 0 and 10 rather than 0 and 1.
The parameters are initialized as a list so that numerical.grad2LN can compute the gradients for all of them in a single call.

# parameter initialization
init.2LN <- function(n.input, n.hidden, n.output, w.std = 0.01){
  W1 = matrix(rnorm(n.input*n.hidden,0,w.std),nrow = n.input)
  B1 = matrix(rnorm(n.hidden,0,w.std),nrow =1)
  W2 = matrix(rnorm(n.hidden*n.output,0,w.std),nrow = n.hidden)
  B2 = matrix(rnorm(n.output,0,w.std),nrow =1)
  return(list(W1 = W1, B1 = B1, W2 = W2, B2 = B2))
}
# sigmoid function
sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}
# forward process / prediction
forward.2LN <- function(params, x){
  NR = nrow(x)
  a1 = x%*%params$W1 + matrix(1,nrow = NR)%*%params$B1
  z1 = sigmoid.func(a1)
  a2 = z1%*%params$W2 + matrix(1,nrow = NR)%*%params$B2
  yhat = a2
  return(yhat)
}
# loss / error 
error.2LN = function(params, x, y){
  yhat = forward.2LN(params, x)
  return(sum((yhat-y)^2))
}
# numerical differentiation
numerical.grad2LN <- function(func, params, x, y) {
  # input args
  # func  : name of the objective function
  # params: list of parameters (W1, B1, W2, B2)
  # x     : input
  # y     : target output
  ##############################################
  h = 1e-4; n.list = length(params); grad = params
  for (i.list in 1:n.list) {
    R = nrow(params[[i.list]])
    C = ncol(params[[i.list]])
    grad[[i.list]] = matrix(0, R, C)
    for (i.col in 1:C) {
      for (i.row in 1:R) {
        temp.w = params[[i.list]][i.row, i.col]
        params[[i.list]][i.row, i.col] = temp.w + h
        plusH = do.call(func, list(params, x, y))
        params[[i.list]][i.row, i.col] = temp.w - h
        minusH = do.call(func, list(params, x, y))
        grad[[i.list]][i.row, i.col] = (plusH - minusH) / (2 * h)
        params[[i.list]][i.row, i.col] = temp.w
      }
    }
  }
  return(grad)
}

# learning XOR logic
set.seed(11111)
x = matrix(c(0,1,0,1,0,0,1,1),nrow=4)*10
y = matrix(c(0,1,1,0),nrow=4)
params = init.2LN(2,2,1,0.01)
n.iter =1000; lambda =0.1
err = rep(0,n.iter)
# main loop
for (i.iter in 1:n.iter){
  dW = numerical.grad2LN("error.2LN",params,x,y)
  params$W1 = params$W1 - lambda*dW$W1
  params$B1 = params$B1 - lambda*dW$B1
  params$W2 = params$W2 - lambda*dW$W2
  params$B2 = params$B2 - lambda*dW$B2
  err[i.iter] = error.2LN(params,x,y)
}
# checking result
forward.2LN(params,x)
plot(err,type='l',xlab='epoch',ylab='error',lwd=2)
# logic gates
AND.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; b = -0.7
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

NAND.gate <- function(x1, x2){
  w1 = -0.5; w2 = -0.5; b = 0.7
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

OR.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; b = -0.3
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

XOR.gate <- function(x1, x2){
  gate1 <- NAND.gate(x1,x2)
  gate2 <- OR.gate(x1,x2)
  y <- AND.gate(gate1,gate2)
  return(y)
}
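A quick check that the gates defined above reproduce the expected truth tables (all four input patterns at once, since the gates are vectorized):

x1 = c(0, 0, 1, 1); x2 = c(0, 1, 0, 1)
cbind(x1, x2, AND = AND.gate(x1, x2), NAND = NAND.gate(x1, x2),
      OR = OR.gate(x1, x2), XOR = XOR.gate(x1, x2))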

# activation function
step.func <- function(x){
  return(as.numeric(x > 0))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

relu.func <- function(x){
  return(pmax(0,x))
} 

softmax.func <- function(x){
  max.x = max(x)
  return(exp(x-max.x)/sum(exp(x-max.x)))
}
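Subtracting the maximum before exponentiating keeps softmax.func numerically stable even for very large inputs, whereas the naive formula overflows:

softmax.func(c(1000, 1001, 1002))                          # valid probabilities
exp(c(1000, 1001, 1002))/sum(exp(c(1000, 1001, 1002)))     # Inf/Inf -> NaN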

# examples of matrix calc.
x = c(1,0.5)
W1 = matrix((1:6)*0.1, nrow = 2)
B1 = (1:3)*0.1
A1 = x%*%W1 + B1
Z1 = sigmoid.func(A1)

W2 = matrix((1:6)*0.1, nrow = 3)
B2 = c(0.1, 0.2)
A2 = Z1%*%W2 + B2
Z2 = sigmoid.func(A2)

W3 = matrix((1:4)*0.1, nrow = 2)
B3 = c(0.1, 0.2)
A3 = Z2%*%W3+ B3
Z3 = A3

# initializing network
init.3L.network <- function(){
  W1 = matrix((1:6)*0.1, nrow = 2)
  B1 = (1:3)*0.1
  W2 = matrix((1:6)*0.1, nrow = 3)
  B2 = c(0.1, 0.2)
  W3 = matrix((1:4)*0.1, nrow = 2)
  B3 = c(0.1, 0.2)
  return(list(W1 = W1, B1 = B1, 
              W2 = W2, B2 = B2, 
              W3 = W3, B3 = B3))
}

forward.3L <- function(network, x){
  A1 = x%*%network$W1 + network$B1
  Z1 = sigmoid.func(A1)
  A2 = Z1%*%network$W2 + network$B2
  Z2 = sigmoid.func(A2)
  A3 = Z2%*%network$W3 + network$B3
  Z3 = sigmoid.func(A3)
  A3 = Z3
  return(A3)
}

network<-init.3L.network()
y = forward.3L(network, c(1, 0.5))

# calc. graphs
multi.forwd <- function(x,y){
  return(x*y)
}
multi.bckwd <- function(x, y, dout){
  dx = dout * y
  dy = dout * x
  return(list(dx = dx, dy = dy))
}

apple = 100; n.apple = 2; tax = 1.1
apple.pre.tax = multi.forwd(apple, n.apple)
apple.post.tax = multi.forwd(apple.pre.tax, tax)

dprice = 1
d.apple.post.tax = multi.bckwd(apple.pre.tax, tax, dprice)
d.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dy

add.forwd <- function(x,y){
  return(x + y)
}
add.bckwd <- function(x, y, dout){
  dx = dout
  dy = dout
  return(list(dx = dx, dy = dy))
}

apple = 100; n.apple = 2; tax = 1.1
orange = 150; n.orange = 3;

apple.price = multi.forwd(apple, n.apple)
orange.price = multi.forwd(orange, n.orange)
all.price = add.forwd(apple.price, orange.price)
price = multi.forwd(all.price, tax)

dprice = 1
d.all.price = multi.bckwd(all.price, tax, dprice)
d.apple.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dx
d.orange.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dy
d.apple = multi.bckwd(apple, n.apple, d.apple.price)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.price)$dy
d.orange = multi.bckwd(orange, n.orange, d.orange.price)$dx
d.n.orange = multi.bckwd(orange, n.orange, d.orange.price)$dy

# forward and backward processes
relu.forwd <- function(x){
  return(pmax(x,0))
}

relu.bckwd <- function(x, dout){
  dout[which(x <= 0)] = 0
  return(dout)
}

sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}

sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x) 
  return(dout*(1-y)*y)
}

affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

データ解析基礎論B W04

# Split Plot Factorial (SFP)
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)

TSME<-SPF.tsme(dat.aov,dat,"result")
dat.aov.sum<-summary(dat.aov)

# within-subjects factor
DF_error.W = dat.aov.sum[[2]][[1]][3,1]
MS_error.W = dat.aov.sum[[2]][[1]][3,3]

q <- qtukey(0.95,4,DF_error.W)
hsd<-q*sqrt(MS_error.W/(5*2))
dat.means.duration<-tapply(dat$result,dat$duration,mean)
abs(outer(dat.means.duration,dat.means.duration,'-'))>hsd

# between-subjects factor
DF_error.B = dat.aov.sum[[1]][[1]][2,1]
MS_error.B = dat.aov.sum[[1]][[1]][2,3]

q <- qtukey(0.95,2,DF_error.B)
hsd<-q*sqrt(MS_error.B/(5*4))

dat.means.method<-tapply(dat$result,dat$method,mean)
abs(outer(dat.means.method,dat.means.method,'-'))>hsd

# randomized block factorial
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")
dat.aov<-aov(result~method+duration+method:duration +
  Error(s+method:s+duration:s+method:duration:s), data=dat)
TSME<-RBF.tsme(dat.aov, dat, "result")

dat.aov.sum<-summary(dat.aov)
# testing for method
mse  = dat.aov.sum[[2]][[1]][2,3]
qv=qtukey(0.95,2,4)
hsd=qv*sqrt(mse/(5*4))
dat.means.method=tapply(dat$result,dat$method,mean)
abs(outer(dat.means.method,dat.means.method,'-')) > hsd

# testing for duration
mse  = dat.aov.sum[[3]][[1]][2,3]
qv=qtukey(0.95,4,12)
hsd=qv*sqrt(mse/(5*2))
dat.means.duration=tapply(dat$result,dat$duration,mean)
abs(outer(dat.means.duration,dat.means.duration,'-')) > hsd

The cake-division game

An explanation is available here.

# total number of cake pieces
n.cake = 10 

# payoff matrix
pay.mat = matrix(0,(n.cake+1),(n.cake+1))
for (i.cake in 1:n.cake){
  pay.mat[(i.cake+1),1:(n.cake-i.cake+1)] =i.cake
}

# initialization (strategy probabilities, time steps, etc.)
p.cake = runif(n.cake+1)
p.cake = p.cake/sum(p.cake)
max.time = 50
dt = 0.01
t = seq(0,max.time,dt)
n.iter = length(t)
p.hist = matrix(0,nrow = n.iter, ncol = (n.cake+1))
p.hist[1,] = p.cake

# main loop (replicator dynamics)
for (i.time in 2:n.iter){
  W = colSums(p.cake*t(pay.mat))
  W.ave = sum(outer(p.cake,p.cake)*pay.mat)
  p.cake = p.cake + p.cake*(W - W.ave)/W.ave * dt
  p.hist[i.time,] = p.cake
}

# visualizing the results
plot(p.hist[,1],ylim=c(0,1),type='l',lwd=2,ylab = 'Proportion',xlab="time")  
for (i.strat in 2:(n.cake+1)){
  lines(p.hist[,i.strat],col=i.strat,lwd=2)
}
legend("topleft",paste("request = ",0:10),col=1:(n.cake+1),lwd =2,cex=0.75)

広域システム特別講義II W04

Solution example for practice option 1
A comparison of gradient descent (GD), simulated annealing, and naive stochastic optimization.
The results depend on how each optimizer is configured, so try out various settings.

# numerical differentiation
numerical.diff <- function(func, x, h){
  n.var = length(x)
  grad = rep(0, n.var)
  for (i.var in 1:n.var){
    temp.x = x[i.var]
    x[i.var] = temp.x + h
    plusH = do.call(func, list(x))
    x[i.var] = temp.x - h
    minusH = do.call(func,list(x))
    grad[i.var] = (plusH - minusH)/(2*h)
    x[i.var] = temp.x
  }
  return(grad)
}

# numerical gradient descent
minGD = function(func,x,h,max.iter, lambda,gamma){
  n.var = length(x)
  grad = rep(1e10,n.var);
  v =rep(0,n.var);
  opHist=matrix(0,nrow = max.iter, ncol=n.var)
  opHist[1,]=x
  for (i.iter in 2:max.iter){
    grad = numerical.diff(func,x,h)
    v = gamma*v - lambda*grad
    x = x + v
    opHist[i.iter,]  = x
  }
  return(opHist)
}

# simulated annealing
minSA<-function(func, x, maxItr=1000, C=1, eta=0.99, width=10) {
  z=do.call(func,list(x))
  n.var = length(x)
  opHist=matrix(0,nrow=maxItr,ncol=n.var)
  opHist[1,]=x
  value = do.call(func,list(x))
  for (i_loop in 2:maxItr) {
    new.x = x + rnorm(n.var, mean = 0, sd=width)
    new.value= do.call(func,list(new.x))
    delta=new.value-value;
    prob=1/(1+exp(delta/(C*width)))
    if (runif(1) < prob) {
      x=new.x;
      value=new.value;
    }
    opHist[i_loop,]=x;
    width=width*eta
  }
  return(opHist)
}

# naive stochastic optimization
minNSO <- function(func, x, max.iter, width){
  n.var = length(x)
  opHist = matrix(0,nrow=max.iter,ncol=n.var)
  opHist[1,] = x
  value = do.call(func, list(x))
  for (i.iter in 2:max.iter){
    new.x = x + rnorm(n.var,0,width)
    new.value = do.call(func, list(new.x))
    if (new.value < value){
      x = new.x
      value = new.value
    }
    opHist[i.iter,]=x
  }
  return(opHist)
}


# running experiment
#
# function to minimize
fun<-function(x) {
  x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]
}

# number of replication
n.rep = 1000
max.iter = 1000
# parameters for GD
lambda = 0.01
gamma = 0.2
# parameter for simulated annealing
C = 1
eta = 0.999
widthSA = 5
# parameter for naive stoch. opt.
widthNS = 5

objGD = matrix(0,nrow = max.iter, ncol = n.rep)
objSA = matrix(0,nrow = max.iter, ncol = n.rep)
objNS = matrix(0,nrow = max.iter, ncol = n.rep)

# for plotting results
#par(mfrow = c(2,1))
#funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
#x<-seq(xmin, xmax,length.out=100);y<-x;z<-outer(x,y,funZ)
#contour(x,y,z,nlevels=30,drawlabel=F)
set.seed(1001)
for (i.rep in 1:n.rep){
  initX = rnorm(2,0,1)
  # GD
  resGD = minGD('fun', initX, 1e-4, max.iter, lambda,gamma)
  #lines(resGD,col='red')
  objGD[,i.rep] = apply(resGD,1,fun)
  # sim. anneal.
  resSA = minSA('fun', initX, max.iter, C, eta, widthSA)
  #lines(resSA,col='skyblue')
  objSA[,i.rep] = apply(resSA,1,fun)
  # stoch. opt
  resNS = minNSO('fun', initX,max.iter, widthNS)
  #lines(resNS,col='orange')
  objNS[,i.rep] = apply(resNS,1,fun)
}
objGD.mean = rowMeans(objGD)
objSA.mean = rowMeans(objSA)
objNS.mean = rowMeans(objNS)

plot(objGD.mean,type='l',col='red', xlab='trial', ylab='obj value',lwd=2,
     xlim=c(1,max.iter),
     ylim =c(min(c(objGD.mean,objSA.mean,objNS.mean)),
             max(c(objGD.mean,objSA.mean,objNS.mean))))
lines(objSA.mean,col='skyblue',lwd=2)
lines(objNS.mean,col='orange',lwd=2)
legend('topright',c('GD','Sim Anneal.', 'Naive Stoch. opt.'),
       lwd=2,col=c('red','skyblue','orange'))

Solution example for practice option 2
With 10 variables and up to 5th-order interactions a good solution was rarely reached, so the problem was changed to 5 variables with up to 5th-order interactions.
With 5 variables there are 5 + 10 + 10 + 5 + 1 = 31 candidate terms, so the number of definable models is 2^31 = 2147483648.
Also, the objective here is to minimize AIC rather than to maximize adjusted R^2.

# function to make model labels for a particular set of combn
mk.labels <- function(C){
  var.labels = c()
  n.comb = ncol(C)
  for (i.comb in 1: n.comb){
    temp.label = toString(paste("X",C[ , i.comb], sep = ""))
    #  options for model specification
    #  Opt 1: models that include all lower-degree interaction term
    #  Opt 2: models that do NOT include any lower-degree interaction term
    #
    # Opt1
    #var.labels = c(var.labels, gsub(",", "*", temp.label) )
    #
    # Opt2
    var.labels = c(var.labels, gsub(",", ":", temp.label) )
  }
  return(var.labels)
}

# function to make all model labels given number of variable
# and maximum degrees of interactions
mk.labels.all <- function(n.var, max.interaction){
  var.labels = paste("X", 1:n.var, sep = "")
  for (i.interaction  in 2:max.interaction ){
    combination = combn(n.var, i.interaction)
    var.labels = c(var.labels, mk.labels(combination))
  }
  return(var.labels)
}

# creating model labels
# assuming 5 variables & up to 5th-order interaction terms
var.labels<-mk.labels.all(5,5)

# generating data set
set.seed(30)
nData = 1000; n.var = 5
X=matrix(rnorm(n.var*nData,mean=10,sd=5),ncol=n.var);
Y=X[,seq(1,5)]%*%c(1,1,-2,-1,1) +
  apply(X[,c(1,3)],1,prod)*0.1 +
  apply(X[,c(2,4)],1,prod)*-0.1 +
  apply(X[,c(1,3,5)],1,prod)*0.01 +
  apply(X[,c(2,4,5)],1,prod)*0.01+
  rnorm(nData,mean=0,sd=10);

dat = data.frame(Y,X)
colnames(dat) <- c('Y', paste("X",1:n.var,sep=""))
# checking fit of the "TRUE" model
summary(lm(Y~X1*X3+X2*X4+X1:X3:X5+X2:X4:X5+X5, dat))
AIC(lm(Y~X1*X3+X2*X4+X1:X3:X5+X2:X4:X5+X5, dat))

# initializing parent population
nPop = 50
G = matrix(sample(0:1,nPop*length(var.labels),replace=TRUE,prob = c(0.8, 0.2)),nrow=nPop)

# recombination
GA_recomb<-function(G) {
  nPop=nrow(G);
  nVar=ncol(G);
  child = G;
  G.permuted = G[sample(1:nPop),]
  recomb.idx=which(matrix(sample(0:1,nPop*nVar,replace=T),nrow=nPop)==1)
  child[recomb.idx] = G.permuted[recomb.idx]
  return(child)
}

# mutation
GA_mutate = function(child, p){
  n.pop = nrow(child)
  n.var = ncol(child)
  mut.mat= matrix((runif(n.pop*n.var) < p), nrow = n.pop)
  child = abs(child-mut.mat)
  return(child)
}

# surviver selection - works with either min or max prob.
GA_survive<-function(G, child, fitG, fitC, mnmx = "min"){
  nPop=nrow(G);
  fitT=c(fitG,fitC);
  if (mnmx == "max"){
    fitMax=sort(fitT,index.return=TRUE,decreasing = TRUE)
  } else {
    fitMax=sort(fitT,index.return=TRUE)
  }
  tempX=rbind(G,child);
  G=tempX[fitMax$ix[1:nPop],]
  return(G)
}

# main function
GA<-function(G, nGen,p,dat){
  optHist=matrix(0,nrow=nGen,ncol=1)
  G.hist = matrix(0,nrow=nGen,ncol = ncol(G))
  nPop = nrow(G)
  nVar = ncol(G)
  fitG = rep(0,nPop)
  fitC = fitG
  for (i.pop in 1:nPop){
    model.def = paste("Y ~", gsub(",", "+", toString(var.labels[G[i.pop,] == 1])))
    #fitG[i.pop] = summary(lm(model.def,dat))$adj.r.squared
    fitG[i.pop] = AIC(lm(model.def,dat))
  }
  optHist[1]=min(fitG)
  G.hist[1,] = G[which.min(fitG),]

  # to display progress
  prog.prop = round(seq(0,nGen,length.out=10))
  prog.report = paste(seq(0,100,10),"% done\n",sep="")
  for (i_gen in 2:nGen) {
    if (any(i_gen == prog.prop)){
      cat(prog.report[which(i_gen==prog.prop)])
    }
    child <- GA_recomb(G)
    child <- GA_mutate(child,p)
    # assuming same numbers of set of genes
    for (i.pop in 1:nPop){
      model.def = paste("Y ~", gsub(",", "+", toString(var.labels[G[i.pop,] == 1])))
      #fitG[i.pop] = summary(lm(model.def,dat))$adj.r.squared
      fitG[i.pop] = AIC(lm(model.def,dat))
      if (sum(child[i.pop,],na.rm=TRUE)==0){
        child[i.pop,sample(1:ncol(child),sample(1:nVar,1))]=1
      }
      model.def = paste("Y ~", gsub(",", "+", toString(var.labels[child[i.pop,] == 1])))
      #fitC[i.pop] = summary(lm(model.def,dat))$adj.r.squared
      fitC[i.pop] = AIC(lm(model.def,dat))
    }

    G <- GA_survive(G, child, fitG, fitC, "min")
    optHist[i_gen]=min(c(fitG,fitC))
    G.hist[i_gen, ] = G[1,]
  }
  return(list(G = G, optHist = optHist, G.hist = G.hist))
}

# running simulation
res = GA(G,1000,0.15, dat)
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[res$G[1,] == 1])))
summary(lm(model.def,dat))
AIC(lm(model.def,dat))
plot(res$optHist,type='l',xlab="generation",ylab="AIC")
var.labels[which(res$G[1,]==1)]

Corrections

# updated GD optim.
tol = 1e-7; grad = 1e10; lambda = 0.2; gamma = 0.9
x = 10; x.hist = x; v = 0
repeat {
  grad = 2*x+2
  if (abs(grad) <= tol){break}
  v = gamma*v-lambda*grad
  x = x + v
  x.hist=c(x.hist,x)
  print(c(x,grad))
}
x.temp=seq(-10,10,0.1)
plot(x.temp, x.temp^2+2*x.temp,type='l',lwd=2)
lines(x.hist,x.hist^2+2*x.hist,type='o',pch=1,col='red',cex=1)

Naive stochastic optimization

# naive stochastic optim.
naive.stochOptim <- function(obj.func, x, n.iter, width){
  opt.value <- do.call(obj.func, list(x))
  opt.hist = matrix(0, nrow = n.iter, ncol = 5)
  opt.hist[1,] = c(x, x, opt.value, opt.value, 1)
  for (i.iter in 2:n.iter){
    accept = 0
    temp.x <-  x + rnorm(1, mean = 0, sd = width)
    temp.value <- do.call(obj.func, list(temp.x))
    if (temp.value <  opt.value){
      x = temp.x
      opt.value = temp.value
      accept = 1
    }
    opt.hist[i.iter, ] = c(x, temp.x, opt.value, temp.value, accept)
  }
  return(data.frame(opt.hist))
}

set.seed(50)
n.iter =500
fun01<-function(x){x^2+2*x}
res <- naive.stochOptim(fun01,3,n.iter,1) 

# vizualizing results
par(mfrow=c(2,1))
# upper panel
plot(res$X2,res$X4,col='red',pch=20,cex=2,xlab = "x", ylab='f(x)')
legend("topleft",c("accepted","tested"),pch=c(20,20),col=c("black","red"))
x.seq = seq(min(res$X2),max(res$X2),length.out = n.iter)
lines(x.seq,fun01(x.seq))
points(res$X1,res$X3,col='black',pch=20)
# lower panel
plot(res$X3,1:n.iter,type='n',pch=20,xlim=c(min(res$X2),max(res$X2)),xlab='x',ylab="Trial")
for (i.iter in 2:n.iter){
  lines(c(res$X1[(i.iter-1)],res$X2[i.iter]),c((i.iter-1),i.iter),type='o',pch=20,col='red',cex=1.5)
}
points(res$X1,1:n.iter,type='o',pch=20)
legend("topright",c("accepted","tested"),pch=c(20,20),col=c("black","red"))

Simulated annealing

# simulated annealing w/ one variable
SimAneal01<-function(func, initXY, maxItr=1000, C=1, eta=0.99, width=10) {
  x=initXY
  opt.value = do.call(func,list(x))
  n.var = length(x)
  opt.hist=matrix(0, nrow=maxItr, ncol=5)
  opt.hist[1,]=c(x,x,opt.value,opt.value,0)
  for (i_loop in 2:maxItr) {
    accept = 0
    temp.x = x + rnorm(n.var, mean = 0, sd=width)
    temp.value= do.call(func,list(temp.x))
    delta=temp.value-opt.value;
    prob=1/(1+exp(delta/(C*width)))
    if (runif(1) < prob) {
      x = temp.x;
      opt.value = temp.value;
      accept = 1
    } 
    opt.hist[i_loop,]=c(x, temp.x, opt.value, temp.value, accept);
    width=width*eta
  }
  return(data.frame(opt.hist))
}

set.seed(48)
n.iter = 500
res <- SimAneal01(fun01, 3, n.iter, 1, 0.985, 1) 
#vizualizing results
par(mfrow=c(2,1))
# upper panel
plot(res$X2,res$X4,col='red',pch=20,cex=2,xlab = "x", ylab='f(x)')
legend("topleft",c("accepted","tested"),pch=c(20,20),col=c("black","red"))
x.seq = seq(min(res$X2),max(res$X2),length.out = n.iter)
lines(x.seq,fun01(x.seq))
points(res$X1,res$X3,col='black',pch=20)
# lower panel
plot(res$X3,1:n.iter,type='n',pch=20,xlim=c(min(res$X2),max(res$X2)),xlab='x',ylab="Trial")
for (i.iter in 2:n.iter){
  lines(c(res$X1[(i.iter-1)],res$X2[i.iter]),c((i.iter-1),i.iter),type='o',pch=20,col='red',cex=1.5)
}
points(res$X1,1:n.iter,type='o',pch=20)
legend("topright",c("accepted","tested"),pch=c(20,20),col=c("black","red"))

Optimization using a genetic algorithm
Identifying the best regression model

# genetic algorithm - identify the best linear model
GA_recomb<-function(G) {
  nPop=nrow(G);
  nVar=ncol(G);
  child = G;
  G.permuted = G[sample(1:nPop),]
  recomb.idx=which(matrix(sample(0:1,nPop*nVar,replace=T),nrow=nPop)==1)
  child[recomb.idx] = G.permuted[recomb.idx]
  return(child)
}

GA_mutate = function(child, p){
  n.pop = nrow(child)
  n.var = ncol(child)
  mut.mat= matrix((runif(n.pop*n.var) < p), nrow = n.pop)
  child = abs(child-mut.mat)
  return(child)
}

GA_survive<-function(G, child, fitG, fitC){
  nPop=nrow(G);
  fitT=c(fitG,fitC);
  fitMax=sort(fitT,index.return=T,decreasing = T)
  tempX=rbind(G,child);
  G=tempX[fitMax$ix[1:nPop],]
  return(G)
}

GA<-function(G, nGen,p,X,Y){
  optHist=matrix(0,nrow=nGen,ncol=1)
  G.hist = matrix(0,nrow=nGen,ncol = ncol(G))
  nPop = nrow(G)
  nVar = ncol(G)
  fitG = rep(0,nPop)
  fitC = fitG
  for (i.pop in 1:nPop){
    dat.temp = X[,which(G[i.pop,]==1)]
    fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
  }
  optHist[1]=max(c(fitG))
  G.hist[1,] = G[which.max(fitG),]
  for (i_gen in 2:nGen) {
    child<-GA_recomb(G)
    child<-GA_mutate(child,p)
    # assuming same numbers of set of genes
    for (i.pop in 1:nPop){
      dat.temp = X[,which(G[i.pop,]==1)]
      fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
      if (sum(child[i.pop,])==0){
        child[i.pop,sample(1:ncol(child),sample(1:nVar,1))]=1
      }
      dat.temp = X[,which(child[i.pop,]==1)]
      fitC[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
    }
    G<-GA_survive(G, child, fitG, fitC)
    optHist[i_gen]=max(c(fitG,fitC))
    G.hist[i_gen, ] = G[1,]
  }
  return(list(G = G, optHist = optHist, G.hist = G.hist))
}

set.seed(19)
nVar = 10
nData = 50
X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
y=X%*%c(-2,0,2,0,2,0,-3,0,-2,0)+rnorm(nData,mean=0,sd=8);
summary(lm(y~X[,seq(1,9,2)]))
summary(lm(y~X))
G = matrix(sample(0:1,300,replace = T),nrow=30)
res<-GA(G,100,0.1,X,y)
head(res$G)

# GA - challenging task: building the model labels step by step
C = combn(3,2)
i.comb = 1    # look at a single combination first
temp.lab = toString(paste("X",C[ , i.comb], sep = ""))
gsub(",", "*", temp.lab)

# then loop over all combinations, collecting the labels
var.labels = c()
n.comb = ncol(C)
for (i.comb in 1: n.comb){
  temp.label = toString(paste("X",C[ , i.comb], sep = ""))
  var.labels = c(var.labels, gsub(",", "*", temp.label) )
}

mk.labels <- function(C){
  var.labels = c()
  n.comb = ncol(C)
  for (i.comb in 1: n.comb){
    temp.label = toString(paste("X",C[ , i.comb], sep = ""))
    var.labels = c(var.labels, gsub(",", "*", temp.label) )
  }
  return(var.labels)
}
n.var = 3; max.interaction = 3  # small example: 3 variables, up to 3rd-order interactions
var.labels = paste("X", 1:n.var, sep = "")

# interaction terms
for (i.interaction  in 2:max.interaction ){
  combination = combn(n.var, i.interaction)
  var.labels = c(var.labels, mk.labels(combination))
}
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[c(1,1,1,1,1,0,0) == 1]))) 
X=matrix(rnorm(n.var*nData,mean=10,sd=5),ncol=n.var);
y=X%*%c(1,1,-2)+rnorm(nData,mean=0,sd=3);
dat = data.frame(Y=y,X1=X[,1],X2=X[,2],X3=X[,3])

model.lm = lm(model.def, dat)

Optimization using an Evolutionary Strategy
Estimating the coefficients of a regression model (an illustrative example, not suited for practical use)

# Evolutionary Strategy - estimating coefficient
ES_recomb<-function(G) {
  nParent=nrow(G$x);nVar=ncol(G$x)
  child = G;
  for (i_child in 1:nParent) {
    parentID=sample(1:nParent,2)
    coID=sample(c(0,1),nVar,replace=T)
    child$x[i_child,]=G$x[parentID[1],]
    child$x[i_child,which(coID==1)]=G$x[parentID[2],which(coID==1)]
    child$sigma[i_child,]=0.5*(G$sigma[parentID[1],]+G$sigma[parentID[2],])
  }
  return(child)
}

ES_mutate<-function(child,tau){
  nChild=nrow(child$x);nVar=ncol(child$x)
  child$sigma<-child$sigma*exp(matrix(rnorm(nChild*nVar)*tau,nrow=nChild))
  child$x=child$x+child$sigma*matrix(rnorm(nChild*nVar),nrow=nChild,ncol=nVar)
  return(child)
}

ES_survive<-function(G, child, fitG, fitC){
  nParent=nrow(G$x);
  fitT=c(fitG,fitC);
  fitMin=sort(fitT,index.return=T)
  tempX=rbind(G$x,child$x);
  tempS=rbind(G$sigma,child$sigma)
  G$x=tempX[fitMin$ix[1:nParent],]
  G$sigma=tempS[fitMin$ix[1:nParent],]
  return(G)
}

ES<-function(G,func, nGen, tau,X,y){
  optHist=matrix(0,nrow=nGen,ncol=1)
  G.hist = matrix(0,nrow=nGen,ncol = ncol(G$x))
  fitG = apply(G$x,1,func,X,y)
  optHist[1] = min(fitG)
  G.hist[1,] = G$x[which.min(fitG),]
  child = G;
  for (i_gen in 2:nGen) {
    child<-ES_recomb(G)
    child<-ES_mutate(child,tau)
    fitG = apply(G$x,1,func,X,y)
    fitC = apply(child$x,1,func,X,y)
    G<-ES_survive(G, child, fitG, fitC)
    optHist[i_gen]=min(c(fitG,fitC))
    G.hist[i_gen, ] = G$x[1,]
  }
  return(list(G = G, optHist = optHist, G.hist = G.hist))
}

set.seed(20); nData = 100
X=matrix(rnorm(9*nData,mean=10,sd=2),ncol=9);X=cbind(rep(1,nData),X)
y=X%*%c(10,2,5,-3,-5,0,0,0,0,0)+rnorm(nData,mean=0,sd=2);
fun04<-function(b,X,y){
  yhat<-X%*%b
  return(sum((y-yhat)^2))
}
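# ES expects G to be a list with $x (candidate coefficient vectors, one per row) and
# $sigma (per-gene mutation widths); one possible initialization (the population size
# and initial sigma values are arbitrary choices here, not part of the original example):
nParent = 20; nVar = ncol(X)
G = list(x = matrix(rnorm(nParent*nVar), nrow=nParent),
         sigma = matrix(runif(nParent*nVar), nrow=nParent))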
res=ES(G, fun04, 1000, 1,X,y)

Optimization using Particle Swarm Optimization

# PSO - optim. 2-variable function
PSO<-function(G, wP, wG, func, maxIter){
  swarm.hist = array(0, c(nrow(G), ncol(G), maxIter))
  swarm.hist[,,1]=G
  p.b.hist = apply(G,1,func)
  global.best.v = min(p.b.hist)
  p.Best = G
  g.Best = matrix(G[which.min(p.b.hist),],nrow=nrow(G),ncol=ncol(G),byrow=T)
  v = matrix(0,nrow = nrow(G),ncol = ncol(G))
  for (i.iter in 2:maxIter){
    v = v + wP*runif(1)*(p.Best - G) + wG*runif(1)*(g.Best - G)
    G = G + v
    fitG = apply(G,1,func)
    if (min(fitG) < global.best.v){
      g.Best = matrix(G[which.min(fitG),],nrow=nrow(G),ncol=ncol(G),byrow=T)
      global.best.v = min(fitG)  
    }
    idx = which(fitG < p.b.hist)
    p.Best[idx,] = G[idx,]
    p.b.hist= fitG
    swarm.hist[,,i.iter]=G
  }
  return(swarm.hist)
}

# running PSO
par(mfrow=c(1,1))
funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
x<-seq(-5, 5,length.out=100);y<-x;z<-outer(x,y,funZ)
contour(x,y,z,nlevels=30,drawlabel=F)
nSwarm = 100; nVar = 2;
# vector-argument version of the objective (same surface as funZ above)
fun03<-function(x) {x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]}
G=matrix(rnorm(nVar*nSwarm,mean=0,sd=0.1),ncol=nVar)
res<-PSO(G,0.1,0.1,fun03,500)
lines(res[1,1,],res[1,2,],type='o',col='red')

A simple deep learning model (forward process only)

# (simple) Deep neural net
# sigmoid func
sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

# initialize 3-layer network
init.3L.network <- function(){
  W1 = matrix((1:6)*0.1, nrow = 2)
  B1 = (1:3)*0.1
  W2 = matrix((1:6)*0.1, nrow = 3)
  B2 = c(0.1, 0.2)
  W3 = matrix((1:4)*0.1, nrow = 2)
  B3 = c(0.1, 0.2)
  return(list(W1 = W1, B1 = B1, 
              W2 = W2, B2 = B2, 
              W3 = W3, B3 = B3))
}

# 3layer - forward model
forward.3L <- function(network, x){
  A1 = x%*%network$W1 + network$B1
  Z1 = sigmoid.func(A1)
  A2 = Z1%*%network$W2 + network$B2
  Z2 = sigmoid.func(A2)
  A3 = Z2%*%network$W3 + network$B3
  Z3 = sigmoid.func(A3)
  A3 = Z3
  return(A3)
}


train <- read.csv('http://peach.l.chiba-u.ac.jp/course_folder/MNSTtrain.csv', 
                  header=TRUE)
train <- data.matrix(train)
train.x <- train[,-1]
train.y <- train[,1]
train.x <- t(train.x/255)
download.file("http://peach.l.chiba-u.ac.jp/course_folder/trNetwork.Rdata",
              "trNetwork.Rdata")
load("trNetwork.Rdata")
network=trNetwork

# online process
n.train = ncol(train.x)
correct.cl = 0
conf.matrix = matrix(0,10,10)
for (i.loop in 1:n.train){
  y = forward.3L(network,train.x[,i.loop])
  max.y = max.col(y)
  conf.matrix[max.y, (train.y[i.loop]+1)] = 
    conf.matrix[max.y, (train.y[i.loop]+1)] + 1
}
accuracy = sum(diag(conf.matrix))/n.train

# batch process
batch_size = 200
conf.matrix = matrix(0,10,10)
for (i.batch in seq(1,n.train, batch_size)){
  y = forward.3L(network, train.x[,(i.batch:(i.batch+batch_size-1))])
  pred = max.col(y)
  conf.matrix = conf.matrix + 
    table(pred,(train.y[i.batch:(i.batch+batch_size-1)]+1))
}
accuracy = sum(diag(conf.matrix))/n.train


データ解析基礎論B W03

# RB
dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt")
colnames(dat)<-c("id",'method','subj','words')
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat.aov.sum = summary(dat.aov)

# Tukey HSD
mse <- dat.aov.sum[[1]][[1]][3,3]
q<-qtukey(0.95,4,df=21)
hsd<-q*sqrt(mse/8)
dat.means=tapply(dat$words, dat$method, mean)
ck.hsd<-abs(outer(dat.means,dat.means,'-'))>hsd

# SPF
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration,  # x-axis
                 dat$method,    # trace (grouping) variable
                 dat$result,    # y-axis
                 pch=c(20,20), 
                 col=c("skyblue","orange"),
                 ylab="score",
                 xlab="Duration",
                 lwd=3, 
                 type='b',
                 cex=2, 
                 trace.label="Method")

source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)