広域システム特別講義II: Bayesian Statistics

library(rjags)
source("http://www.matsuka.info/univ/course_folder/HDI_revised.txt")

island.hopping2 <- function(n.rep=1e5, init.st=4) {
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # intro to MCMC, island hopping

  # initialization
  state = 1:7
  curr.st = init.st
  state.counter = rep(0,7)
  state.counter[curr.st] = 1
  state.history=rep(0, n.rep)
  state.history[1]=curr.st

  prob.propose = matrix(1/6, nrow=7,ncol=7)
  diag(prob.propose)<-0

  # main loop
  for (i.rep in 2:n.rep) {
    destination = sample(state, 1, prob=prob.propose[curr.st,])
    prob.move = min(destination/curr.st, 1)
    if (runif(1) < prob.move) {
      curr.st = destination
    }
    state.history[i.rep] = curr.st
    state.counter[curr.st] = state.counter[curr.st]+1
  }
  par(mfrow=c(2, 1))
  par(mai=c(0, 1, 1, 1))
  par(mar=c(4, 3, 1, 3))
  barplot(state.counter, xlab="theta", ylab="Frequency")
  plot(state.history, 1:n.rep, type='o', log='y', xlab="theta", ylab="Time Step (log)", col="orange")
}

island.hopping2(10000, 4)
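# Quick check (sketch): the acceptance rule min(destination/current, 1) makes the
# chain's stationary distribution proportional to the state value, so the barplot
# above should be roughly proportional to 1:7.
round((1:7)/sum(1:7), 3)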

metropolis.ex01 <- function(n.iter=1e6, theta.init=0.1, sigma=0.2, plot.yn = T){
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # metropolis algo. with 1 parameter

  # "posterior of theta" function
  posterior.theta <- function(theta, N, z, a, b) {
    posterior = theta^z * (1-theta)^(N-z) * theta^(a-1) * (1 - theta)^(b-1) / beta(a,b)
  }

  # initialization
  theta.history = rep(0, n.iter)
  theta.current = theta.init
  theta.history[1] = theta.current
  # values given in text
  mu = 0
  N = 20
  z = 14
  a = 1
  b = 1

  # main loop
  for (i_iter in 2:n.iter) {
    theta.proposed = theta.current + rnorm(1, mean=mu, sd=sigma)
    if (theta.proposed < 0 | theta.proposed > 1) {
      p.move = 0
    } else {
      p.current = posterior.theta(theta.current, N, z, a, b)
      p.proposed = posterior.theta(theta.proposed, N, z, a, b)
      p.move = min(p.proposed/p.current, 1)
    }
    if (runif(1) < p.move) {
      theta.current = theta.proposed
    }
    theta.history[i_iter] = theta.current
  }

  # plotting results
  if (plot.yn == T) {
    par(mfrow = c(3, 1))
    hist(theta.history, nclass = 100, col = "orange", probability = T, xlab = "theta")
    den=density(theta.history)
    lines(den)
    plot(theta.history[(n.iter-100):n.iter], ((n.iter-100):n.iter), type = 'o', xlim = c(0,1), xlab="theta", ylab = "step in chain")
    plot(theta.history[1:100],1:100, type = 'o', xlim = c(0,1), xlab = "theta", ylab = "step in chain")
  }
  return(theta.history)
}

res = metropolis.ex01(10000, 0.1)
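# Quick check (sketch): with a Beta(a=1, b=1) prior, z=14 and N=20, the posterior
# is analytically Beta(z+a, N-z+b) = Beta(15, 7); the sampled values should match it.
hist(res, nclass=100, probability=TRUE, col="orange", xlab="theta", main="")
curve(dbeta(x, 15, 7), add=TRUE, lwd=2)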

# initialization
n.iter = 10000; sigma = 0.1; counter = 0
z = c(6, 2); N = c(8, 7)
a = c(2, 2); b = c(2, 2);
n.par = 2
th.hist = matrix(0, nrow = n.iter*n.par, ncol = n.par)
theta = runif(2)

# function to calc. prob. move
prob.gibbs <- function(theta, proposed, N, z, a, b, idx){
  p.th=dbeta(theta[idx], z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  p.pro=dbeta(proposed, z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  return(p.pro/p.th)
}

# main loop
for (i.iter in 1:n.iter){
  for (i.par in 1:n.par){
    proposed = theta[i.par] + rnorm(1, mean=0, sd=sigma)
    if (proposed > 1) {proposed = 1}
    if (proposed < 0) {proposed = 0}
    p.move= min(1, prob.gibbs(theta, proposed, N, z, a, b, i.par))
    if (runif(1) < p.move){
      theta[i.par] = proposed
    }
    counter = counter + 1
    th.hist[counter, ] = theta
  }
}
par(mfrow=c(3,1))
HDI.plot(th.hist[,1])
HDI.plot(th.hist[,2])
plot(th.hist[,1],th.hist[,2], type='o',cex=0.1,xlim = c(0,1),ylim=c(0,1))
par(mfrow=c(1,1))
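# Optional (sketch): the start of the chain still reflects the arbitrary initial
# theta; a common practice is to drop a burn-in period before summarizing.
burn.in = 1000
par(mfrow=c(2,1))
HDI.plot(th.hist[-(1:burn.in), 1])
HDI.plot(th.hist[-(1:burn.in), 2])
par(mfrow=c(1,1))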

model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta )
  }
  theta ~ dbeta( 1, 1 )
}"
writeLines(model.txt, "model.txt")

dat<-read.csv("http://www.matsuka.info/univ/course_folder/z15N50.csv")
y=dat$y
Ntotal=length(dat$y)
datalist = list(y=y,Ntotal=Ntotal)

# jags
jagsModel = jags.model(file="model.txt",data=datalist,n.chains=3,n.adapt=500)
update(jagsModel,n.iter=1000)
codaSamples=coda.samples(jagsModel,variable.names=c("theta"),n.iter=5000)
mcmcMat<-as.matrix(codaSamples)

par(mfrow=c(2,2))
cols=c("orange", "skyblue","pink")

# chain
mcmc1<-as.mcmc(codaSamples[[1]])
mcmc2<-as.mcmc(codaSamples[[2]])
mcmc3<-as.mcmc(codaSamples[[3]])
plot(mcmc1,type='l')
lines(mcmc2,col='red')
lines(mcmc3,col='blue')

# autocorrelation
ac1=autocorr(mcmc1,lags=0:50)
ac2=autocorr(mcmc2,lags=0:50)
ac3=autocorr(mcmc3,lags=0:50)
plot(ac1, type='o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type='o', pch = 20, col = cols[2])
lines(ac3, type='o', pch = 20, col = cols[3])

# shrink factor
resALL=mcmc.list(mcmc1,mcmc2,mcmc3)
gd1=gelman.plot(resALL, auto.layout = F)

# density
den1=density(mcmc1)
den2=density(mcmc2)
den3=density(mcmc3)
plot(den1, type='l', col = cols[1], main = " ", xlab = "param. value",lwd=2.5)
lines(den2, col = cols[2], lwd=2.5)
lines(den3, col = cols[3], lwd=2.5)

par(mfrow=c(1,1))
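# Added diagnostics (sketch): numerical versions of the plots above; both
# functions come from the coda package loaded with rjags.
gelman.diag(codaSamples)
effectiveSize(codaSamples)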

model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta[s[i_data]] )
  }
  for ( i_s in 1:Nsubj) {
    theta[i_s] ~ dbeta( 2, 2 )
  }
}"
writeLines(model.txt, "model.txt")

dat<-read.csv("http://www.matsuka.info/univ/course_folder/z6N8z2N7.csv")
y=dat$y
s=as.numeric(dat$s)
Ntotal=length(dat$y)
Nsubj=length(unique(s))
datalist = list(y=y,s=s,Ntotal=Ntotal,Nsubj=Nsubj)

jagsModel = jags.model(file="model.txt",data=datalist,n.chains=3,n.adapt=500)
update(jagsModel,n.iter=1000)
codaSamples=coda.samples(jagsModel,variable.names=c("theta"),n.iter=5000)
mcmcMat<-as.matrix(codaSamples)
par(mfrow=c(2,2))
cols=c("orange", "skyblue","pink")

# chain
mcmc1<-as.mcmc(codaSamples[[1]])
mcmc2<-as.mcmc(codaSamples[[2]])
mcmc3<-as.mcmc(codaSamples[[3]])
plot(mcmc1,type='l')
lines(mcmc2,col='red')
lines(mcmc3,col='blue')

# autocorrelation
ac1=autocorr(mcmc1,lags=0:50)
ac2=autocorr(mcmc2,lags=0:50)
ac3=autocorr(mcmc3,lags=0:50)
plot(ac1, type='o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type='o', pch = 20, col = cols[2])
lines(ac3, type='o', pch = 20, col = cols[3])

# shrink factor
resALL=mcmc.list(mcmc1,mcmc2,mcmc3)
gd1=gelman.plot(resALL, auto.layout = F)

# density
den1=density(mcmc1)
den2=density(mcmc2)
den3=density(mcmc3)
plot(den1, type='l', col = cols[1], main = " ", xlab = "param. value",lwd=2.5)
lines(den2, col = cols[2], lwd=2.5)
lines(den3, col = cols[3], lwd=2.5)

par(mfrow=c(1,1))

model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dnorm( mu , 1/sigma^2  )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
}
"
writeLines(model.txt, "model.txt")

dat<-read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)

dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=1000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)

# calculating & plotting normality
par(mfrow=c(2,2))
HDI.plot(mcmcMat[,1])
hist(y,nclass=15,probability = T)
x.temp = seq(40,200,0.1)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  norm.temp=dnorm(x.temp,mean=mcmcMat[randperm[i.plot],1],sd=mcmcMat[randperm[i.plot],2])
  lines(x.temp,norm.temp,col='orange')
}
HDI.plot(mcmcMat[,2])

# calculating & plotting effect size
effect.size=(mcmcMat[,"mu"]-100)/mcmcMat[,"sigma"]
HDI.plot(effect.size)


#
dat<-read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)

model.txt="
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu , 1/sigma^2 , nu )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/30.0)
}"
writeLines(model.txt, "model.txt")

dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=1000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)

# calculating & plotting normality
par(mfrow=c(3,2))
HDI.plot(mcmcMat[,1])
HDI.plot(mcmcMat[,3])
normality=log10(mcmcMat[,"nu"])
HDI.plot(normality)
effect.size=(mcmcMat[,"mu"]-100)/mcmcMat[,"sigma"]
HDI.plot(effect.size)

hist(y,nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40,200,0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],3]
  t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],3]
  lines(x.temp1,t.temp,col='orange')
}

par(mfrow=c(2,2))
plot(mcmcMat[,1],mcmcMat[,3],col='orange')
plot(mcmcMat[,1],log10(mcmcMat[,2]),col='orange')
plot(0,0,type='n')
plot(log10(mcmcMat[,2]),mcmcMat[,3],col='orange')


dat<-read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score
group = as.numeric(dat$Group)
Ntotal = length(y)
Ngroup = length(unique(group))
model.txt="
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu[group[i]] , 1/sigma[group[i]]^2 , nu )
  }
  for (j in 1:Ngroup){
    mu[j] ~ dnorm( meanY , 1/(100*sdY)^2 )
    sigma[j] ~ dunif( sdY/1000 , sdY*1000 )
  }
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/29)
}"
writeLines(model.txt, "model.txt")


dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y),Ngroup=Ngroup,group=group)
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=5000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)

# plotting result
par(mfrow=c(5,2))
HDI.plot(mcmcMat[,1],xlabel="placebo Mean")

hist(y[dat$Group=="Placebo"],nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40,200,0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],4]
  t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],3])/mcmcMat[randperm[i.plot],4]
  lines(x.temp1,t.temp,col='orange')
}

HDI.plot(mcmcMat[,2],xlabel="smart drug Mean")

hist(y[dat$Group=="Smart Drug"],nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40,200,0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],5]
  t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],3])/mcmcMat[randperm[i.plot],5]
  lines(x.temp1,t.temp,col='orange')
}

HDI.plot(mcmcMat[,4],xlabel="placebo scale")

HDI.plot(mcmcMat[,2]-mcmcMat[,1],xlabel="Difference of Means")

HDI.plot(mcmcMat[,5],xlabel="smart drug scale")

HDI.plot(mcmcMat[,5]-mcmcMat[,4],xlabel="Difference of Scales")

HDI.plot(log10(mcmcMat[,3]),xlabel="Normality")

effect.size = (mcmcMat[,2]-mcmcMat[,1])/sqrt((mcmcMat[,5]^2+mcmcMat[,4]^2)/2)
HDI.plot(effect.size,xlabel="effect size")
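# Added summary (sketch): posterior probability that the Smart Drug mean exceeds
# the Placebo mean, computed directly from the MCMC samples.
mean(mcmcMat[,2] > mcmcMat[,1])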


データ解析基礎論B: Discriminant Analysis+

library(MASS)
library(ggplot2)
dat<-data.frame(writing=c(68,85,50,54,66,35,56,25,43,70),
                interview=c(65,80,95,70,75,55,65,75,50,40), 
                cl=c(rep("A",5),rep("N",5)))
dat.lda<-lda(cl~.,data=dat)

intcpt = (dat.lda$scaling[1]*dat.lda$means[1,1] +
          dat.lda$scaling[2]*dat.lda$means[1,2] + 
          dat.lda$scaling[1]*dat.lda$means[2,1] + 
          dat.lda$scaling[2]*dat.lda$means[2,2])/2
z = as.matrix(dat[,1:2])%*%dat.lda$scaling-intcpt

new.dim.slope = dat.lda$scaling[1]/dat.lda$scaling[2]

disc.intcpt = intcpt / dat.lda$scaling[2]
disc.slope = -dat.lda$scaling[1] / dat.lda$scaling[2]

ggplot(dat, aes(x = writing, y= interview, color = cl)) +
  geom_point(size = 4) +
  geom_abline(aes(intercept = intcpt, slope = new.dim.slope )) +
  geom_abline(aes(intercept = disc.intcpt, slope = disc.slope ),color = "red") + xlim(30,100)+ylim(30,100)
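# Added usage (sketch): classifying a hypothetical new applicant; the scores below
# are made-up values for illustration.
new.applicant <- data.frame(writing=60, interview=70)
predict(dat.lda, newdata=new.applicant)$class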

dat<-read.csv("http://matsuka.info/data_folder/tdkDA01.csv", header=T)
dat.lda<-lda(class~.,dat)
lda.pred<-predict(dat.lda,dat)
table(lda.pred$class, dat$class)

dat.lda<-lda(class~.,dat, CV=T)
dat.cl = dat.lda$posterior[,1]>dat.lda$posterior[,2]
table(dat.cl, dat$class)


dat<-read.csv("http://matsuka.info/data_folder/tdkDA02.csv",header=T)
dat.lda=lda(class~.,data=dat)
lda.pred <- predict(dat.lda, dat)
plot(dat.lda, dimen=3, col=as.numeric(lda.pred$class),cex=2)

dat.km<-kmeans(dat[,1:6],5)
table(lda.pred$class,dat.km$cluster)
plot(dat,col=dat.km$cluster,pch=20)
plot(dat.lda, dimen=2, col=as.numeric(lda.pred$class),cex=2)


dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat1<-subset(dat,dat$popularity<5)
dat2<-subset(dat,dat$popularity>4 & dat$popularity<6)
dat3<-subset(dat,dat$popularity>6)
dat1$popularity="LP";dat2$popularity="MP";dat3$popularity="VP"
datT=rbind(dat1,dat2,dat3)
datT.lda<-lda(popularity~.,datT)
datT.pred<-predict(datT.lda,datT)
table(datT.pred$class,datT$popularity)   
par(mfrow=c(1,2))
plot(datT.lda,col=c(rep('red',20),rep('blue',28),rep('green',29)),cex=1.5)
plot(datT.lda,col=as.numeric(datT.pred$class),cex=1.5)
par(mfrow=c(1,1))

library(nnet)
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
set.seed(5)
x = dat[,1:3]
y = dat[,4]
dat.nnet = nnet(x,y, size = 150, linout= TRUE, maxit = 1000)
nnet.pred <-predict(dat.nnet,dat)
cor(dat.nnet$fitted.values,dat$sales)^2

dat.lm<-lm(sales~.,data=dat)
plot(dat.nnet$fitted.values, dat$sales,pch=20,col='black')
points(predict(dat.lm), dat$sales,pch=20,col='red')

n.data = nrow(dat);n.sample = n.data*0.6; n.rep = 100 
trainNN.cor = rep(0,n.rep); trainLM.cor = rep(0,n.rep)
testNN.cor = rep(0,n.rep); testLM.cor = rep(0,n.rep)
for (i.rep in 1:n.rep){  
  randperm = sample(1:n.data)  
  train.idx = randperm[1:n.sample]  
  test.idx = randperm[(n.sample+1):n.data]  
  dat.nnet <- nnet(sales~.,size = 100, linout=T, maxit=1000, data = dat[train.idx,])  
  dat.lm <-lm(sales~.,data=dat[train.idx, ])  
  trainNN.cor[i.rep] <- cor(predict(dat.nnet,dat[train.idx, ]), dat[train.idx,]$sales)  
  trainLM.cor[i.rep] <- cor(predict(dat.lm,dat[train.idx, ]), dat[train.idx,]$sales)  
  testNN.cor[i.rep] <- cor(predict(dat.nnet,dat[test.idx, ]), dat[test.idx,]$sales)  
  testLM.cor[i.rep] <- cor(predict(dat.lm,dat[test.idx, ]), dat[test.idx,]$sales)
}
print(c(mean(trainNN.cor,na.rm=T),mean(testNN.cor,na.rm=T)))
print(c(mean(trainLM.cor,na.rm=T),mean(testLM.cor,na.rm=T)))


n.data = nrow(dat);n.sample = n.data*0.6; n.rep = 100 
trainNN.cor = rep(0,n.rep); trainLM.cor = rep(0,n.rep)
testNN.cor = rep(0,n.rep); testLM.cor = rep(0,n.rep)
for (i.rep in 1:n.rep){  
  randperm = sample(1:n.data)  
  train.idx = randperm[1:n.sample]  
  test.idx = randperm[(n.sample+1):n.data]  
  dat.nnet <- nnet(sales~.,size = 30, linout=T,decay= 0.1, maxit=1000, data = dat[train.idx,])  
  dat.lm <-lm(sales~.,data=dat[train.idx, ])  
  trainNN.cor[i.rep] <- cor(predict(dat.nnet,dat[train.idx, ]), dat[train.idx,]$sales)  
  trainLM.cor[i.rep] <- cor(predict(dat.lm,dat[train.idx, ]), dat[train.idx,]$sales)  
  testNN.cor[i.rep] <- cor(predict(dat.nnet,dat[test.idx, ]), dat[test.idx,]$sales)  
  testLM.cor[i.rep] <- cor(predict(dat.lm,dat[test.idx, ]), dat[test.idx,]$sales)
}
print(c(mean(trainNN.cor,na.rm=T),mean(testNN.cor,na.rm=T)))
print(c(mean(trainLM.cor,na.rm=T),mean(testLM.cor,na.rm=T)))


dat<-read.csv("http://matsuka.info/data_folder/tdkDA01.csv", header =T)
dat.nnet<-nnet(class~.,dat,size=30,maxit=1000,decay=0.05)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$class)

dat.glm<-glm(class~., family="binomial",dat)
glm.pred<-predict(dat.glm, dat, type="response")>0.5
table(glm.pred,dat$class)

dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")


dat.nnet<-nnet(survival~., dat, size=30, maxit=1000, decay=0.01)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$survival)

Ns = table(dat$survival)
(Ns[1]/Ns[2])^-1   # class imbalance ratio, used below as the weight for the minority class

wts = rep(1,nrow(dat))
wts[which(dat$survival=="no")]=45
dat.nnet<-nnet(survival~., weights=wts, dat, size=30, maxit=1000, decay=0.01)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$survival)

dat<-read.csv("http://matsuka.info/data_folder/tdkDA02.csv",header=T)
class.id<-class.ind(dat$class)
x = dat[,1:6]
dat.nnet<-nnet(x,class.id,size=30, maxit=1000, decay=0.01, softmax=TRUE)
max.id = apply(dat.nnet$fitted.values,1,which.max)
table(max.id,dat$class)

dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.nnet<-nnet(dat,dat,size=2, maxit=1000, decay=0.01, linout=TRUE)
cor(dat.nnet$fitted.values,dat)

広域システム特別講義II DL with Keras

# dogs & cats data: the dogs-vs-cats images are assumed to be downloaded locally
# (see the directory setup further below)

library(keras)

### iris data
dat = iris
x_train = dat[,1:4] %>% as.matrix()
# class labels (Species) as 0/1/2, then one-hot encoded
y_train <- (as.numeric(dat[,5]) - 1) %>% to_categorical()

model_iris <- keras_model_sequential() %>% 
  layer_dense(units=20, activation = "relu", input_shape = ncol(x_train)) %>%
  layer_dense(units = 10, activation = "relu") %>%
  layer_dense(units = 3, activation = "softmax")

summary(model_iris)

model_iris %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)

model_iris %>% fit(
  x_train, 
  y_train,
  epochs = 100, 
  batch_size = 5
)

model_iris %>% evaluate(x_train,y_train)

### with validation
history <- model_iris %>% fit(x_train,
                         y_train,
                         validation_split = 0.20,
                         epochs=300,
                         batch_size = 5,
                         shuffle = T)

plot(history)



normalizer <- function(x) {
  min_x = apply(x,2,min)
  max_x = apply(x,2,max)
  num <- t(x) - min_x
  denom <- max_x - min_x
  normalized = t(num/denom)
  return (normalized)
}

x_trainN = normalizer(x_train)
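# Quick check (sketch): after min-max scaling, every column should lie in [0, 1].
apply(x_trainN, 2, range)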

model_iris <- keras_model_sequential() %>% 
  layer_dense(units=20, activation = "relu", input_shape = ncol(x_train)) %>%
  layer_dense(units = 10, activation = "relu") %>%
  layer_dense(units = 3, activation = "softmax")

model_iris %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)

history_N <- model_iris %>% fit(x_trainN,
                              y_train,
                              validation_split = 0.20,
                              epochs=150,
                              batch_size = 5,
                              shuffle = T)
plot(history_N)


### IMDB (binary sentiment classification)
imdb <- dataset_imdb(num_words = 10000)
c(c(train_data, train_labels), c(test_data, test_labels)) %<-% imdb
vectorize_sequences <- function(sequences, dimension = 10000) {
  results <- matrix(0, nrow = length(sequences), ncol = dimension)
  for (i in 1:length(sequences))
    results[i, sequences[[i]]] <- 1
  results
}

x_trainTEMP <- vectorize_sequences(train_data)
x_test <- vectorize_sequences(test_data)
y_trainTEMP <- as.numeric(train_labels)
y_test <- as.numeric(test_labels)

val_idx = 1:1000
x_val = x_trainTEMP[val_idx, ]
y_val = y_trainTEMP[val_idx]
x_train = x_trainTEMP[-val_idx, ]
y_train = y_trainTEMP[-val_idx]

original_model <- keras_model_sequential() %>% 
  layer_dense(units = 16, activation = "relu", input_shape = c(10000)) %>% 
  layer_dense(units = 16, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")

original_model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)

history_orig <- original_model %>% fit(x_train,
                                      y_train,
                                      epochs=20,
                                      batch_size = 512,
                                      validation_data = list(x_val, y_val))
plot(history_orig)
original_model %>% evaluate(x_test, y_test)

smaller_model <- keras_model_sequential() %>% 
  layer_dense(units = 4, activation = "relu", input_shape = c(10000)) %>% 
  layer_dense(units = 4, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")
smaller_model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)

history_small <- smaller_model %>% fit(x_train,
                                       y_train,
                                       epochs=20,
                                       batch_size = 512,
                                       validation_data = list(x_val, y_val))
plot(history_small)
smaller_model %>% evaluate(x_test, y_test)

bigger_model <- keras_model_sequential() %>% 
  layer_dense(units = 512, activation = "relu", input_shape = c(10000)) %>% 
  layer_dense(units = 512, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")
bigger_model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)

history_big <- bigger_model %>% fit(x_train,
                                       y_train,
                                       epochs=20,
                                       batch_size = 512,
                                       validation_data = list(x_val, y_val))
plot(history_big)


model_L2 <- keras_model_sequential() %>% 
  layer_dense(units = 16, kernel_regularizer = regularizer_l2(0.0001),
              activation = "relu", input_shape = c(10000)) %>% 
  layer_dense(units = 16, kernel_regularizer = regularizer_l2(0.0001),
              activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")
model_L2 %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)

history_L2 <- model_L2 %>% fit(x_train,
                               y_train,
                               epochs=20,
                               batch_size = 512,
                               validation_data = list(x_val, y_val))
plot(history_L2)
model_L2 %>% evaluate(x_test, y_test)


model_DO <- keras_model_sequential() %>% 
  layer_dense(units = 16,activation = "relu", input_shape = c(10000)) %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 16,activation = "relu") %>% 
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 1, activation = "sigmoid")
model_DO %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)

history_DO <- model_DO %>% fit(x_train,
                               y_train,
                               epochs=20,
                               batch_size = 512,
                               validation_data = list(x_val, y_val))
plot(history_DO)
model_DO %>% evaluate(x_test, y_test)


### MNIST

mnist <- dataset_mnist()
c(c(tr_img, tr_lab),c(te_img, te_lab)) %<-% mnist
tr_img <- array_reshape(tr_img, c(60000,28,28,1))
tr_img = tr_img / 255
te_img <- array_reshape(te_img, c(10000,28,28,1))
te_img = te_img / 255
tr_lab = to_categorical(tr_lab)
te_lab = to_categorical(te_lab)

model <- keras_model_sequential() %>% 
  layer_conv_2d(filter = 32, kernel_size = c(3,3), 
                activation = "relu",input_shape = c(28,28,1)) %>%
  layer_max_pooling_2d(pool_size = c(2,2)) %>%
  layer_conv_2d(filter = 64, kernel_size = c(3,3),activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2,2)) %>%
  layer_flatten() %>%
  layer_dense(units =64, activation = "relu") %>%
  layer_dense(units =10, activation = "softmax")
  
model %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)

history <- model %>% fit(
  tr_img, tr_lab,
  epochs = 5, 
  validation_split = 0.20,
  batch_size = 64
)

model %>% evaluate(te_img, te_lab)
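# Added usage (sketch): predicted digits for the first five test images
# (which.max - 1 maps the softmax column back to the digit 0-9).
probs <- model %>% predict(te_img[1:5, , , , drop=FALSE])
apply(probs, 1, which.max) - 1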


original_dataset_dir <- "~/Downloads/dogs-vs-cats/train"
base_dir <- "~/Downloads/cats_and_dogs_small/"
dir.create(base_dir)
train_dir <- file.path(base_dir, "train")
dir.create(train_dir)
validation_dir <- file.path(base_dir, "validation")
dir.create(validation_dir)
test_dir <- file.path(base_dir, "test")
dir.create(test_dir)
train_cats_dir <- file.path(train_dir, "cats")
dir.create(train_cats_dir)
train_dogs_dir <- file.path(train_dir, "dogs")
dir.create(train_dogs_dir)
validation_cats_dir <- file.path(validation_dir, "cats")
dir.create(validation_cats_dir)
validation_dogs_dir <- file.path(validation_dir, "dogs")
dir.create(validation_dogs_dir)
test_cats_dir <- file.path(test_dir, "cats")
dir.create(test_cats_dir)
test_dogs_dir <- file.path(test_dir, "dogs")
dir.create(test_dogs_dir)
fnames <- paste0("cat.", 1:1000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), 
          file.path(train_cats_dir)) 
fnames <- paste0("cat.", 1001:1500, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), 
          file.path(validation_cats_dir))
fnames <- paste0("cat.", 1501:2000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames),
          file.path(test_cats_dir))
fnames <- paste0("dog.", 1:1000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames),
          file.path(train_dogs_dir))
fnames <- paste0("dog.", 1001:1500, ".jpg")
file.copy(file.path(original_dataset_dir, fnames),
          file.path(validation_dogs_dir)) 
fnames <- paste0("dog.", 1501:2000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames),
          file.path(test_dogs_dir))


model <- keras_model_sequential() %>% 
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(150, 150, 3)) %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_flatten() %>% 
  layer_dense(units = 512, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")

summary(model)

model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-4),
  metrics = c("acc")
)

train_datagen <- image_data_generator(rescale = 1/255)
validation_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_directory(
  train_dir,
  train_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)

validation_generator <- flow_images_from_directory(
  validation_dir,
  validation_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)

history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 10,
  validation_data = validation_generator,
  validation_steps = 50
)


model <- keras_model_sequential() %>% 
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(150, 150, 3)) %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_flatten() %>% 
  layer_dropout(rate = 0.5) %>% 
  layer_dense(units = 512, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")  

model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-4),
  metrics = c("acc")
)

datagen <- image_data_generator(
  rescale = 1/255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE
)

test_datagen <- image_data_generator(rescale = 1/255)

train_generator <- flow_images_from_directory(
  train_dir,
  datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)

validation_generator <- flow_images_from_directory(
  validation_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)

history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 10,
  validation_data = validation_generator,
  validation_steps = 50
)

datagen <- image_data_generator(
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = FALSE,
  fill_mode = "nearest"
)

conv_base <- application_vgg16(
  weights = "imagenet",
  include_top = FALSE,
  input_shape = c(150, 150, 3)
)

summary(conv_base)

model <- keras_model_sequential() %>% 
  conv_base %>% 
  layer_flatten() %>% 
  layer_dense(units = 256, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")

train_datagen = image_data_generator(
  rescale = 1/255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE,
  fill_mode = "nearest"
)
test_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_directory(
  train_dir,
  train_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
validation_generator <- flow_images_from_directory(
  validation_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 2e-5),
  metrics = c("accuracy")
)
history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 3,
  validation_data = validation_generator,
  validation_steps = 50
)

unfreeze_weights(conv_base, from = "block5_conv1")
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-5),
  metrics = c("accuracy")
)


history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 10,
  validation_data = validation_generator,
  validation_steps = 50
)

test_generator <- flow_images_from_directory(
  test_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
model %>% evaluate_generator(test_generator, steps = 50)


認知情報解析学演習 viz convnet

base_dir <- "/home/shiga/R_data/cat_and_dog/small_set"
train_dir <- file.path(base_dir, "train")
validation_dir <- file.path(base_dir, "validation")
test_dir <- file.path(base_dir, "test")

library(keras)
model <- keras_model_sequential() %>% 
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(150, 150, 3)) %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_flatten() %>% 
  layer_dropout(rate = 0.5) %>% 
  layer_dense(units = 512, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")  

model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-4),
  metrics = c("acc")
)

datagen <- image_data_generator(
  rescale = 1/255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE
)

test_datagen <- image_data_generator(rescale = 1/255)

train_generator <- flow_images_from_directory(
  train_dir,
  datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)

validation_generator <- flow_images_from_directory(
  validation_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)

history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 100,
  validation_data = validation_generator,
  validation_steps = 50
)


###
img_path <- "/home/shiga/R_data/cat_and_dog/small_set/test/cat/cat.1700.jpg"
img <- image_load(img_path, target_size = c(150, 150))
img_tensor <- image_to_array(img)
img_tensor <- array_reshape(img_tensor, c(1, 150, 150, 3))
img_tensor <- img_tensor / 255
plot(as.raster(img_tensor[1,,,]))

layer_outputs <- lapply(model$layers[1:8], function(layer) layer$output)
# Creates a model that will return these outputs, given the model input:
activation_model <- keras_model(inputs = model$input, outputs = layer_outputs)
activations <- activation_model %>% predict(img_tensor)
first_layer_activation <- activations[[1]]
dim(first_layer_activation)

plot_channel <- function(channel) {
  rotate <- function(x) t(apply(x, 2, rev))
  image(rotate(channel), axes = FALSE, asp = 1, 
        col = terrain.colors(12))
}

plot_channel(first_layer_activation[1,,,5])

fifth_layer_activation <- activations[[5]]
plot_channel(fifth_layer_activation[1,,,1])
plot_channel(fifth_layer_activation[1,,,3])

データ解析基礎論B: Test Theory

install.packages("psych")
library("psych")
dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt")
ca <- alpha(dat)


dat2<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/survey2.csv")
image(cor(dat2)[10:1,1:10])
ca2 <- alpha(dat2)
ca2

ca1_5 = alpha(dat[,1:5])
ca6_10 = alpha(dat[,6:10])

### not recommended ###
# FA
F1<-factanal(dat[,1:5],1)
F2<-factanal(dat[,6:10],1)

library(sem)
# the cfa() specification below is typed interactively at the console:
# factor F1 loads on q1-q5, factor F2 on q6-q10
fa.model=cfa(reference.indicator=FALSE)
F1: q1,q2,q3,q4,q5
F2: q6,q7,q8,q9,q10

# update() is also interactive; the edit removes the factor covariance F1<->F2
fa.model<-update(fa.model)
delete, F1<->F2

fa.result<-sem(fa.model, cov(dat2), 300)
summary(fa.result)

### recommended approach
install.packages("ltm")
library(ltm)
dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt")-1
descript(dat)

irt1P<-rasch(dat)
irt1P
summary(irt1P)
theta = factor.scores(irt1P)

GoF.rasch(irt1P)
person.fit(irt1P)
item.fit(irt1P)
cor(rowSums(theta[[1]][,1:10]),theta[[1]]$z1)


## 2p IRT
irt2P<-ltm(dat~z1)
irt2P
plot(irt2P)
person.fit(irt2P)
item.fit(irt2P)
anova(irt1P,irt2P)
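## Added (sketch): ltm can also plot information curves; items = 0 is meant to
## request the whole-test information curve, useful for comparing the 1PL and 2PL fits.
plot(irt1P, type="IIC", items=0)
plot(irt2P, type="IIC", items=0)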

## exercise
dat <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/test_data01.csv")

DBDA with pymc3

from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
from os import makedirs
from urllib.request import urlretrieve
import pymc3 as pm
import theano.tensor as tt
from pymc3 import model_to_graphviz
from pymc3 import forestplot


## analysis 1 P208
makedirs("data", exist_ok=True)
urlretrieve("http://peach.l.chiba-u.ac.jp/course_folder/z15N50.csv", "data/z15N50.csv")
data01 = np.loadtxt("data/z15N50.csv", skiprows=1)
print(data01)

with pm.Model() as model:
    theta = pm.Beta('theta', alpha = 1, beta = 1)
    obs = pm.Bernoulli('obs', theta, observed = data01)

model_to_graphviz(model)

with model:
    trace = pm.sample(10000)


# checking results
figsize(12.5, 4)
pm.plots.traceplot(trace, var_names=["theta"])
pm.plots.plot_posterior(trace, var_names=["theta"])
pm.plots.autocorrplot(trace, var_names=["theta"])


# Analysis 2 p208
resp = [1,0,1,1,1,1,1,0,0,0,1,0,0,1,0]
subj = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1]

from pymc3 import model_to_graphviz
N_subj=2
with pm.Model() as model2:

    theta = pm.Beta('theta', alpha = 2, beta = 2, shape=N_subj)

    y = pm.Bernoulli('y', theta[subj],  observed=resp)
    
    diff_theta = pm.Deterministic('delta_theta', theta[0] - theta[1])
model_to_graphviz(model2)

# mcmc sampling
with model2:
    trace = pm.sample(10000)

# viz & checking results
pm.plots.traceplot(trace, var_names=["theta"])
pm.plots.plot_posterior(trace, var_names=["theta"])
pm.plots.plot_posterior(trace, var_names=["delta_theta"])
pm.plots.autocorrplot(trace, var_names=["theta"])
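# Added summary (sketch): numerical posterior summaries for both thetas and their difference
pm.summary(trace, var_names=["theta", "delta_theta"])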


### analysis 3 p240
# data: assumed to be the two-subject dataset from the R example above, loaded with pandas
import pandas as pd
data = pd.read_csv("http://www.matsuka.info/univ/course_folder/z6N8z2N7.csv")
N_subj = len(data.s.unique())
resp = data.y
subj = data.s - 1
with pm.Model() as model3:
    omega = pm.Beta('omega',1,1)
    kappaM2 = pm.Gamma('kappaM2', 0.01, 0.01)
    kappa = pm.Deterministic('kappa', kappaM2 + 2)
    theta = pm.Beta('theta', 
                    alpha = omega*(kappa-2)+1,
                    beta = (1-omega)*(kappa-2)+1, 
                    shape = N_subj)



    y = pm.Bernoulli('y', theta[subj],  observed=resp)
    
model_to_graphviz(model3)
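# Sampling sketch (added): model3 is only defined above; drawing samples and
# inspecting the group-level mode omega and concentration kappa could look like this.
with model3:
    trace3 = pm.sample(10000)
pm.plots.plot_posterior(trace3, var_names=["omega", "kappa"])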
### analysis 4 p253
# data: assumed to be Kruschke's BattingAverage dataset loaded into a pandas DataFrame named data
N_player = len(data.Player.unique())
N_pos = 9
Ns = data.AtBats
Zs = data.Hits
subj = data.PlayerNumber - 1
pos = data.PriPosNumber - 1

with pm.Model() as model4:
    omegaO = pm.Beta('omegaO',1,1)
    kappaM2O = pm.Gamma('kappaM2O', 0.01, 0.01)
    kappaO = pm.Deterministic('kappaO', kappaM2O + 2)
    
    omegaPOS = pm.Beta('omegaPOS', 
                       alpha = omegaO*(kappaO-2)+1, 
                       beta = (1-omegaO)*(kappaO-2)+1, 
                       shape = N_pos)
    kappaM2POS = pm.Gamma('kappaM2POS', 0.01, 0.01,
                           shape = N_pos)
    kappaPOS = pm.Deterministic('kappaPOS', kappaM2POS + 2)

   
    theta = pm.Beta('theta', 
                    alpha = omegaPOS[pos]*(kappaPOS[pos]-2)+1, 
                   beta = (1-omegaPOS[pos])*(kappaPOS[pos]-2)+1, 
                    shape = N_player)

    y = pm.Binomial('y', n = Ns, p = theta,  observed = Zs)
    
model_to_graphviz(model4)

基礎実習 B02

bin2dec <- function(bin){
# convert binary to decimal
   b.rev = rev(bin)
   ones = which(b.rev == 1)
   ones.adj = ones - 1
   b.sq = 2^ones.adj
   dec = sum(b.sq)
   return(dec)
}


dec2bin <- function(dec, digit) {
# convert decimal to binary
bin = rep(0, digit)
r.counter  = 1 
   while(dec != 0){
     r = dec %% 2
     dec = dec %/% 2
     bin[r.counter] = r
     r.counter = r.counter + 1
  }
  return(rev(bin))
}
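# Quick check (sketch): 150 in 8-bit binary is 1 0 0 1 0 1 1 0, and converting it
# back should recover 150.
dec2bin(150, 8)
bin2dec(dec2bin(150, 8))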

# cellular automata
# RULE 150
time = 100
n.cells =  101
cells = matrix(0, nrow = time, ncol = n.cells)
state = rep(0,n.cells)
state[51] = 1
cells[1,] = state
for (i.time in 2:time) {
   left = c(state[n.cells], state[1:(n.cells - 1)])
   right = c(state[2:n.cells], state[1])
   s = left + state + right
   state = s %% 2 
   cells[i.time, ] = state
}

time = 100
n.cells =  101
cells = matrix(0, nrow = time, ncol = n.cells)
cells[1,51] = 1
for (i.time in 2:time) {
   left = c(cells[(i.time-1), n.cells], cells[(i.time-1), 1:(n.cells - 1)])
   right = c(cells[(i.time-1), 2:n.cells], cells[(i.time-1), 1])
   s = left + cells[(i.time-1),] + right
   cells[i.time, ] = s %% 2 
}

transFUN<-function(st,ruleID){
  output=dec2bin(ruleID,8);
  a=matrix(c(1,1,1,
             1,1,0,
             1,0,1,
             1,0,0,
             0,1,1,
             0,1,0,
             0,0,1,
             0,0,0),nrow=8,byrow=T)
  newSt=output[which(apply(a,1,function(x) {all(x==st)}))]
  return(newSt)
}

ECA<-function(nCell, nGen,ruleID){
  res=matrix(0,nrow=nGen,ncol=nCell)
  res[1,ceiling(nCell/2)]=1;
  for (i_gen in 2:nGen) {
    for (i_cell in 2:(nCell-1)) {
      res[i_gen,i_cell]=transFUN(res[i_gen-1,(i_cell-1):(i_cell+1)],ruleID)
     }
   res[i_gen,1]=transFUN(c(res[i_gen-1,nCell],
                           res[i_gen-1,1],
                           res[i_gen-1,2]),ruleID)
   res[i_gen,nCell]=transFUN(c(res[i_gen-1,(nCell-1)],
                               res[i_gen-1,nCell],
                               res[i_gen-1,1]),ruleID)
  }
  return(res)
}
res<-ECA(200,100,99)
image(res) 
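# Added example (sketch): the same transFUN/ECA pair reproduces the rule-150
# pattern that was built step-by-step above.
res150 <- ECA(101, 100, 150)
image(res150)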


広域システム特別講義II: Unsupervised Learning 1A

dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca <- princomp(dat)
dat.pca$score
dat.pca$loadings

library(nnet)
dat <- read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
set.seed(5)
x = dat[, 1:3]
y = dat[, 4]
dat.nnet = nnet(x, y, size = 150, linout = TRUE,maxit = 1000)
nnet.pred <- predict(dat.nnet, dat)
cor(dat.nnet$fitted.values,dat$sales)^2

dat.lm<-lm(sales~.,data=dat)
summary(dat.lm)

# pseudo-pca
dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.nnet<-nnet(dat,dat,size=2, maxit=1000, decay=0.01, linout=TRUE)
cor(dat.nnet$fitted.values,dat)


### text processing

txt = "You say goodbye and I say hello.";txt = tolower(txt)
txt = gsub('[.]', ' .',txt)
words = unlist(strsplit(txt, " "))
uniq.words = unique(words)
uniq.words[1]
which(uniq.words=="say")

n.uniq = length(uniq.words)
n.words = length(words)

corpus = rep(0,n.words)
corpus = match(words,uniq.words)

cc = matrix(0,nrow=n.uniq, ncol=n.uniq)
for (i.c in 1:n.uniq){
  loc = which(corpus==i.c)
  L = which(!is.na(match(uniq.words, words[pmax(loc-1,1)])))
  R = which(!is.na(match(uniq.words, words[pmin(loc+1,n.words)])))
  cc[i.c, c(L,R)]=cc[i.c, c(L,R)]+1
}
diag(cc)=0

contxt <- function(corpus, uniq.words, words){
  cc = matrix(0, nrow=n.uniq, ncol=n.uniq)
  for (i.c in 1:n.uniq){
    loc = which(corpus==i.c)
    L = which(!is.na(match(uniq.words, words[pmax(loc-1,1)])))
    R = which(!is.na(match(uniq.words, words[pmin(loc+1,n.words)])))
    cc[i.c, c(L,R)]=cc[i.c, c(L,R)]+1
  }
  diag(cc)=0
  return(cc)
}

words.sim <- function(word1, word2, eps=1e-8){
  nw1 = word1/(sqrt(sum(word1^2)) + eps)
  nw2 = word2/(sqrt(sum(word2^2)) + eps)
  return(nw1%*%nw2)
}

w1 = cc[which(uniq.words=="you"),]
w2 = cc[which(uniq.words=="i"),]

words.sim(w1,w2)

most.sim <- function(word, uniq.words, cc, N=5){
  n.uniq = length(uniq.words)
  word2 = cc[which(uniq.words==word),]
  res = data.frame(words = uniq.words, similarity = rep(0,n.uniq))
  top = data.frame(words = rep("",N+1), similarity=rep(0,N+1))
  res$similarity = apply(cc,1, function(x) words.sim(x,word2))
  sort.res = sort(res$similarity, decreasing = T,  index.return=T)
  top$words = res$words[sort.res$ix[1:(N+1)]]
  top$similarity = res$similarity[sort.res$ix[1:(N+1)]]
  self.idx = which(top$words == word)
  return(top[-self.idx,])
}

most.sim('i',uniq.words,cc,5)

ppmi <- function(cc, eps = 1e-8){
  n.uniq = ncol(cc)
  PPMI = matrix(0, n.uniq, n.uniq)
  N = sum(cc)
  r.sum = rowSums(cc)
  pmi = log2(cc*N/outer(r.sum,r.sum) + eps)
  PPMI=matrix(pmax(0,pmi),nrow=n.uniq)
  return(PPMI)
}


### LSA
PPMI <- ppmi(cc)

s = svd(PPMI)
plot(s$u[,2],s$u[,1],pch=20,col='red',cex=5)
text(s$u[,2],s$u[,1],uniq.words,cex=2)



### word2vec inefficient
corp2contxt1S = function(corpus){
  len.corp = length(corpus)
  # creating target matrix
  idxT = cbind(1:(len.corp-2), corpus[2:(len.corp-1)])
  targ1S = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
  targ1S[idxT]=1

  # creating context matrices
  idxC1 = cbind(1:(len.corp-2),corpus[1:(len.corp-2)])
  idxC2 = cbind(1:(len.corp-2),corpus[3:len.corp])
  contxt1 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
  contxt2 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus)))
  contxt1[idxC1]=1
  contxt2[idxC2]=1
  return(list(target=targ1S,contxt1=contxt1,contxt2=contxt2))
}
txt = "You say goodbye and I say hello.";
corp = txt2corpus(txt)
dat <- corp2contxt1S(corp)
init.W2V <- function(n.uniq,size.hidden){
  W.in = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
  W.out = matrix(rnorm(n.uniq*size.hidden),nrow=size.hidden)*0.01
  return(list(W.in = W.in, W.out= W.out))
}

softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

MatMult.forwd <- function(x, W){
  # forward pass of the matrix-multiplication layer (used in the training loop below)
  return(x%*%W)
}

MatMult.bckwd <- function(x, W, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  return(list(dx = dx, dW = dW))
}

softmax.pred <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}

network<-init.W2V(7,5)
n.batch = 3;
n.data = nrow(dat$target)
n.iter = 2000;
lambda=0.1;
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  samp.idx = sample(1:n.data,n.batch)
  batch.c1 = dat$contxt1[samp.idx,]
  batch.c2 = dat$contxt2[samp.idx,]
  batch.y = dat$target[samp.idx,]
  h1 <- MatMult.forwd(batch.c1, network$W.in)
  h2 <- MatMult.forwd(batch.c2, network$W.in)
  h = 0.5 * (h1 + h2)
  s = MatMult.forwd(h,network$W.out)
  z = softmax.forwd(s,batch.y)
  loss[i.iter] = z
  ds = softmax.bckwd(s, batch.y, 1)
  da = MatMult.bckwd(h,network$W.out,ds)
  da$dW = da$dW*0.5
  dIn1 = MatMult.bckwd(batch.c1,network$W.in,da$dx)
  dIn2 = MatMult.bckwd(batch.c2,network$W.in,da$dx)
  network$W.out = network$W.out - lambda*da$dW
  network$W.in = network$W.in - lambda*dIn1$dW
  network$W.in = network$W.in - lambda*dIn2$dW
}
plot(loss, type='l')

### word2vec
txt2corpus <- function(txt){
  txt = tolower(txt)
  txt = gsub('[.]', ' .',txt)
  words = unlist(strsplit(txt, " "))
  uniq.words = unique(words)
  n.uniq = length(uniq.words)
  n.words = length(words)
  corpus = rep(0,n.words)
  corpus = match(words,uniq.words)
  return(corpus)
}

corp2contxt <- function(corpus){
  len.corp = length(corpus)
  target = corpus[2:(len.corp-1)]
  col1 = corpus[1:(len.corp-2)]
  col2 = corpus[3:len.corp]
  context = cbind(col1,col2)
  return(list(context=context,target = target))
}
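# Added usage (sketch): corpus and (context, target) pairs for the toy sentence;
# corp.demo and dat.demo are illustrative names.
corp.demo <- txt2corpus("You say goodbye and I say hello.")
dat.demo <- corp2contxt(corp.demo)
head(cbind(dat.demo$context, target=dat.demo$target))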


embed.forwd <- function(W, idx){
  return(W[idx,])
}

# illustrative usage: W, idx1, idx2, network, and batch.y stand for the objects
# created in the training loop further below
h1 <- embed.forwd(W,idx1)
h2 <- embed.forwd(W,idx2)
h = (h1 + h2)/2

embed.dot.forwd <- function(W, h, idx){
  return(rowSums(W[idx,]*h))
}

s = embed.dot.forwd(network$W.out,h,batch.y)

sigmoid.forwd <- function(O){
  return(1/(1+exp(-O)))
}

sigmoidWL.forwd <- function(O,target){
  #delta = 1e-5
  #y = max(0, (1/(1+exp(-O)) - delta))
  y = 1/(1+exp(-O))
  loss=-sum(target*log(y)+(1-target)*log(1 - y))
  return(loss)
}

sigmoidWL.backwd <- function(O,target,dout=1){
  #delta = 1e-5
  #y = 1/(1+exp(-O)) - delta
  y = 1/(1+exp(-O))
  dW = (y - target)*dout
  return(dW)
}

embed.dot.backwd <- function(W,h,idx,dout){
  dh = dout * W[idx,]
  dW = dout * h
  return(list(dh = dh, dW = dW))
}

embed.backwd <- function(W, idx, dout){
  dW = matrix(0,nrow(W),ncol(W))
  for (i.idx in 1:length(idx)){
    dW[idx[i.idx], ] = dW[idx[i.idx], ] + dout[i.idx, ]
  }
  return(dW)
}


init.W2V <- function(n.uniq,size.hidden){
  W.in = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
  W.out = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01
  return(list(W.in = W.in, W.out= W.out))
}

### naive W2V
network<-init.W2V(8,5)
n.batch = 3;
txt = "You say goodbye and I say hello.."
corp = txt2corpus(txt)
corp = c(8,8,corp)
dat<-corp2contxt(corp)
n.data =  length(dat$target)

n.iter = 3000;
lambda=0.1;
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  samp.idx = sample(1:n.data,n.batch)
  batch.c1 = dat$context[samp.idx,1]
  batch.c2 = dat$context[samp.idx,2]
  batch.y = dat$target[samp.idx]
  h1 <- embed.forwd(network$W.in,batch.c1)
  h2 <- embed.forwd(network$W.in,batch.c2)
  h = 0.5 * (h1 + h2)
  # positive only
  s = embed.dot.forwd(network$W.out,h,batch.y)
  z = sigmoidWL.forwd(s,1)
  loss[i.iter] = z
  ds = sigmoidWL.backwd(s, 1, 1)
  dE = embed.dot.backwd(network$W.out,h, batch.y, ds)
  dh = dE$dh*0.5
  dIn1 = embed.backwd(network$W.in,dat$context[samp.idx,1], dh)
  dIn2 = embed.backwd(network$W.in,dat$context[samp.idx,2], dh)
  network$W.out[batch.y,] = network$W.out[batch.y,] - lambda*dE$dW
  network$W.in = network$W.in - lambda*dIn1
  network$W.in = network$W.in - lambda*dIn2
}

par(mfrow=c(1,1))
plot(loss, type='l')

samp.idx = c(2:6,8,9,1)
batch.c1 = dat$context[samp.idx,1]
batch.c2 = dat$context[samp.idx,2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in,batch.c1)
h2 <- embed.forwd(network$W.in,batch.c2)
h = 0.5 * (h1 + h2)
s = MatMult.forwd(h,t(network$W.out))
z = sigmoid.forwd(s)
res=cbind(z,batch.y)
#par(mfrow=c(8,1))
#for (i in 1:8){
#  col.spec = rep("black",8)
#  col.spec[i]="orange"
# barplot(res[i, 1:8],col=col.spec)
#}

par(mfrow=c(4,1))
for (i in 1:4){
  col.spec = rep("black",8)
  col.spec[i]="orange"
  barplot(res[i, 1:8],col=col.spec)
}
par(mfrow=c(4,1))
for (i in 5:8){
  col.spec = rep("black",8)
  col.spec[i]="orange"
  barplot(res[i, 1:8],col=col.spec)
}






### with negative sampling
unigram.sampler <- function(corpus, target, power, sample.size){
  neg.corpus <- corpus[-which(corpus == target)]
  freq = table(neg.corpus)
  uniq.word = as.numeric(names(freq))   # keep word ids aligned with their frequencies
  n.word = length(neg.corpus)
  prob = (as.vector(freq)/n.word)^power
  prob = prob/sum(prob)
  sample.idx = sample(uniq.word, sample.size, prob = prob)
  return(sort(sample.idx))
}
get.neg.sample <- function(corpus, target, power, sample.size){
  sample.id = matrix(0, nrow = length(target), ncol = sample.size)
  for (i.targ in 1:length(target)){
    sample.id[i.targ,] = unigram.sampler(corpus, target[i.targ], power, sample.size)
  }
  return(sample.id)
}


network<-init.W2V(8,5)
n.batch = 3;
txt = "You say goodbye and I say hello.."
corp = txt2corpus(txt)
corp = c(8,8,corp)
dat<-corp2contxt(corp)
n.data =  length(dat$target)

n.iter = 10000;
lambda=0.1;
loss = rep(0, n.iter)
sample.size = 2
for (i.iter in 1:n.iter){
  samp.idx = sample(1:n.data,n.batch)
  batch.c1 = dat$context[samp.idx,1]
  batch.c2 = dat$context[samp.idx,2]
  batch.y = dat$target[samp.idx]
  h1 <- embed.forwd(network$W.in,batch.c1)
  h2 <- embed.forwd(network$W.in,batch.c2)
  h = 0.5 * (h1 + h2)
  # positive
  s = embed.dot.forwd(network$W.out,h,batch.y)
  z = sigmoidWL.forwd(s,1)
  loss[i.iter] = z
  ds = sigmoidWL.backwd(s, 1, 1)
  dE = embed.dot.backwd(network$W.out,h, batch.y, ds)
  dh = dE$dh*0.5
  dIn1 = embed.backwd(network$W.in,dat$context[samp.idx,1], dh)
  dIn2 = embed.backwd(network$W.in,dat$context[samp.idx,2], dh)
  network$W.out[batch.y,] = network$W.out[batch.y,] - lambda*dE$dW
  network$W.in = network$W.in - lambda*dIn1
  network$W.in = network$W.in - lambda*dIn2
  # negative
  neg.set <- get.neg.sample(corp,batch.y, 0.75, sample.size)
  for (i.neg in 1:sample.size){
    s = embed.dot.forwd(network$W.out,h,neg.set[,i.neg])
    z = sigmoidWL.forwd(s,0)
    loss[i.iter] = loss[i.iter] + z
    ds = sigmoidWL.backwd(s, 0, dout=1)
    dE = embed.dot.backwd(network$W.out,h,neg.set[,i.neg], ds)
    dh = dE$dh*0.5
    dIn1 = embed.backwd(network$W.in, dat$context[samp.idx,1],dh)
    dIn2 = embed.backwd(network$W.in, dat$context[samp.idx,2],dh)
    network$W.out[neg.set[,i.neg],] = network$W.out[neg.set[,i.neg],] - lambda*dE$dW
    network$W.in = network$W.in - lambda*dIn1
    network$W.in = network$W.in - lambda*dIn2
  }
}
par(mfrow=c(1,1))
library(ggplot2)
loss.dat <- data.frame(epoch=1:length(loss), loss = loss)
ggplot(loss.dat, aes(x = epoch, y = loss)) +
  geom_smooth(se=F)


samp.idx = c(2:6,8,9,1)
batch.c1 = dat$context[samp.idx,1]
batch.c2 = dat$context[samp.idx,2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in,batch.c1)
h2 <- embed.forwd(network$W.in,batch.c2)
h = 0.5 * (h1 + h2)
s = MatMult.forwd(h,t(network$W.out))
z = sigmoid.forwd(s)
res=cbind(z,batch.y)
#par(mfrow=c(8,1))
#for (i in 1:8){
# col.spec = rep("black",8)
#  col.spec[i]="orange"
#  barplot(res[i, 1:8],col=col.spec)
#}

par(mfrow=c(4,1))
for (i in 1:4){
  col.spec = rep("black",8)
  col.spec[i]="orange"
  barplot(res[i, 1:8],col=col.spec)
}

par(mfrow=c(4,1))
for (i in 5:8){
  col.spec = rep("black",8)
  col.spec[i]="orange"
  barplot(res[i, 1:8],col=col.spec)
}

広域システム特別講義II: Supervised Learning 1B

train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] =
      matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1)
  }
  return(list(W = W,b = b))
}

sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}

sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x)
  return(dout*(1-y)*y)
}

affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

params = init.network(c(4,15,3))
batch_size = 50; n.iter =5000; lambda =0.05
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1,params$W[[2]],params$b[[2]])
  z2 = softmax.forwd(a2,t.batch)
  loss[i.iter] = z2
  dwSM = softmax.bckwd(a2, t.batch, 1)
  dwA2 = affine.bckwd(a1,params$W[[2]],params$b[[2]],dwSM)
  dwSG = sigmoid.bckwd(a1,dwA2$dx)
  dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG)
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
}
library(ggplot2)
loss.dat = data.frame(trial = 1:length(loss), loss = loss)
ggplot(loss.dat,aes(x = trial, y = loss)) +
  geom_smooth(se=F)

### methods to improve efficiency
func02R = function(x){
  return(1/20*x[1]^2 + x[2]^2)
}
numerical.grad <- function(func, x){
  h = 1e-4
  R = nrow(x)
  C = ncol(x)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.x = x[i.row,i.col]
      x[i.row, i.col] = temp.x + h
      plusH = do.call(func, list(x))
      x[i.row, i.col] = temp.x - h
      minusH = do.call(func,list(x))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      x[i.row, i.col] = temp.x
    }
  }
  return(grad)
}
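# Quick check (sketch): for f(x) = x1^2/20 + x2^2 the analytic gradient is
# (x1/10, 2*x2), so at (3, 4) the numerical gradient should be close to (0.3, 8).
numerical.grad("func02R", matrix(c(3, 4), nrow=1))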

require(plot3D)
x = seq(-10,10,0.5)
y = seq(-10,10,0.5)
M = mesh(x,y)
R = nrow(M$x)
C = ncol(M$x)
scaling = 0.05
plot(c(),c(),xlim = c(-10,10),ylim=c(-10,10))
for (i.col in 1:C){
  for (i.row in 1:R){
    ng = numerical.grad("func02R",matrix(c(M$x[i.row,i.col],M$y[i.row,i.col]),nrow=1))
    arrows(M$x[i.row,i.col],M$y[i.row,i.col],
           (M$x[i.row,i.col]-ng[1]*scaling),(M$y[i.row,i.col]-ng[2]*scaling),
           length = 0.05)
  }
}

x = seq(-10,10,0.2)
y = seq(-10,10,0.2)
M = mesh(x,y)
Z = as.vector(1/20*M$x^2)+as.vector(M$y^2)
Z.mesh = matrix(Z,nrow(M$x))
contour(x,y,Z.mesh,drawlabels = F,nlevels=40)

grad.desc <- function(func, init.x, lr, n.iter){
  x = init.x
  x.hist = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
    x.hist = rbind(x.hist,x)
  }
  return(x.hist)
}
x.init = matrix(c(-7,2),nrow = 1)
gd = grad.desc("func02R",x.init,0.9,100)
lines(gd,type='o',col = 'green',pch=20)

gd.moment <- function(func, init.x, lr, moment, n.iter){
  x = init.x
  x.hist = init.x
  grad.hist = rep(0,length(x))
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad+moment*grad.hist
    x.hist = rbind(x.hist,x)
    grad.hist = -lr*grad
  }
  return(x.hist)
}
x.init = matrix(c(-7,2),nrow = 1)
gdm = gd.moment("func02R",x.init,0.9,0.3,100)
lines(gdm,type='o',col = 'blue',pch=20)

adaGrad <- function(func, init.x, eta, n.iter){
  x = init.x
  x.hist = init.x
  h = rep(0,length(x))
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    h = h + grad*grad
    x = x - eta*(1/(sqrt(h)+1e-7))*grad
    x.hist = rbind(x.hist,x)
  }
  return(x.hist)
}
x.init = matrix(c(-7,2),nrow = 1)
adaGrad = adaGrad("func02R",x.init,0.9,100)
contour(x,y,Z.mesh,drawlabels = F,nlevels=40)
lines(adaGrad,type='o',col = 'red',pch=20)


adam <- function(func, init.x, eta, beta1, beta2, epsilon, n.iter){
  x = init.x
  x.hist = init.x
  m = rep(0,length(x))
  v = rep(0,length(x))
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    m = beta1*m + (1-beta1)*grad
    v = beta2*v + (1-beta2)*grad*grad
    m.hat = m/(1-beta1)
    v.hat = v/(1-beta2)
    x = x - eta/((sqrt(v.hat)+epsilon))*m.hat
    x.hist = rbind(x.hist,x)
  }
  return(x.hist)
}
x.init = matrix(c(-7,2),nrow = 1)
adam = adam("func02R",x.init,0.2,0.9,0.999,1e-8,100)
contour(x,y,Z.mesh,drawlabels = F,nlevels=40)
lines(adam,type='o',col = 'red',pch=20)

### w/ functions
relu.forwd <- function(x){
  return(pmax(x,0))
}
relu.bckwd <- function(z, dout){
  dout[which(z <= 0)] = 0
  return(dout)
}
# sigmoid
sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}
sigmoid.bckwd <- function(z, dout){
  return(dout*(1-z)*z)
}
# Affine
affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}
affine.bckwd <- function(x, W, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}
# softmax with CE
softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(list(smx = y, ce = -sum(target*log(y + delta))/R))
}
softmax.bckwd <- function(smx, target){
  # gradient of the batch-averaged cross-entropy w.r.t. the pre-softmax input
  R = nrow(smx)
  return((smx-target)/R)
}
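
# Minimal shape check (added sketch): 2 samples, 3 inputs, 4 output units.
x.tmp = matrix(rnorm(6), nrow = 2)            # 2 x 3 input
W.tmp = matrix(rnorm(12), nrow = 3)           # 3 x 4 weights
b.tmp = matrix(rnorm(4), nrow = 1)            # 1 x 4 bias
A.tmp = affine.forwd(x.tmp, W.tmp, b.tmp)     # 2 x 4 pre-activation
d.tmp = affine.bckwd(x.tmp, W.tmp, matrix(1, 2, 4))
dim(d.tmp$dx); dim(d.tmp$dW); length(d.tmp$db)   # 2x3, 3x4, 4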

activation <- function(A, target, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.forwd(A))
  }
  if (actFun == "relu"){
    return(relu.forwd(A))
  }
  if (actFun == "softmax"){
    return(softmax.forwd(A, target))
  }
}

Dact <- function(z, smx, target, dout, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.bckwd(z, dout))
  }
  if (actFun == "relu"){
    return(relu.bckwd(z, dout))
  }
  if (actFun == "softmax"){
    return(softmax.bckwd(smx, target))
  }
}

# function for initialization
init.network <- function(n.neurons, actFun, Opt, sdv){
  n.layer = length(n.neurons)
  W = list(); b = list();
  mW = list(); mb = list();    # momentum
  hW = list(); hb = list();    # adaGrad
  aMW = list(); aMb = list();  # adam M
  aVW = list(); aVb = list();  # adam V
  for (i.layer in 1:(n.layer-1)){
    if (nargs() == 3) {
      if (actFun[i.layer]=="sigmoid"){
        # Xavier
        sdv = 1/sqrt(n.neurons[i.layer])
      } else {
        # He - assumes ReLU
        sdv = sqrt(2/n.neurons[i.layer])
      }
    }
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)], sd = sdv),
                          nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)], sd = sdv), nrow = 1)
    if (Opt == "momentum"){
      mW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      mb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
    }
    if (Opt == "adagrad"){
      hW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      hb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
    }
    if (Opt == "adam"){
      aMW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      aMb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
      aVW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)])
      aVb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)])
    }
  }
  return(list(W = W, b = b, actFun = actFun, optimizer = Opt,
              mW=mW, mb=mb,
              hW=hW, hb=hb,
              aMW=aMW, aMb=aMb, aVW=aVW, aVb=aVb))
}
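
# Quick check of what init.network returns (added sketch): a 4-15-3 network with
# automatically scaled weights and momentum buffers.
tmp.net = init.network(c(4, 15, 3), c("relu", "softmax"), "momentum")
length(tmp.net$W)                          # 2 weight matrices
dim(tmp.net$W[[1]]); dim(tmp.net$W[[2]])   # 4x15 and 15x3
tmp.net$actFun; tmp.net$optimizer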

OPT<-function(net, Daff, HP){
  if (net$optimizer == "momentum"){
    return(Opt.mom(net, Daff, HP))
  }
  if (net$optimizer == "adagrad"){
    return(Opt.adagrad(net, Daff, HP))
  }
  if (net$optimizer == "adam"){
    return(Opt.adam(net, Daff, HP))
  }
}

Opt.mom <- function(net, Daff, HP) {
  # HP[3] = learning rate
  # HP[4] = weight decay
  # HP[5] = momentum
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer) {
    net$mW[[i.layer]] = HP[5]*net$mW[[i.layer]] - HP[3]*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]]
    net$mb[[i.layer]] = HP[5]*net$mb[[i.layer]] - HP[3]*Daff[[i.layer]]$db - HP[4]*net$b[[i.layer]]
    net$W[[i.layer]] = net$W[[i.layer]] + net$mW[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] + net$mb[[i.layer]]
  }
  return(net=net)
}

Opt.adagrad <- function(net, Daff, HP) {
  # HP[3] = learning rate
  # HP[4] = weight decay
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer) {
    net$hW[[i.layer]] = net$hW[[i.layer]] + Daff[[i.layer]]$dW*Daff[[i.layer]]$dW
    net$hb[[i.layer]] = net$hb[[i.layer]] + Daff[[i.layer]]$db*Daff[[i.layer]]$db
    net$W[[i.layer]] = net$W[[i.layer]]-HP[3]/(sqrt(net$hW[[i.layer]])+1e-7)*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]]-HP[3]/(sqrt(net$hb[[i.layer]])+1e-7)*Daff[[i.layer]]$db - HP[4]*net$b[[i.layer]]
  }
  return(net=net)
}
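
# Opt.adam is dispatched from OPT() above but not defined in this part of the
# notes; the following is a minimal added sketch, assuming HP[3] = learning rate,
# HP[4] = weight decay, HP[6] = beta1, HP[7] = beta2, mirroring the standalone
# adam() function and the update style of Opt.adagrad above.
Opt.adam <- function(net, Daff, HP) {
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer) {
    # first- and second-moment estimates for weights and biases
    net$aMW[[i.layer]] = HP[6]*net$aMW[[i.layer]] + (1-HP[6])*Daff[[i.layer]]$dW
    net$aVW[[i.layer]] = HP[7]*net$aVW[[i.layer]] + (1-HP[7])*Daff[[i.layer]]$dW^2
    net$aMb[[i.layer]] = HP[6]*net$aMb[[i.layer]] + (1-HP[6])*Daff[[i.layer]]$db
    net$aVb[[i.layer]] = HP[7]*net$aVb[[i.layer]] + (1-HP[7])*Daff[[i.layer]]$db^2
    # parameter update with weight decay
    net$W[[i.layer]] = net$W[[i.layer]] -
      HP[3]/(sqrt(net$aVW[[i.layer]])+1e-7)*net$aMW[[i.layer]] - HP[4]*net$W[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] -
      HP[3]/(sqrt(net$aVb[[i.layer]])+1e-7)*net$aMb[[i.layer]] - HP[4]*net$b[[i.layer]]
  }
  return(net)
}
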
cu.nnet = function(train.x, train.y, net, HP = c(10,1000,0.05,0.01,0.1,0.999,0.9)){
  # HP: Hyperparameters
  # HP[1] = batch size
  # HP[2] = n iteration
  # HP[3] = learning rate
  # HP[4] = weight decay
  # HP[5] = momentum
  # HP[6] = beta1 (adam)
  # HP[7] = beta2 (adam)
  n.layer <- length(net$W)
  loss = rep(0,HP[2])
  A = list(); z = list(); dAct = list(); Daff = list()   # dAct renamed so it does not shadow the Dact() function
  for (i.iter in 1:HP[2]){
    batch_mask = sample(1:nrow(train.x), HP[1])
    x.batch = train.x[batch_mask,]
    t.batch = train.y[batch_mask,]
    for (i.layer in 1:n.layer){
      if (i.layer == 1){
        x = x.batch
      } else {
        x = z[[(i.layer - 1)]]
      }
      A[[i.layer]] = affine.forwd(x, net$W[[i.layer]], net$b[[i.layer]])
      z[[i.layer]] = activation(A[[i.layer]], t.batch, net$actFun[i.layer])
    }
    loss[i.iter] = z[[n.layer]]$ce
    smx = z[[n.layer]]$smx
    for (i.layerR in n.layer:1){
      if (i.layerR == n.layer){
        dout = 1
      } else {
        dout = Daff[[(i.layerR+1)]]$dx
      }
      dAct[[i.layerR]] = Dact(z[[i.layerR]], smx, t.batch, dout, net$actFun[i.layerR])
      if (i.layerR==1){
        x = x.batch
      } else {
        x = z[[(i.layerR-1)]]   # the input to this affine layer was the previous layer's activation
      }
      Daff[[i.layerR]] = affine.bckwd(x, net$W[[i.layerR]], dAct[[i.layerR]])
    }
    net = OPT(net, Daff, HP)
  }
  return(list(loss = loss, net = net))
}

train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0,nrow = nrow(train.x), ncol =3)
train.y[which(train.y.temp==1), 1]=1
train.y[which(train.y.temp==2), 2]=1
train.y[which(train.y.temp==3), 3]=1
actF = c("relu","softmax")
network = init.network(c(4,15,3), actF, "momentum")
res = cu.nnet(train.x, train.y, network, HP=c(15,1000,0.01,0.0001,0.9,0.999,0.9))
hist(res$net$W[[1]])
plot(res$loss,type='l')
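
# Training accuracy for the fitted iris network (added sketch); feedforward()
# below is a hypothetical helper that replays the forward pass used in cu.nnet.
feedforward <- function(net, x, target){
  n.layer = length(net$W)
  for (i.layer in 1:n.layer){
    A = affine.forwd(x, net$W[[i.layer]], net$b[[i.layer]])
    x = activation(A, target, net$actFun[i.layer])
  }
  return(x)   # with a softmax output layer this is list(smx, ce)
}
out = feedforward(res$net, train.x, train.y)
mean(apply(out$smx, 1, which.max) == apply(train.y, 1, which.max))   # proportion correct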

MNIST.dat<-read.csv("http://www.matsuka.info/univ/course_folder/MNSTtrain.csv")

train <- data.matrix(MNIST.dat)
train.x <- as.matrix(train[,-1]/255)
train.y.temp <- train[,1]
train.y = matrix(0,nrow = nrow(train), ncol = 10)
for (i in 1:nrow(train.x)){
  train.y[i,(train.y.temp[i]+1)]=1
}
actF = c("relu","relu","softmax")
network = init.network(c(784,100,50,10), actF, "momentum")
res = cu.nnet(train.x, train.y, network,HP=c(100,2000,0.1,0.001,0.8,0.999,0.9))
plot(res$loss,type='l')

データ解析基礎論B LogLinear Analysis

freq<-c(33,37,7,23)
pref<-factor(c('soba','udon','soba','udon'))
region<-factor(c('east','east','west','west'))
dat<-data.frame(pref,region,freq)
dat.table=table(pref,region)
dat.table[cbind(pref,region)]<-freq

# LRT for the no-effect (equiprobable) baseline: expected count = sum(freq)/4 = 25
ME.lrt=2*sum((freq)*log(freq/25))

dat.loglinCE_A<-loglin(dat.table, list(1), fit=T,param=T)
1-pchisq(dat.loglinCE_A$lrt, dat.loglinCE_A$df)

dat.loglinCE_B<-loglin(dat.table,list(2), fit=T,param=T)
1-pchisq(dat.loglinCE_B$lrt, dat.loglinCE_B$df)

dat.loglinIND<-loglin(dat.table,list(1,2), fit=T,param=T)
1-pchisq(dat.loglinIND$lrt, dat.loglinIND$df)

dat.loglinSAT<-loglin(dat.table,list(c(1,2)), fit=T,param=T)

ME.lrt-dat.loglinCE_A$lrt                        # gain from adding the pref main effect
1-pchisq(ME.lrt-dat.loglinCE_A$lrt,1)
dat.loglinCE_A$lrt-dat.loglinIND$lrt             # gain from adding the region main effect
1-pchisq(dat.loglinCE_A$lrt-dat.loglinIND$lrt,1)
dat.loglinIND$lrt-dat.loglinSAT$lrt              # gain from adding the pref x region interaction
1-pchisq(dat.loglinIND$lrt-dat.loglinSAT$lrt,1)


freq<-c(9,5,2,4,16,10,26,28)
gender<-factor(c(rep('female',4),rep('male',4)))
affil<-factor(rep(c('L','L','E','E'),2))
circle<-factor(rep(c('tennis','astro'),4))
dat<-data.frame(gender,affil,circle,freq)

dat.table<-table(gender,affil,circle)
dat.table[cbind(gender,affil,circle)]<-freq

# dimensions of dat.table: 1 = gender, 2 = affil, 3 = circle
dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglin3<-loglin(dat.table,list(1,3), fit=T,param=T)
dat.loglin4<-loglin(dat.table,list(1,2,3), fit=T,param=T)
dat.loglin5<-loglin(dat.table,list(c(1,3),2), fit=T,param=T)
dat.loglin6<-loglin(dat.table,list(c(1,3),c(1,2)), fit=T,param=T)
dat.loglin7<-loglin(dat.table,list(c(1,3),c(1,2),c(2,3)), fit=T,param=T)
dat.loglin8<-loglin(dat.table,list(c(1,2,3)), fit=T,param=T)
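
# Nested models can be compared with differences in LRT statistics (added sketch),
# e.g. model 5 (gender x circle + affil) vs. model 6 (adds gender x affil):
1 - pchisq(dat.loglin5$lrt - dat.loglin6$lrt, dat.loglin5$df - dat.loglin6$df)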

source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')


dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.tab <- table(dat)
dat.LL <- loglin(dat.tab, list(c(1,4),c(2,4),c(3,4)), fit = T, param=T)   # variables 1-3 conditionally independent given variable 4
1-pchisq(dat.LL$lrt, dat.LL$df)