Disclaimer

このcourselogにあるコードは、主に学部生・博士課程前期向けの講義・演習・実習で例として提示しているもので、原則直感的に分かりやすいように書いているつもりです。例によってはとても非効率なものもあるでしょうし、「やっつけ」で書いているものあります。また、普段はMATLABを使用していますので、変な癖がでているかもしれません。これらの例を使用・参考にする場合はそれを踏まえてたうえで使用・参考にして下さい。
卒業論文に関する資料:[2015PDF] [word template] [latex template] [表紙] [レポートの書き方] [引用文献など]
A Brief Guide to R (Rのコマンドの手引きおよび例)はこちらをどうぞ

認知情報解析 01

# 大数の法則に関するシミュレーション

# サイコロをN回ふって6が出る確率の推移を検証する関数
die.6 <- function(N){
  die <- sample(1:6, N, replace=T)
  six.cumsum = cumsum(die==6)
  six.prob = six.cumsum/(1:N)
  return(six.prob=six.prob)
}

# 関数の実行&結果の可視化
N = 3000
result = die.6(N)
plot(1:N, result, ylim=c(0,1), col='red', lwd=3, type='l',
     xlab="Trial",ylab="Prob.")
abline(h = 1/6, lty=2, lwd=2)
legend("topright",c("Theoretical","Empirical"),lty=c(2,1),
       col=c("black","red"), lwd=2)

# 大数の法則を繰り返し検証するスクリプト
N = 3000; n.rep = 500
result = matrix(0, nrow=n.rep, ncol=N)
plot(1,1,type='n',xlim = c(0,N), ylim=c(0,1),
  xlab="Trial", ylab="P(die = 6)")
for (i.rep in 1:n.rep){
  result[i.rep,] = die(N) 
  lines(result[i.rep,],col='gray')
}
lines(1:N, colMeans(six.p), col='red', lwd = 2)
abline(h = 1/6, lty=2, lwd=2)
legend("topright",c("Theoretical","Average","Indiv. trial"),lty=c(2,1,1),
       col=c("black","red","gray"), lwd=2)

# 大数の法則を繰り返し検証するスクリプトを関数にしたもの
die.6Mult <- function(N, n.rep) {
  plot(1,1,type='n',xlim = c(0,N), ylim=c(0,1), xlab="Trial", 
    ylab="P(die = 6)")
  die <- matrix(sample(1:6, N*n.rep, replace=T), nrow = n.rep)
  six.cumsum = apply(die,1, function(x){cumsum(x==6)})
  six.prob = six.cumsum/(1:N)
  for (i.rep in 1:n.rep){
    lines(six.prob[,i.rep],col='gray')
  }
  lines(1:N, rowMeans(six.prob), col='red', lwd = 2)
  abline(h = 1/6, lty=2, lwd=2)
  legend("topright",c("Theoretical","Average","Indiv. trial"),lty=c(2,1,1),
         col=c("black","red","gray"), lwd=2)
  return(six.prob=six.prob)
}
#  上記の関数の実行例
result <- die.6Mult(3000,500)

2018 DAA W03 Data Viz & Descrptive statistics

dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
par(mfrow=c(1,2))
hist(dat[dat$gender=='F',]$h, main="Dist. of Height for Female Participants", 
 xlab="Height", xlim=c(140,190), probability=T)
dens.F = density(dat[dat$gender=='F',]$h)
lines(dens.F, col='blue',lwd=2) 
hist(dat[dat$gender=="M",]$h, 
   main="Dist. of Height for Male Participants", 
   xlab="Height", xlim=c(140,190), probability=T,ylim=c(0,0.08))
dens.M = density(dat[dat$gender=='M',]$h)
lines(dens.M, col='green', lwd=2)

par(mfrow=c(1,1))
plot(dens.F,col='blue',lwd=2, ylab='density', xlim=c(140,190), 
  main="Dist. of Height by gender",xlab='Height')  
lines(dens.M,col='green',lwd=2)
legend("topleft", c('Female','Male'), col=c('blue','green'), cex=1.5,lwd=2)

text(x = 157.5, y = 0.04, 'Female', col='blue', cex=2)
text(x = 170, y = 0.04,'Male', col='green', cex=2)

plot(dat$shoesize, dat$h, main="Relationship b/w shoesize and height",
   xlab = "shoesize", ylab="height", pch=19, col="red")
     
txt = paste("r =",round(cor(dat$shoesize,dat$h), 4))
text(22, 175, txt, cex = 1.5)

abline(h = mean(dat$h), col='blue');
abline(v = mean(dat$shoesize), col='green');
abline(lm(dat$h~dat$shoesize), lty=2, lwd=2)

plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h,  
     main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height', 
     cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
points(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h, 
    pch = 15, col = 'green')
legend("topleft", c('Female','Male'), pch =c(19,15), 
   col = c('blue','green'), cex = 1.5)


dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
plot(dat, pch=20, col='blue')

dat.pca<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
plot(dat.pca, pch = rownames(dat.pca), cex = 1.7, col = 'blue')

2018 DAA W02 Data visualization

v1 = seq(-3,3,0.1)
v2 = v1^2
plot(x = v1, y = v2)
plot(v1, v2, col = 'red')
plot(v1, v2, col=“red”, pch = 20)
plot(v1, v2, col="red", pch = 20, cex = 3)
plot(v1, v2, type = "l")
plot(v1, v2,type="l",lty=4,lwd=3)

plot(v1, v2, main = "THIS IS THE TITLE", 
     xlab = "Label for X-axis",
     ylab = "Label for Y-axis")

plot(v1, v2, main = "THIS IS THE TITLE", cex.lab = 1.5,
     xlab = "Label for X-axis",ylab = "Label for Y-axis")


plot(v1, v2, main = "TITLE", xlab = "X here",ylab = "Y here",
     xlim = c(-3.5, 3.5), 
     ylim = c(-0.5, 10))

plot(v1, v2, col = "blue", type = "o", lty = 2, pch = 19, 
     cex.lab = 1.5, lwd = 3, main = "Y=X*X", xlab = "X", 
     ylab="X*X", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))

dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
hist(dat$h)
hist(dat$h, breaks = 20, main = "Histogram of Height", 
     xlab = "Height", col = 'blue', xlim = c(140, 190))

dens<-density(dat$h); # 確率密度の算出
hist(dat$h, main = "Histogram of Height", xlab = "Height",  
     xlim = c(140,190), probability = T)
lines(dens, lwd = 2, col = "red", lty=2) 

plot(v1, v2, col = "blue", type = "l", pch = 19, cex.lab = 1.5, lwd = 3, xlab = "X", 
     ylab="f(X)", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))
lines(v1, v1^3, col='red',lwd = 3)
legend("bottomright", c("x^2","x^3"), col=c('blue','red'), lwd=2)


boxplot(dat$h,main="Boxplot of Height", ylab="Height", col='cyan', ylim=c(140,190))
boxplot(dat$h,main="Boxplot of Height", xlab="Height", col='orange', horizontal=T)


boxplot(dat$h ~ dat$gender,
        main="Distribution of Height by Gender", 
        ylab="Gender", xlab="Height", col=c('blue','cyan'),
        ylim=c(140,190), horizontal=T)

dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
boxplot(dat$h ~ dat$gender + dat$affil, 
        main="Distribution of  Height by Gender and Affiliation",   
        ylab="Gender x Affiliation", xlab="Height", col=c("blue","cyan","red","magenta"), 
        ylim=c(140,190),horizontal=T)

interaction.plot(dat$gender,
                 dat$affil,
                 dat$h, 
                 pch=c(20,20), 
                 col=c("skyblue","orange"), 
                 xlab="gender", ylab="height", 
                 lwd=3,type='b',cex=2,
                 trace.label="Affiliation")

par(mfrow=c(1,2)) 
hist(dat[dat$gender=="F",]$h, main="Dist. of Height for Female Participants", xlab="Height", xlim=c(140,190), probability=T)
dens.F = density(dat[dat$gender=='F',]$h)
lines(dens.F, col='blue',lwd=2) 
hist(dat[dat$gender=="M",]$h, main="Dist. of Height for Male 
     Participants", xlab="Height", xlim=c(140,190), probability=T,ylim=c(0,0.08))
dens.M = density(dat[dat$gender=='M',]$h)
lines(dens.M, col='green', lwd=2)

par(mfrow=c(1,1))
plot(dens.F,col='blue',lwd=2, ylab='density', xlim=c(140,190), main="Dist. of Height by gender",xlab='Height')  
lines(dens.M,col='green',lwd=2)
legend("topleft", c('Female','Male'), col=c('blue','green'), cex=1.2,lwd=2)

plot(dat$shoesize, dat$h, main="Relationship b/w shoesize and height",
   xlab = "shoesize", ylab="height", pch=19, col="red")

2018基礎実習1 – R programming 1

課題MA01 課題は2つあります。各課題ではRのコマンドと関数を実行した後に出力される図を提出してください。

課題MA01.1
以下のRのスクリプトを関数に変更してください。
なお、Nは関数の引数としてください。また、図はN=2000で実行したものを提出してください。

N = 1000
die.all <- sample(1:6,N,replace=T)
six.index <- die.all==6
par(oma=c(2,2,0,0),mar=c(4,4,1,1),mfrow=c(2,1))
plot(1:N, die.all, pch=20, col = 'red', ylim = c(0,7), ylab = "Result", xlab = "trial")
plot(1:N,cumsum(six.index)/(1:N), type='l', ylim=c(0,1), 
  lwd=2, ylab = "P(die = 6)", xlab = "trial")

課題MA01.2
以下のRのスクリプトを関数に変更してください。
なお、NとnRepは関数の引数としてください。また、図はN=5,nRep=100000で実行したものを提出してください。

N=10;nRep=10000
dat<-matrix(runif(N*nRep),nrow=N)
means<-colMeans(dat)
hist(means,nclass=50,probability=T)
dens<-density(means)
lines(dens,col='skyblue',lwd=3)  
xs=seq(-0,1,0.01)
theo.dens<-dnorm(xs,mean=0.5,sd=sqrt((1/12)/N))
lines(xs,theo.dens,col='orange',lwd=3,lty=2)
N = 10000
# N = 1000
random.data = rnorm(N, mean=0, sd=1)
hist(random.data, nclass = 50, col = "navy", xlab = "Data", 
  probability = T, main = "Histogram of Random Data")
dens = density(random.data)
lines(dens, col = "orange", lwd = 4)
x = seq(-4,4,0.1)  
true.norm = dnorm(x, mean = 0, sd = 1)
lines(x,true.norm, col = "green", lty = 3, lwd = 4)
legend("topleft",c("empirical", "theoretical"), 
  lty = c(1,3), col = c('orange','green'),lwd=4)

sample(1:10,3)   
sample(c("gu","choki","pa"),1) 

sample(1:10)
sample(c("a","b","c","d","e","f","g"), replace=F)

sample(0:1, 10, replace=T)
sample(c("Head","Tail"), 10, replace=T)
sample(c("Head","Tail"), 10, replace=T, prob=c(0.9,0.1)) 

for (i_loop in 1:5){
  print(i_loop)
}

for (i_loop in 1:5){
  print(c(i_loop, 2^i_loop))
}

counter <- 1
while(counter<=10){
  print(counter)
  counter<-counter+1
}
counter <- 1
while(counter^2 <= 10){
  print(c(counter, counter^2))
  counter<-counter+1
}

affil<-"phil"
if (affil=="cogsci") {
  print("you are wonderful")
} else {
  print("still, you are wonderful")
}

v1=1633;v2=355;
repeat {
  r=v1%%v2
  print(paste('v1 =',v1,v2 = ',v2, remainder = ',r))
  v1=v2;v2=r
  if (r==0){ break}
}

counter=6
repeat{ 
  print(counter)
  counter = counter + 1
  if(counter>5){break}
}

counter=6
repeat{
  if(counter>5){break}
  print(counter)
  counter+counter+1
}

six.counter=0
for (i_loop in 1:1000) {
  die<-sample(1:6,1)
  if (die==6) {
    six.counter=six.counter+1
  }
}

N=1000;six.counter=rep(0,N);
for (i_loop in 1:N) {
  die<-sample(1:6,1)
  if (die==6) {six.counter[i_loop]=1}
}
plot(1:N,cumsum(six.counter)/(1:1000),type='l',ylim=c(0,1),lwd=2)

N = 1000
die.all <- sample(1:6,N,replace=T)
six.index <- die.all==6

par(oma=c(2,2,0,0),mar=c(4,4,1,1),mfrow=c(2,1))
plot(1:N, die.all, pch=20, col = 'red', ylim = c(0,7), ylab = "Result", xlab = "trial")
plot(1:N,cumsum(six.index)/(1:N), type='l', ylim=c(0,1), 
  lwd=2, ylab = "P(die = 6)", xlab = "trial")

N=10;nRep=10000;
means<-rep(0,nRep)
for (i_rep in 1:nRep) {  
  dat<-runif(N) 
  means[i_rep]=mean(dat)
}
hist(means,nclass=50,probability=T)
dens<-density(means)
lines(dens,col='skyblue',lwd=3) 
xs=seq(-0,1,0.01)
theo.dens<-dnorm(xs,mean=0.5,sd=sqrt((1/12)/N))
lines(xs,theo.dens,col='orange',lwd=3,lty=2)

N=10
nRep=10000
dat<-matrix(runif(N*nRep),nrow=N)
means<-colMeans(dat)
hist(means,nclass=50,probability=T)
dens<-density(means)lines(dens,col='skyblue',lwd=3)  
xs=seq(-0,1,0.01)
theo.dens<-dnorm(xs,mean=0.5,sd=sqrt((1/12)/N))
lines(xs,theo.dens,col='orange',lwd=3,lty=2)

r=-99;v1=1633;v2=355
while (r!=0){
  r=v1%%v2
  print(paste('v1 =',v1,', v2 = ',v2,',remainder = ',r))
  v1=v2
  v2=r
}

summation<-function(y1,y2){
  y.sum=y1+y2
  return(y.sum)
}

GCD<-function(v1,v2){  
  real.v1=max(c(v1,v2))  
  real.v2=min(c(v1,v2))
  repeat{
    r=real.v1%%real.v2;real.v1=real.v2;real.v2=r    
    if (r==0){
      print(paste('GCD  is',real.v1)); 
      return(real.v1);
      break}
  }
}

データ解析基礎論B review02

n.subj = 5
set.seed(10)
groupA1 = rnorm(n.subj, mean = 100, sd =15)
groupA2 = rnorm(n.subj, mean = 100, sd =15)
groupID = c(rep("A1",n.subj),rep("A2",n.subj))
dat = data.frame(score = c(groupA1,groupA2),ID = groupID)
grand.mean = mean(dat$score)
mean.A1 = mean(groupA1)
mean.A2 = mean(groupA2)
plot(dat$ID, dat$score)
points(as.numeric(dat$ID), dat$score, col=c(rep('red',n.subj),rep("blue",n.subj)),pch=20)

idx = 1:(n.subj+n.subj)
plot(idx,dat$score,col=c(rep('red',n.subj),rep("blue",n.subj)),
     pch=20,ylim=c(65,145),ylab ="score",xlab="subjID")
abline(h=grand.mean,lwd=2)
abline(h=mean.A1,lty=2,col='red',lwd=2)
abline(h=mean.A2,lty=2,col='blue',lwd=2)
legend("topright",c("overall mean", "G1 mean","G2 mean"),
       lty=c(1,2,2),col=c('black','red','blue'),lwd=2)

ss.total = sum((dat$score-grand.mean)^2)
ss.group1 = n.subj*(mean.A1-grand.mean)^2
ss.group2 = n.subj*(mean.A2-grand.mean)^2
ss.group = ss.group1+ss.group2
ss.error1 = sum((groupA1-mean.A1)^2)
ss.error2 = sum((groupA2-mean.A2)^2)
ss.error = ss.error1+ss.error2
df.group = 2 - 1
df.error = (n.subj-1)+(n.subj-1)
ms.group = ss.group/df.group
ms.error = ss.error/df.error
f.value = ms.group/ms.error
p.value = 1 - pf(f.value, df.group, df.error)
summary(aov(score~ID, data=dat))
n.rep = 1e5
n.subj = 5
f.hist = rep(0,n.rep)
for (i.rep in 1:n.rep){
  groupA1 = rnorm(n.subj, mean = 100, sd =15)
  groupA2 = rnorm(n.subj, mean = 100, sd =15)
  groupID = c(rep("A1",n.subj),rep("A2",n.subj))
  dat = data.frame(score = c(groupA1,groupA2),ID = groupID)
  grand.mean = mean(dat$score)
  mean.A1 = mean(groupA1)
  mean.A2 = mean(groupA2)
  ss.total = sum((dat$score-grand.mean)^2)
  ss.group = n.subj*(mean.A1-grand.mean)^2+n.subj*(mean.A2-grand.mean)^2
  ss.error = sum((groupA1-mean.A1)^2)+sum((groupA2-mean.A2)^2)
  df.group = 2 - 1
  df.error = (n.subj-1)+(n.subj-1)
  ms.group = ss.group/df.group
  ms.error = ss.error/df.error
  f.hist[i.rep] = ms.group/ms.error
}

hist(f.hist,1000,xlim=c(0,15),probability = T)
f.temp = seq(0,15,0.05)
f.true = df(f.temp,df.group,df.error)
lines(f.temp,f.true,col='red',lty=2,lwd=2)
thres.f = qf(0.95,df.group,df.error)
abline(v = thres.f, col='green')
prop.FP = sum(f.hist > thres.f)/n.rep

データ解析基礎論B Reivew01

# central limit theorem
n.sample = 1e6
n.data = 10
dat = matrix(runif(n.sample*n.data), nrow = n.data)
means = colMeans(dat)
sds = apply(dat,2,sd)

hist(means,100,probability = T)
x.temp  = seq(0,1, 0.01)
true.sd = sqrt(1/12)
true.dens = dnorm(x.temp,mean=0.5, sd = true.sd/sqrt(n.data))
lines(x.temp, true.dens,col='red',lwd=2)
ci.low = qnorm(0.025,mean=0.5, sd = true.sd/sqrt(n.data))
ci.high = qnorm(0.975,mean=0.5, sd = true.sd/sqrt(n.data))
abline(v=ci.low,col='blue',lwd=2)
abline(v=ci.high,col='blue',lwd=2)

# standardizing data
z = (means-0.5)/(true.sd/sqrt(n.data))
hist(z,100,probability = T)
x.tempZ = seq(-4,4,0.01)
z.dens = dnorm(x.tempZ,mean=0, sd = 1)
lines(x.tempZ, z.dens,col='red',lwd=2)
ci.lowZ = qnorm(0.025,mean=0, sd = 1)
ci.highZ = qnorm(0.975,mean=0, sd = 1)
abline(v=ci.lowZ,col='blue',lwd=2)
abline(v=ci.highZ,col='blue',lwd=2)

# t distribution
t.temp = seq(-4,4,0.01)
t.dens = dt(t.temp, df=(n.data-1))
lines(t.temp, t.dens,col='green',lwd=2)
ci.lowT = qt(0.025,df=(n.data-1))
ci.highT = qt(0.975,df=(n.data-1))
abline(v=ci.lowT,col='cyan',lwd=2)
abline(v=ci.highT,col='cyan',lwd=2)


### cell automata
nCell=201
nGen=100
res=matrix(0,nrow=nGen,ncol=nCell)
res[1,100]=1
for (i_gen in 2:nGen) {
  for (i_cell in 2:(nCell-1)) {
    res[i_gen,i_cell]=sum(res[i_gen-1,(i_cell-1):(i_cell+1)])%%2;
  } 
 # 左端
  res[i_gen,1]=(res[i_gen-1,nCell]+res[i_gen-1,1]+res[i_gen-1,2])%%2 
 # 右端
  res[i_gen,nCell]=(res[i_gen-1,(nCell-1)]+res[i_gen-1,nCell]+res[i_gen-1,1])%%2;
} 
image(res)


# general version
dec2bin<-function(num, digits=8) {
  bin=c()
  if (num==0){
    bin=0
  } else {
    while(num!=0){
      rem=num%%2
     num=num%/%2
      bin=c(rem,bin)
    }
  }
  if (length(bin) < digits){
    res=matrix(0,nrow=1,ncol=digits)
    res[(digits-length(bin)+1):digits]=bin
  } else {res=bin}
  return(res)
}

transFUN<-function(st,ruleID){
  output=dec2bin(ruleID,8);
  a=matrix(c(1,1,1,1,1,0,1,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0),nrow=8,byrow=T)
  newSt=output[which(apply(a,1,function(x) {all(x==st)}))]
  return(newSt)
}
ECA<-function(nCell, nGen,ruleID){
  res=matrix(0,nrow=nGen,ncol=nCell)
  res[1,ceiling(nCell/2)]=1;
  for (i_gen in 2:nGen) {
    for (i_cell in 2:(nCell-1)) {
      res[i_gen,i_cell]=transFUN(res[i_gen-1,(i_cell-1):(i_cell+1)],ruleID)
     }
   res[i_gen,1]=transFUN(c(res[i_gen-1,nCell],res[i_gen-1,1],res[i_gen-1,2]),ruleID)
   res[i_gen,nCell]=transFUN(c(res[i_gen-1,(nCell-1)],res[i_gen-1,nCell],res[i_gen-1,1]),ruleID)
  }
  return(res)
}
res<-ECA(200,100,99)
image(res) 

広域システム特別講義II W12

### first bayesian analysis
theta = seq(0,1,0.1)
prior  = c(0:5,4:0)
prior = prior/sum(prior)
likelihood = theta
posterior = likelihood*prior
posterior = posterior/sum(posterior)
par(mfrow = c(3, 1))
barplot(prior)
barplot(likelihood)
barplot(posterior)

### plotting beta dist.
par(mfrow = c(3, 1))
theta = seq(0, 1, 0.001)
prior = dbeta(theta, 0.1, 1)
plot(prior,type ='l',lwd=3)
prior = dbeta(theta, 2, 2)
plot(prior,type ='l',lwd=3)
prior = dbeta(theta, 4, 1)
plot(prior,type ='l',lwd=3)

### jisshu 1
posterior_beta <- function(a, b, z, N){
  par(mfrow = c(3, 1))
  theta = seq(0, 1, 0.001)
  prior = dbeta(theta, a, b)
  likelihood = theta^z * (1-theta)^(N - z)
  posterior = theta^(z+a-1) * (1-theta)^(N-z+b-1) / beta(z+a, N-z+b)
  # normalizing posterior
  posterior = posterior/sum(posterior)

  # plotting prior
  plot(theta,prior,type='l',lwd=2)
  polygon(theta,prior,col='orange')

  # plotting likelihood
  plot(theta,likelihood,type='l',lwd=2)
  polygon(theta,likelihood,col='orange')

  # plotting posterior
  plot(theta,posterior,type='l',lwd=2)
  polygon(theta,posterior,col='orange')
  par(mfrow=c(1,1))
}

###   jisshu 2 - island hopping
# initialization
n.iter = 10000
location = 4
location.history = rep(0, n.iter)
location.history[1] = location
location.counter = rep(0, 7)
location.counter[location] = location.counter[location] + 1
# end initialization

# main loop
for (i.iter in 2:n.iter){
  proposed = location + sample(c(-1,1),1)
  if (proposed == 8){ proposed = 7}
  # if (proposed = 0){ proposed = 1}
  p.move = min(1, proposed/location)
  if (runif(1) < p.move){
    location = proposed
  } # else {location = location}
  location.history[i.iter] = location
  location.counter[location] = location.counter[location] + 1
}
par(mfrow = c(2,1))
plot(location.history, 1:n.iter, type = 'o', log = 'y')
barplot(location.counter)


### jisshu 3 metropolis method
metropolis.ex01 <- function(n.iter=1e6, theta.init=0.1, sigma=0.2, plot.yn = T){

  # "posterior of theta" function
  posterior.theta <- function(theta, N, z, a, b) {
    posterior = theta^z * (1-theta)^(N-z) * theta^(a-1) * (1 - theta)^(b-1) / beta(a,b)
  }

  # initialization
  theta.history = rep(0,nrow = n.iter,ncol = 1)
  theta.current = theta.init
  theta.history[1] = theta.current
  # values given in text
  mu = 0
  N = 20
  z = 14
  a = 1
  b = 1

  # main loop
  for (i_iter in 2:n.iter) {
    theta.proposed = theta.current + rnorm(1, mean=mu, sd=sigma)
    if (theta.proposed < 0 | theta.proposed > 1) {
      p.move = 0
    } else {
      p.current = posterior.theta(theta.current, N, z, a, b)
      p.proposed = posterior.theta(theta.proposed, N, z, a, b)
      p.move = min(p.proposed/p.current, 1)
    }
    if (runif(1) < p.move) {
      theta.current = theta.proposed
    }
    theta.history[i_iter] = theta.current
  }

  # plotting results
  if (plot.yn == T) {
    par(mfrow = c(3, 1))
    hist(theta.history, nclass = 100, col = "orange", probability = T, xlab = "theta")
    den=density(theta.history)
    lines(den)
    plot(theta.history[(n.iter-100):n.iter], ((n.iter-100):n.iter), type = 'o', xlim = c(0,1), xlab="theta", ylab = "step in chain")
    plot(theta.history[1:100],1:100, type = 'o', xlim = c(0,1), xlab = "theta", ylab = "step in chain")
  }
  return(theta.history)
}

res = metropolis.ex01(10000, 0.1)


### exact beta posterior 2 variable
Exact_beta_posterior2V <- function(a1,b1,a2,b2,z1,N1,z2,N2){
  # DBDA ch. 7 exact posterior distribution
  # WARNING: posterior is not nomarlized
  try(library(plot3D))
  try(library(MASS))
  temp.thetas=seq(0,1,0.01)
  M=mesh(temp.thetas,temp.thetas)
  theta1=M$x
  theta2=M$y
  prior=dbeta(theta1,a1,b1)*dbeta(theta2,a2,b2)
  likelihood=theta1^z1 * (1-theta1)^(N1-z1)*theta2^z2 * (1-theta2)^(N2-z2)
  posterior=prior*likelihood

  par(mfrow=c(3,2))
  par(mar=c(4,1,1,1))
  persp(prior,theta =30,phi=20,lwd=0.1,col="orange")
  contour(prior,labels="",lwd=1.5,col="navy")

  persp(likelihood,theta =30,phi=20,lwd=0.1,col="orange")
  contour(likelihood,labels="",lwd=1.5,col="navy")

  persp(posterior,theta =30,phi=20,lwd=0.1,col="orange")
  contour(posterior,labels="",lwd=1.5,col="navy")

  par(mfrow=c(1,1))
}

### metropolis 2 var
metropolis.ex02 <- function(n.iter=1e6, theta.init=c(0.5,0.5), sigma=0.2){
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # metropolis algo. with 2 parameters

  # "posterior of theta" function
  posterior.theta2<-function(theta, N1, z1, a1, b1, N2, z2, a2, b2) {
    posterior = theta[1]^z1 * (1-theta[1])^(N1-z1) *
      theta[2]^z2*(1-theta[2])^(N2-z2) *
      theta[1]^(a1-1)*(1 - theta[1])^(b1-1) *
      theta[2]^(a2-1)*(1 - theta[2])^(b2-1) /
      (beta(a1,b1)*beta(a2,b2))
  }

  # initialization
  try(library(MASS))
  theta.history=matrix(0,nrow=n.iter,ncol=2)
  theta.current=theta.init
  theta.history[1,]=theta.current
  # values given in text
  mu=0
  N1=8
  z1=6
  a1=2
  b1=2
  N2=7
  z2=2
  a2=2
  b2=2

  # main loop
  for (i_iter in 2:n.iter) {
    theta.proposed=theta.current+rnorm(2,mean=mu,sd=sigma)
    if (any(theta.proposed<0) | any(theta.proposed>1)) {
      p.move=0
    } else {
      p.current=posterior.theta2(theta.current, N1, z1, a1, b1, N2, z2, a2, b2)
      p.proposed=posterior.theta2(theta.proposed, N1, z1, a1, b1, N2, z2, a2, b2)
      p.move=min(p.proposed/p.current, 1)
    }
    if (runif(1) < p.move) {
      theta.current = theta.proposed
    }
    theta.history[i_iter,] = theta.current
  }
  # plotting results
  par(mfrow=c(2,2))
  hist(theta.history[,1],nclass=100, probability = T, col="orange")
  den=density(theta.history[,1])
  lines(den)
  den3d <- kde2d(theta.history[,1],theta.history[,2], n = 100)
  par(mar=c(1,1,1,1))
  persp(den3d, box=FALSE, col = "orange", lwd=0.1, theta=10, phi=20)
  plot(theta.history[,1],theta.history[,2],type='o', ylim=c(0,1), xlim=c(0,1), col="orange")
  hist(theta.history[,2], nclass=100, probability = T, col="orange")
  den=density(theta.history[,2])
  lines(den)

  return(theta.history)
}

res=metropolis.ex02(10000,c(0.1,0.1),0.2)

### jisshu 4 Gibbs sampling
gibbs.ex01 <- function(n.iter=1e6, theta.init=c(0.5,0.5), sigma = 0.2) {

  # posterior of theta function
  posterior.thetaG<-function(theta, N, z, a, b, idx) {
    p = dbeta(theta[idx],z[idx]+a[idx],N[idx]-z[idx]+b[idx])
  }

  # initialization
  try(library(MASS))
  n.theta = 2
  theta.history = matrix(0, nrow = n.iter, ncol = n.theta)
  theta.current = theta.init
  theta.proposed = rep(0, n.theta)
  theta.history[1,] = theta.current
  # values given in text
  mu = 0
  N = c(8,7)
  z = c(6,2)
  a = c(2,2)
  b = c(2,2)

  # main loop
  for (i_iter in 2:n.iter) {
    for (i_theta in 1:n.theta) {
      theta.proposed[i_theta] = theta.current[i_theta] + rnorm(1, mean=mu, sd=sigma)
      if (theta.proposed[i_theta]<0 | theta.proposed[i_theta]>1) {
        p.move=0
      } else {
        p.current = posterior.thetaG(theta.current, N, z, a, b, i_theta)
        p.proposed = posterior.thetaG(theta.proposed,N, z, a, b, i_theta)
        p.move = min(p.proposed/p.current, 1)
      }
      if (runif(1) < p.move) {
        theta.current[i_theta] = theta.proposed[i_theta]
      }
      theta.history[i_iter, i_theta] = theta.current[i_theta]
    }
  }
  # plotting results
  par(mfrow=c(2,2))
  hist(theta.history[,1], nclass = 100, probability = T, col="orange")
  den=density(theta.history[,1])
  lines(den)
  den3d <- kde2d(theta.history[,1],theta.history[,2], n = 100)
  par(mar=c(1,1,1,1))
  persp(den3d, box=FALSE, col = "orange", lwd=0.1, theta=10, phi=20)
  plot(theta.history[,1],theta.history[,2],type='o', ylim=c(0,1), xlim=c(0,1), col="orange")
  hist(theta.history[,2], nclass=100, probability = T, col="orange")
  den=density(theta.history[,2])
  lines(den)

  return(theta.history)
}

res=gibbs.ex01(10000,c(0.1,0.1),0.2)
Posted in UT

広域システム特別講義II

# ryukou
for (t in 1:max_time) {
  x[(t+1)] = x[t] - alpha*x[t]*y[t]*dt
  y[(t+1)] = y[t] + (alpha*x[t]*y[t] - beta*y[t])*dt
  z[(t+1)] = z[t] + beta*y[t]*dt
}

# renai
initX=c(rep(-40,5),rep(-20,5),rep(0,5),rep(20,5),rep(40,5))
initY=c(rep(c(-40,-20,0,20,40),5))

# cake game
n.cake = 10
pay.mat = matrix(0,11,11)
for (i.cake in 1:n.cake){
  pay.mat[(i.cake+1),1:(n.cake-i.cake+1)] =i.cake
}

p.cake = runif(n.cake+1)
p.cake = p.cake/sum(p.cake)
max.time = 50
dt = 0.01
t = seq(0,max.time,dt)
n.iter = length(t)
p.hist = matrix(0,nrow = n.iter, ncol = (n.cake+1))
p.hist[1,] = p.cake
for (i.time in 2:n.iter){
  W = colSums(p.cake*t(pay.mat))
  W.ave = sum(outer(p.cake,p.cake)*pay.mat)
  p.cake = p.cake + p.cake*(W - W.ave)/W.ave * dt
  p.hist[i.time,] = p.cake
}


plot(p.hist[,1],ylim=c(0,1),type='l',lwd=2,ylab = 'Proportion',xlab="time")  
for (i.strat in 2:(n.cake+1)){
  lines(p.hist[,i.strat],col=i.strat,lwd=2)
}
legend("topleft",paste("request = ",0:10),col=1:(n.cake+1),lwd =2,cex=0.75)

Posted in UT

広域システム特別講義II W10

実習課題の解答例

act.V=matrix(c(1,0,0,1,-1,0,0,-1),nrow=4,byrow=T)
wind=matrix(c(0,0,0,0,0,0,1,0,1,0,1,0,2,0,2,0,1,0,0,0),byrow=T,nrow=10)

Qlearn.ex<-function(maxItr,alpha,gamma,epsilon) {
  # field size: 7row x 10column
  # horizontal move ->  COLUMN 
  # vertical move     ->  ROW 
  # effect of wind     ->  ROW
  # actions: 1-up, 2-right, 3-down, 4-left 
  act.V=matrix(c(1,0,0,1,-1,0,0,-1),nrow=4,byrow=T)
  wind=matrix(c(0,0,0,0,0,0,1,0,1,0,1,0,2,0,2,0,1,0,0,0),byrow=T,nrow=10)
  goal=c(4,8)
  Qs=array(0,dim=c(7,10,4))
  for (i_rep in 1:maxItr) {
    state=c(4,1) # start
    while (!all(state==goal)) {
      if (runif(1) > epsilon) {
        move=which.max(Qs[state[1],state[2],])
      } else { move=sample(1:4,1)}
      sIDX=state
      state=state+act.V[move,]+wind[state[2],]
      if (state[1]<1) {state[1]=1}
      if (state[1]>7) {state[1]=7}
      if (state[2]<1) {state[2]=1}
      if (state[2]>10) {state[2]=10}   
      max.Q=max(Qs[state[1],state[2],])
      rew=ifelse(all(state==goal),0,-1)
      Qs[sIDX[1],sIDX[2],move]=Qs[sIDX[1],sIDX[2],move]+alpha*(rew+gamma* max.Q-Qs[sIDX[1],sIDX[2],move])
    }
  }
  return(Qs)
}

Qs=Qlearn.ex(1e6,0.05,1,0.1)
# sim optimal actions
state=c(4,1);goal=c(4,8);
state.hist=state
while (!all(state==goal)) {
  moveID=which.max(Qs[state[1],state[2],])
  state=state+act.V[moveID,]+wind[state[2],]
  if (state[1]<1) {state[1]=1}
  if (state[1]>7) {state[1]=7}
  if (state[2]<1) {state[2]=1}
  if (state[2]>10) {state[2]=10}
  state.hist=rbind(state.hist,state)
}
# plotting results
plot(0,0,type='n',xlim=c(0,11),ylim=c(0,8),xlab="",ylab="",
     main="Learned policies -- Q-learning")
lines(1,4,type='p',pch=19,col='red',cex=2)
lines(8,4,type='p',pch=19,col='red',cex=2)
dirs=c("up","right","down","left" )
for (i_row in 1:7) {
  for (i_col in 1:10) {
    best.move=dirs[which.max(Qs[i_row,i_col,])]
    text(i_col,i_row,best.move)
  }
}
lines(state.hist[,2],state.hist[,1],col="red",lwd=2) 
# MC value evaluation
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

# defining Reward & trans. prob.
r=-1;P=rep(0.25,4);V = rep(0,14)max.iter = 10000;
state.count=rep(0,15)
for (i.iter in 1:max.iter){
  state = sample(1:14,1)
  state.seq = state
  while(state!=15){
    action = sample(1:4,1,prob = P)
    state.seq = c(state.seq,trM[state,action])
    state = trM[state,action]  
  }
  uniq.seq = unique(state.seq)
  for (i.uniq in 1:(length(uniq.seq)-1)){
    first.visit = which(state.seq == uniq.seq[i.uniq])[1]
    V[uniq.seq[i.uniq]] = V[uniq.seq[i.uniq]] + r*(length(state.seq)-first.visit-1)
  }
  state.count[uniq.seq] = state.count[uniq.seq] + 1
}
V = matrix(c(0,V/state.count[1:14],0),nrow=4)



# black jack problem 01
# function to calc. value of cards
card.value<-function(adj.cards) {
 sum.cards=sum(adj.cards)
 if (any(adj.cards==1) & sum.cards<=11) {
   sum.cards=sum.cards+10;
   usableA=1          #true
  } else {usableA=2}  #false
 return(c(sum.cards,usableA))
}
 
# function to calc. reward
calc.reward<-function(p.val,d.val) {
  if (p.val>21) { reward=-1
  } else {if (d.val>21) { reward=1
    } else {if (p.val==d.val) {reward=0
      } else{ reward=ifelse(p.val>d.val,1,-1)}
}}}
 
# main function
BJ_MC_fixedPolicy<-function(policy=20,maxIter=1e6){
  rew.sum=array(0,dim=c(10,10,2))
  rew.count=array(0,dim=c(10,10,2))
  for (i_play in 1:maxIter) {
    cards=sample(rep(1:13,4))
    player=cards[1:2];  adj.player=pmin(player,10)
    dealer=cards[3:4];  adj.dealer=pmin(dealer,10)
    cards=cards[-(1:4)]
    d.val=card.value(adj.dealer)
    p.val=card.value(adj.player)
    state.hist=c(adj.dealer[1],p.val[1],p.val[2])
    while (p.val[1] < policy) {
      player=c(player,cards[1]); adj.player=pmin(player,10)
      cards=cards[-1]
      p.val=card.value(adj.player)
      state.hist=rbind(state.hist,c(adj.dealer[1],p.val[1],p.val[2]))
    }
    while (d.val[1] < 17) {
      dealer=c(dealer,cards[1]); adj.dealer=pmin(dealer,10)
      cards=cards[-1]
      d.val=card.value(adj.dealer)
    }
    rew=calc.reward(p.val[1],d.val[1])
    n.state=nrow(state.hist)
    if (is.null(n.state)) {
      n.state=1
      state.hist=t(as.matrix(state.hist))
    }
    for (i_state in 1:n.state) {
      if (state.hist[i_state,2] > 11 & state.hist[i_state,2] < 22) {
        rew.sum[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]
          = rew.sum[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]+rew
        rew.count[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]
          =rew.count[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]+1
      }
    }
  }
  return(rew.sum/rew.count)
}
 
# function 2 plot results
plot.BJ_MC<-function(V){
 par(mfrow=c(1,2))
 image(V[,,1],main="with usable Ace",xaxt='n',yaxt='n',
   xlab="Dealer showing",ylab="Player sum")
 axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
 axis(2,at=seq(0,1,length.out=10),label=12:21)
 image(V[,,2],main="without usable Ace",xaxt='n',yaxt='n',
   xlab="Dealer showing",ylab="Player sum")
 axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
 axis(2,at=seq(0,1,length.out=10),label=12:21)
}
 
V=BJ_MC(17)
plot.BJ_MC(V)

# black jack problem 2
# function to calc. value of cards
card.value<-function(adj.cards) {
 sum.cards=sum(adj.cards)
 if (any(adj.cards==1) & sum.cards<=11) {
   sum.cards=sum.cards+10;
   usableA=1             #true
  } else {usableA=2}     #false
 return(c(sum.cards,usableA))
}
 
# function to calc. reward
calc.reward<-function(p.val,d.val) {
  if (p.val>21) { reward=-1
  } else {if (d.val>21) { reward=1
    } else {if (p.val==d.val) {reward=0
      } else{ reward=ifelse(p.val>d.val,1,-1)}
}}}
 
# main function
BJ_MC<-function(maxIter=1e6){
  rew.sum=array(0,dim=c(10,10,2,2))
  rew.count=array(1,dim=c(10,10,2,2))
  Q=array(0,dim=c(10,10,2))
  V=array(0,dim=c(10,10,2))
  policy=array(sample(0:1,10*10*2,replace=T),dim=c(10,10,2))
  # policy: 1 = hit, 0 = stay
  for (i_play in 1:maxIter) {
    # initial draw
    cards=sample(c(rep(1:10,4),rep(10,12)))
    player=cards[1:2]
    dealer=cards[3:4]
    cards=cards[-(1:4)]
    d.val=card.value(dealer)
    p.val=card.value(player)
      
    while( p.val[1] < 12 ) {
      player=c(player,cards[1])
      cards=cards[-1]
      p.val=card.value(player)
    }
    action=sample(0:1,1)
    state.hist=c(dealer[1],p.val[1],p.val[2],(action+1))
     
    # player's action
    while (action==1 & p.val[1]<22) {
      player=c(player,cards[1])
      cards=cards[-1]
      p.val=card.value(player)
      state.hist=rbind(state.hist,c(dealer[1],p.val[1],p.val[2],(action+1)))
      if (p.val[1]<22) {
        action=policy[dealer[1],(p.val[1]-11),p.val[2]]
      }
    }
     
    # dealer's action
    while (d.val[1]<17) {
      dealer=c(dealer,cards[1])
      cards=cards[-1]
      d.val=card.value(dealer)
    }
    rew=calc.reward(p.val[1],d.val[1])
    n.state=nrow(state.hist)
    if (is.null(n.state)) {
      n.state=1
      state.hist=t(as.matrix(state.hist))
    }
    for (i_state in 1:n.state) {
      if (state.hist[i_state,2]>11 & state.hist[i_state,2]<22) {
        ind=state.hist[i_state,]-c(0,11,0,0)
        rew.sum[ind[1],ind[2],ind[3],ind[4]]= rew.sum[ind[1],ind[2],ind[3],ind[4]]+rew
        rew.count[ind[1],ind[2],ind[3],ind[4]]=rew.count[ind[1],ind[2],ind[3],ind[4]]+1
        Q=rew.sum/rew.count;
        policy[,,1]=Q[,,1,1] < Q[,,1,2]
        policy[,,2]=Q[,,2,1] < Q[,,2,2]
      }
    }
  }
  V[,,1]=(rew.sum[,,1,1]+rew.sum[,,1,2])/(rew.count[,,1,1]+rew.count[,,1,2])
  V[,,2]=(rew.sum[,,2,1]+rew.sum[,,2,2])/(rew.count[,,2,1]+rew.count[,,2,2])
  return(list(policy,V,Q))
}
 
 
# function 2 plot results
plot.BJ_MC2<-function(V){
 par(mfrow=c(2,2))
 image(V[[2]][,,1],main="Utility with usable Ace",xaxt='n',yaxt='n',
  xlab="Dealer showing",ylab="Player sum")
 axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
 axis(2,at=seq(0,1,length.out=10),label=12:21)
 image(V[[2]][,,2],main="Utility without usable Ace",xaxt='n',yaxt='n',
  xlab="Dealer showing",ylab="Player sum")
 axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
 axis(2,at=seq(0,1,length.out=10),label=12:21)
 image(V[[1]][,,1],main="Policy with usable Ace",xaxt='n',yaxt='n',
  xlab="Dealer showing",ylab="Player sum")
 axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
 axis(2,at=seq(0,1,length.out=10),label=12:21)
 image(V[[1]][,,2],main="Policy without usable Ace",xaxt='n',yaxt='n',
  xlab="Dealer showing",ylab="Player sum")
 axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
 axis(2,at=seq(0,1,length.out=10),label=12:21)
}
 
V=BJ_MC(5e6)
plot.BJ_MC2(V)


# Sarsa learning
sarsa.ex6.5<-function(maxItr,alpha,gamma,epsilon) {
  # field size: 7row x 10column
  # horizontal move ->  COLUMN 
  # vertical move     ->  ROW 
  # effect of wind     ->  ROW
  # actions: 1-up, 2-right, 3-down, 4-left 
  act.V=matrix(c(1,0,0,1,-1,0,0,-1),nrow=4,byrow=T)
  wind=matrix(c(0,0,0,0,0,0,1,0,1,0,1,0,2,0,2,0,1,0,0,0),byrow=T,nrow=10)
  goal=c(4,8)
  Qs=array(0,dim=c(7,10,4))
  for (i_rep in 1:maxItr) {
    state=c(4,1) # start
    if (runif(1) > epsilon) {
      max.id = which(Qs[state[1],state[2],]==max(Qs[state[1],state[2],]))
      if (length(max.id)>1){
        move = sample(max.id,1)
      } else {move=max.id}
    } else { move=sample(1:4,1)}
    while (!all(state==goal)) {
      st.old=state
      mv.old=move
      state=state+act.V[move,]+wind[state[2],]
      if (state[1]<1) {state[1]=1}
      if (state[1]>7) {state[1]=7}
      if (state[2]<1) {state[2]=1}
      if (state[2]>10) {state[2]=10}
      if (runif(1) > epsilon) {
        max.id = which(Qs[state[1],state[2],]==max(Qs[state[1],state[2],]))
        if (length(max.id)>1){
          move = sample(max.id,1)
        } else {move=max.id}
      } else { move=sample(1:4,1)}
      rew=ifelse(all(state==goal),0,-1)
      Qs[st.old[1],st.old[2],mv.old]=Qs[st.old[1],st.old[2],mv.old]+alpha*(rew+gamma* Qs[state[1],state[2],move]-Qs[st.old[1],st.old[2],mv.old])
    }
  }
  return(Qs)
}  

Qs=sarsa.ex6.5(10000,0.1,1,0.1)
# sim optimal actions
state=c(4,1);goal=c(4,8);
state.hist=state
while (!all(state==goal)) {
  moveID=which.max(Qs[state[1],state[2],])
  state=state+act.V[moveID,]+wind[state[2],]
  if (state[1]<1) {state[1]=1}
  if (state[1]>7) {state[1]=7}
  if (state[2]<1) {state[2]=1}
  if (state[2]>10) {state[2]=10}
  state.hist=rbind(state.hist,state)
}
# plotting results
plot(0,0,type='n',xlim=c(0,11),ylim=c(0,8),xlab="",ylab="",
     main="Learned policies -- Sarsa")
lines(1,4,type='p',pch=19,col='red',cex=2)
lines(8,4,type='p',pch=19,col='red',cex=2)
dirs=c("up","right","down","left" )
for (i_row in 1:7) {
  for (i_col in 1:10) {
    best.move=dirs[which.max(Qs[i_row,i_col,])]
    text(i_col,i_row,best.move)
  }
}
lines(state.hist[,2],state.hist[,1],col="red",lwd=2)
Posted in UT

データ解析基礎論B W10

install.packages("nnet")
library(nnet)

# regression
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
for (i.iter in 1:10){
set.seed(5)
print(i.iter)
x = dat[,1:3]
y = dat[,4]
dat.nnet = nnet(x,y, size = 150, linout= TRUE,maxit = 1000)
nnet.pred <-predict(dat.nnet,dat)
print(c(cor(nnet.pred,dat$sales),cor(nnet.pred,dat$sales)^2))
plot(dat.nnet$fitted.values, dat$sales,pch=20,col='black')
points(predict(dat.lm), dat$sales,pch=20,col='red')
abline(h=max(dat$sales),col='green')
abline(v=max(dat$sales),col='green')
abline(h=min(dat$sales),col='green')
abline(v=min(dat$sales),col='green')
abline(h=mean(dat$sales),col='green')
abline(v=mean(dat$sales),col='green')


n.data = nrow(dat);n.sample = n.data*0.6; n.rep = 1000 
trainNN.cor = rep(0,n.rep); trainLM.cor = rep(0,n.rep)
testNN.cor = rep(0,n.rep); testLM.cor = rep(0,n.rep)
for (i.rep in 1:n.rep){  
  randperm = sample(1:n.data)  
  train.idx = randperm[1:n.sample]  
  test.idx = randperm[(n.sample+1):n.data]  
  dat.nnet <- nnet(sales~.,size = 10, linout=T,decay= 0.1, maxit=1000, data = dat[train.idx,])  
  dat.lm <-lm(sales~.,data=dat[train.idx, ])  
  trainNN.cor[i.rep] <- cor(predict(dat.nnet,dat[train.idx, ]), dat[train.idx,]$sales)  
  trainLM.cor[i.rep] <- cor(predict(dat.lm,dat[train.idx, ]), dat[train.idx,]$sales)  
  testNN.cor[i.rep] <- cor(predict(dat.nnet,dat[test.idx, ]), dat[test.idx,]$sales)  
  testLM.cor[i.rep] <- cor(predict(dat.lm,dat[test.idx, ]), dat[test.idx,]$sales)
}
print(c(mean(trainNN.cor,na.rm=T),mean(testNN.cor,na.rm=T)))
print(c(mean(trainLM.cor,na.rm=T),mean(testLM.cor,na.rm=T)))

# logistic regression
dat<-read.csv("http://www.matsuka.info/data_folder/tdkDA01.csv")
dat.nnet<-nnet(class~.,dat,size=30,maxit=1000,decay=0.05)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$class)

dat.glm<-glm(class~., family="binomial",dat)
glm.pred<-predict(dat.glm, dat, type="response")>0.5
table(glm.pred,dat$class)
dat.glm<-glm(class~social*coop.*dilig.*enterp., family="binomial",dat)


dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.nnet<-nnet(survival~., dat, size=30, maxit=1000, decay=0.01)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$survival)

Ns = summary(dat$survival)
(Ns[1]/Ns[2])^-1

wts = rep(1,nrow(dat))
wts[which(dat$survival=="no")]=45
dat.nnet<-nnet(survival~., weights=wts, dat, size=30, maxit=1000, decay=0.01)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$survival)

# discriminant analysis
dat<-read.csv("http://www.matsuka.info/data_folder/tdkDA02.csv")
class.id<-class.ind(dat$class)
x = dat[,1:6]
dat.nnet<-nnet(x,class.id,size=30, maxit=1000, decay=0.01, softmax=TRUE)
max.id = apply(dat.nnet$fitted.values,1,which.max)
table(max.id,dat$class)


dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.nnet<-nnet(dat,dat,size=2, maxit=1000, decay=0.01, linout=TRUE)