認知情報解析演習A W12

ES

n.parent = 10
n.theta = 10
parent = list(theta = matrix(rnorm(n.parent*n.theta),nrow=n.parent),
              sigma = matrix(runif(n.parent*n.theta)+1,nrow = n.parent))

ES_crossover<-function(parent){
  n.parent = nrow(parent$theta)
  rand.order = sample(1:n.parent)
  p1.idx= rand.order[1:(n.parent/2)]
  p2.idx= rand.order[(n.parent/2+1):n.parent]
  co.idx = sample(1:(length(parent)/2),length(parent)/4)
  c1 = parent$theta[p1.idx,]
  c1.copy = c1
  c2 = parent$theta[p2.idx,]
  c1[co.idx] = c2[co.idx]
  c2[co.idx] = c1.copy[co.idx]
  sigma = (parent$sigma[p1.idx,]+parent$sigma[p2.idx,])/2
  return(child = list(theta = rbind(c1,c2), sigma = rbind(sigma,sigma)))
}

ES_mutate<-function(child, tau){
  child$sigma = child$sigma*
    exp(tau*matrix(rnorm(length(child$sigma)),nrow=nrow(child$sigma)))
  child$theta = child$theta + matrix(rnorm(length(child$sigma),0,child$sigma),nrow=nrow(child$sigma))
  return(child)
}

ES_survive<-function(parent, child, fitP, fitC){
  nPop=nrow(parent$theta);
  fitT=c(fitP,fitC);
  fitMax=sort(fitT,index.return=TRUE,decreasing = FALSE)
  tempTheta=rbind(parent$theta, child$theta)
  tempSigma=rbind(parent$sigma, child$sigma)
  fittest=list(theta = tempTheta[fitMax$ix[1:nPop],], sigma = tempSigma[fitMax$ix[1:nPop],])
  return(fittest)
}

set.seed(20); nData = 100
X=matrix(rnorm(9*nData,mean=10,sd=2),ncol=9);X=cbind(rep(1,nData),X)
y=X%*%c(10,2,5,-3,-5,0,0,0,0,0)+rnorm(nData,mean=0,sd=2);
#summary(lm(y~X[,2:10]))



reg.error<-function(b,X,y){
  yhat<-X%*%b
  return(sum((y-yhat)^2))
}
apply(child$theta,1,reg.error, X, y)

ES<-function(parent, nGen,tau,X,Y){
  optHist=matrix(0,nrow=nGen,ncol=1)
  fittest.hist = matrix(0,nrow=nGen,ncol = ncol(parent$theta))
  nPop = nrow(parent$theta)
  nVar = ncol(parent$theta)
  fitP = apply(parent$theta, 1, reg.error, X, Y)
  #fitC = fitP
  optHist[1]=max(c(fitP))
  fittest.hist[1,] = parent$theta[which.max(fitP),]
  for (i_gen in 2:nGen) {
    child<-ES_crossover(parent)
    child<-ES_mutate(child,tau)
    fitP = apply(parent$theta,1, reg.error, X, Y)
    fitC = apply(child$theta, 1, reg.error, X, Y)
    parent<-ES_survive(parent, child, fitP, fitC)
    optHist[i_gen]=max(c(fitP,fitC))
    fittest.hist[i_gen, ] = parent$theta[1,]
  }
  return(list(fittest = parent, optHist = optHist, fittest.hist = fittest.hist))
}

> res<-ES(parent,50000,1,X,y)
> res$fittest$theta[1,]
 [1]  9.508492513  2.074884770  4.929865865 -2.989781723 -5.129436719  0.050919574  0.024012318
 [8] -0.003250697  0.081489175 -0.028049091
> coef(lm(y~X[,2:9]))
 (Intercept)    X[, 2:9]1    X[, 2:9]2    X[, 2:9]3    X[, 2:9]4    X[, 2:9]5    X[, 2:9]6    X[, 2:9]7 
 9.424585305  2.072861468  4.929182784 -2.993701821 -5.135528730  0.048416459  0.023772334 -0.005105199 
   X[, 2:9]8 
 0.078469949 

ES & PSO

 
funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
xmin=-5;xmax=5;n_len=100;
x<-seq(xmin, xmax,length.out=n_len);
y<-x;
z<-outer(x,y,funZ)
contour(x,y,z,nlevels= 50, drawlabels = F,col='blue')

# useful memo
funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}


funZ<-function(x) {
  x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]
}

n.theta = 10;
n.iter = 100;
Wp = 1;
Wg = 1;
theta = matrix(rnorm(n.theta*2), nrow=n.theta)
v = matrix(rnorm(n.theta*2), nrow=n.theta)
theta.hist = array(0,c(n.theta,2,n.iter))
theta.hist[,,1]=theta
p.best.v <- apply(theta,1,funZ)
p.best = theta
g.b.v = min(p.best.v)
g.b.idx = which.min(p.best.v)
g.best <- theta[g.b.idx,]
v = v + Wp*runif(n.theta)*(p.best - theta)+ Wg*runif(n.theta)*t(g.best - t(theta))
theta = theta + v

認知情報解析学 MCMC

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sb

# one variable
N = 20; z = 14
a = 1; b = 1
sigma = 0.2
nIter = 100000
theta = np.random.rand(1)
thetaHist = np.zeros(nIter)
for i_iter in range(nIter):
    proposed = theta + np.random.normal(0,sigma,1)
    while proposed > 1 or proposed < 0:
        proposed = theta + np.random.normal(1)*sigma
    vProp = (proposed**z * (1-proposed)**(N-z) * proposed**(a-1) * (1-proposed)**(b-1) )
    vCurr = (theta**z * (1-theta)**(N-z) * theta**(a-1) * (1-theta)**(b-1) )
    pmove = min(1,vProp/vCurr)
    if np.random.rand(1) < pmove:
        theta = proposed
    thetaHist[i_iter] = theta 
sb.distplot( thetaHist, bins=100 )

# two variable
N1 = 8; z1 = 6
N2 = 7; z2 = 2
a1 = 2; b1 = 2
a2 = 2; b2 = 2

sigma = 0.2
nIter = 1000000
theta1 = np.random.rand(1)
theta2 = np.random.rand(1)
thetaHist = np.zeros([nIter,2])
for i_iter in range(nIter):
    
    prop1 = theta1 + np.random.normal(0,sigma,1)
    while prop1 > 1 or prop1 < 0:
        prop1 = theta1 + np.random.normal(0,sigma,1)
    
    prop2 = theta2 + np.random.normal(0,sigma,1)
    while prop2 > 1 or prop2 < 0:
        prop2 = theta2 + np.random.normal(0,sigma,1)


    vProp = (prop1**z1* (1-prop1)**(N1-z1)* prop2**z2 *(1-prop2)**(N2-z2) 
             *prop1**(a1-1) *(1-prop1)**(b1-1) *prop2**(a2-1) *(1-prop2)**(b2-1))
    vCurr = (theta1**z1*(1-theta1)**(N1-z1)*theta2**z2*(1-theta2)**(N2-z2)
             *theta1**(a1-1)*(1-theta1)**(b1-1)*theta2**(a2-1)*(1-theta2)**(b2-1))
    pmove = min(1,vProp/vCurr)
    if np.random.rand(1) < pmove:
        theta1 = prop1
        theta2 = prop2
    thetaHist[i_iter,...] = [theta1,theta2] 
print(np.mean(thetaHist[...,1]),np.mean(thetaHist[...,0]))
sb.jointplot(x=thetaHist[...,0], y=thetaHist[...,1], kind='kde',space=0, ratio=4)

データ解析基礎論A W11

source("http://www.matsuka.info/univ/course_folder/cutil.R")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv")
dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)])
dat.aov<-aov(result~method, data=dat)
new.alpha<-adj.alpha(5,0.05) 
cu.bonF1F(dat.aov,dat,c(-3,1,1,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,1,0,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,1,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,0,1),new.alpha)

new.f<-adj.f(4,3,28,0.05)
cu.scheffe1F(aov.obj,dat,c(-1,0,1,0))
cu.scheffe1F(aov.obj,dat,c(-1,0,0,1))


dat<-read.csv("http://matsuka.info/univ/course_folder/datW03R.txt")
interaction.plot(dat$gender,
                 dat$affil,
                 dat$shoesize, 
                 pch=c(20,20), 
                 col=c("skyblue","orange"), 
                 xlab="gender", ylab="shoesize", 
                 lwd=3,type='b',cex=2,
                 trace.label="Affiliation")
dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)

means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov)   # ANOVA table
MSe=dat.aov.sum[[1]][4,3]      # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1]      # DF for error or dfE=16
dfG=1                          # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe   # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe   # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE)  # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE)  # p-value for gender effect given PS

SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1				          # DF for affil
F_affil_F=SS_affil_F/dfA/MSe         # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe         # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE)      # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE)      # p-value for affiliation effect | M

tsme = CRF.tsme(dat.aov, dat)

dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
interaction.plot(dat$duration,dat$method,dat$result, pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration",lwd=3,type='b',cex=2,trace.label="Method")
mod1=aov(result~method+duration,data=dat)
mod1.sum=print(summary(mod1))
mod2=aov(result~method*duration,data=dat)
mod2.sum=print(summary(mod2))

qv=qtukey(0.95,DFd+1,DFe)
hsd=qv*(sqrt(MSe/5))
means<-tapply(dat$result,list(dat$method,dat$duration),mean)
print(diffM<-outer(means[1,],means[1,],"-"))
abs(diffM)>hsd       

CRF.tsme(mod2, dat)

認知情報解析演習 W10

n.var = 10
n.child = 10
child = matrix(sample(0:1,n.var*n.child, replace =T),nrow =n.child)

GA_mutate <- function(child, p){
  mut.mat = matrix(sample(0:1,length(child),replace = T, 
                          prob = c(1-p, p)),nrow=nrow(child))
  return(abs(child-mut.mat))
}

GA_crossover<-function(parent){
  n.parent = nrow(parent)
  rand.order = sample(1:n.parent)
  p1.idx= rand.order[1:(n.parent/2)]
  p2.idx= rand.order[(n.parent/2+1):n.parent]
  co.idx = sample(1:(length(parent)/2),length(parent)/4)
  c1 = parent[p1.idx,]
  c1.copy = c1
  c2 = parent[p2.idx,]
  c1[co.idx] = c2[co.idx]
  c2[co.idx] = c1.copy[co.idx]
  return(child = rbind(c1,c2))
}


GA_survive<-function(parent, child, fitP, fitC){
  nPop=nrow(parent);
  fitT=c(fitP,fitC);
  fitMax=sort(fitT,index.return=TRUE,decreasing = TRUE)
  tempX=rbind(parent,child);
  fittest=tempX[fitMax$ix[1:nPop],]
  return(fittest)
}

GA<-function(parent, nGen,p,X,Y){
  optHist=matrix(0,nrow=nGen,ncol=1)
  fittest.hist = matrix(0,nrow=nGen,ncol = ncol(parent))
  nPop = nrow(parent)
  nVar = ncol(parent)
  fitP = rep(0,nPop)
  fitC = fitP
  for (i.pop in 1:nPop){
    dat.temp = X[,which(parent[i.pop,]==1)]
    fitP[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
  }
  optHist[1]=max(c(fitP))
  fittest.hist[1,] = parent[which.max(fitP),]
  for (i_gen in 2:nGen) {
    child<-GA_crossover(parent)
    child<-GA_mutate(child,p)
    # assuming same numbers of set of genes
    for (i.pop in 1:nPop){
      dat.temp = X[,which(parent[i.pop,]==1)]
      fitP[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
      if (sum(child[i.pop,])==0){
        child[i.pop,sample(1:ncol(child),sample(1:nVar,1))]=1
      }
      dat.temp = X[,which(child[i.pop,]==1)]
      fitC[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
    }
    parent<-GA_survive(parent, child, fitP, fitC)
    optHist[i_gen]=max(c(fitP,fitC))
    fittest.hist[i_gen, ] = parent[1,]
  }
  return(list(fittest = parent, optHist = optHist, fittest.hist = fittest.hist))
}

set.seed(19)
nVar = 10
nData = 50
X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
y=X%*%c(-2,0,2,0,2,0,-3,0,-2,0)+rnorm(nData,mean=0,sd=8);
summary(lm(y~X[,seq(1,9,2)]))
summary(lm(y~X))
parent = matrix(sample(0:1,300,replace = T),nrow=30)
res<-GA(parent,100,0.1,X,y)
head(res$fittest)

より複雑なモデルへ

C = combn(3,2)
paste("X",C,sep="")
temp.lab =toString(paste("X",C[ , 1], sep = ""))
gsub(",", ":", temp.lab) 

n.comb = ncol(C)
var.labels=c()
for (i.comb in 1: n.comb){
  temp.label = toString(paste("X",C[ , i.comb], sep = ""))
  var.labels = c(var.labels, gsub(",", ":", temp.label) )
}


mk.labels <- function(C){
  var.labels = c()
  n.comb = ncol(C)
  for (i.comb in 1: n.comb){
    temp.label = toString(paste("X",C[ , i.comb], sep = ""))
    var.labels = c(var.labels, gsub(",", ":", temp.label) )
  }
  return(var.labels)
}
                                    
n.var = 3
max.interaction = 3
# single variable
var.labels = paste("X", 1:n.var, sep = "")


# interaction terms
for (i.interaction in 2:max.interaction ){
  combination = combn(n.var, i.interaction)
  var.labels = c(var.labels, mk.labels(combination))
}

model.def = paste("Y ~", gsub(",", "+",   toString(var.labels[c(1,1,1,1,1,0,0) == 1]))) 

set.seed(20)
nVar = 3
nData = 50
X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
y=X%*%c(1,1,-2)+rnorm(nData,mean=0,sd=3);
dat = data.frame(Y=y,X1=X[,1],X2=X[,2],X3=X[,3])

# example
model.def1 = paste("Y ~", gsub(",", "+",  toString(var.labels[c(1,1,1,0,0,0,0) == 1]))) 
model.lm = lm(model.def1, dat)

Evolutionary Strategy

n.parent = 10
n.theta = 10
parent = list(theta = matrix(rnorm(n.parent*n.theta),nrow=n.parent),
              sigma = matrix(runif(n.parent*n.theta)+1,nrow = n.parent))

ES_crossover<-function(parent){
  n.parent = nrow(parent$theta)
  rand.order = sample(1:n.parent)
  p1.idx= rand.order[1:(n.parent/2)]
  p2.idx= rand.order[(n.parent/2+1):n.parent]
  co.idx = sample(1:(length(parent)/2),length(parent)/4)
  c1 = parent$theta[p1.idx,]
  c1.copy = c1
  c2 = parent$theta[p2.idx,]
  c1[co.idx] = c2[co.idx]
  c2[co.idx] = c1.copy[co.idx]
  sigma = (parent$sigma[p1.idx,]+parent$sigma[p2.idx,])/2
  return(child = list(theta = rbind(c1,c2), sigma = rbind(sigma,sigma)))
}

ES_mutate<-function(child, tau){
  child$sigma = child$sigma*
    exp(tau*matrix(rnorm(length(child$sigma)),nrow=nrow(child$sigma)))
  child$theta = child$theta + 
    matrix(rnorm(length(child$sigma),0,child$sigma),nrow=nrow(child$sigma))
  return(child)
}

set.seed(20); nData = 100
X=matrix(rnorm(9*nData,mean=10,sd=2),ncol=9);X=cbind(rep(1,nData),X)
y=X%*%c(10,2,5,-3,-5,0,0,0,0,0)+rnorm(nData,mean=0,sd=2);
#summary(lm(y~X[,2:10]))

# 評価関数
reg.error<-function(b,X,y){
  yhat<-X%*%b
  return(sum((y-yhat)^2))
}
# 評価の使用例
fitC = apply(child$theta,1,reg.error, X, y)

データ解析基礎論 W10

f=c(24.1,23.9,24.4,24.4,23.5)
m=c(25.6,26.1,25.8,25.9,26)

G.mean=mean(c(f,m))
ss.total=sum((c(f,m)-G.mean)^2)
ss.tr=sum((mean(f)-G.mean)^2)*5+sum((mean(m)-G.mean)^2)*5
ss.error=sum((f-mean(f))^2)+sum((m-mean(m))^2)
df.tr = 2 - 1
df.error = (5-1)*2
ms.tr = ss.tr /df.tr
ms.error = ss.error /df.error

dat<-data.frame(ssize=c(f,m),gender=c(rep("f",5),rep("m",5)))
dat.aov<-aov(ssize ~ gender,data=dat)
summary(dat.aov)

dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
dat.aov<-aov(shoesize~club, dat)
summary(dat.aov)

qT<-qtukey(0.95, 3, 67)
HSD<-qT*sqrt((2.243*(1/23+1/24))/2)
means<-tapply(dat$shoesize,dat$club,mean)
abs(outer(means,means,"-"))
abs(outer(means,means,"-"))>HSD

TukeyHSD(dat.aov)

dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv")
dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)])

dat.aov<-aov(result~method, data=dat)
summary(dat.aov)
dat.means<-tapply(dat$result,dat$method,mean)
new.alpha = 1-(1-0.05)^(1/5)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
t.value=bunshi/bunbo
2*(1-pt(t.value,28))

iterative prisoners’ dilemma

pmat = np.array([[-25,0],[-20,-5]])

def iterPD(action1, action2, pmat):
# A = defect & defect (-10); B = defect & coop (1)
# C = coop & defect   (9)  ;   D = coop & coop (11)

    result = action1 + action2
    if result == -10:
        po = [pmat[0,0],pmat[0,0]]
    elif result == -9:
        po = [pmat[1,0],pmat[0,1]]
    elif result == 10:
        po = [pmat[0,1],pmat[1,0]]
    else:
        po = [pmat[1,1],pmat[1,1]]
    
    return(po)

def strCoop(mode):
    if mode == "p1":
        return(1)
    else:
        return(10)
    
def strDef(mode):
    if mode == "p1":
        return(0)
    else:
        return(-10)
    
def strTfT(lastPlay, mode):
    if lastPlay == "D":
        if mode == "p1":
            return(0)
        else:
            return(-10)
    else:
        if mode == "p1":
            return(1)
        else:
            return(10)
Nrep = 10
p1Hist = np.zeros([Nrep,2])
p2Hist = np.zeros([Nrep,2])
  
for i_rep in range(Nrep):
    p1 = strDef("p1")
    p2 = strCoop("p2")
    po = iterPD(p1,p2,pmat)
    p1Hist[i_rep,...] = [p1,po[0]]
    p2Hist[i_rep,...] = [p2,po[1]]

lastPlay = "C"
for i_rep in range(Nrep):
    p1 = strCoop("p1")
    p2 = strTfT(lastPlay, "p2")
    if p1 == 0:
        lastPlay = "D"
    else:
        lastPlay = "C"
    po = iterPD(p1,p2,pmat)
    p1Hist[i_rep,...] = [p1,po[0]]
    p2Hist[i_rep,...] = [p2,po[1]]

データ解析基礎論A W09

dat <- read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.lm1 <- lm(sales~price,data = dat)
summary(dat.lm1)

dat.lm2 <- lm(sales~price+material,data = dat)
summary(dat.lm2)

dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat)))
plot(dat.lm01,which=1)
plot(dat.lm01,which=2)

norm.vars=rnorm(300)
qqnorm(norm.vars)
qqline(norm.vars,col='red',lwd=2)

unif.vars=runif(300)
qqnorm(unif.vars)
qqline(unif.vars,col='green',lwd=2)

par(mfrow=c(2,2))
plot(dat.lm01)

dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv")
dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)])
dat.lm<-lm(result~method,data=dat)
plot(dat.lm)

dat <- read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat2 <- rbind(dat,c(0,0,100,250))
tail(dat2)
plot(dat2[1:50,],col=c(rep("black",50),"red"),pch=20,cex=3)
plot(dat2[1:50,],pch=20,cex=3)
dat.lm<-lm(sales~design,data=dat)
summary(dat.lm)
dat2.lm<-lm(sales~design,data=dat2)
summary(dat2.lm)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv")
dat.lm<-lm(sales~., data=dat) 
install.packages("DAAG")
library(DAAG)
vif(dat.lm)

dat<-read.csv("http://www.matsuka.info/data_folder/waa07_2.csv")
poly.lm<-lm(grade~study+study.sq, data= dat)

dat<-read.table("http://www.matsuka.info/data_folder/tdkPATH01.txt",header=TRUE)
plot(dat,pch=20,cex=2)
dat.lm1<-lm(absence~interest,dat)
dat.lm2<-lm(study~interest+knowledge,dat)
dat.lm3<-lm(grade~knowledge+study+absence,dat)

dat.STD<-as.data.frame(scale(dat))
summary(lm(absence~interest,dat.STD))
summary(lm(study~interest+knowledge,dat.STD))
summary(lm(grade~study+absence+knowledge,dat.STD))

認知情報解析演習 08

  # genetic algorithm - identify the best linear model
  GA_recomb<-function(G) {
    nPop=nrow(G);
    nVar=ncol(G);
    child = G;
    G.permuted = G[sample(1:nPop),]
    recomb.idx=which(matrix(sample(0:1,nPop*nVar,replace=T),nrow=nPop)==1)
    child[recomb.idx] = G.permuted[recomb.idx]
    return(child)
  }
  
  GA_mutate = function(child, p){
    n.pop = nrow(child)
    n.var = ncol(child)
    mut.mat= matrix((runif(n.pop*n.var) < p), nrow = n.pop)
    child = abs(child-mut.mat)
    return(child)
  }
  
  GA_survive<-function(G, child, fitG, fitC){
    nPop=nrow(G);
    fitT=c(fitG,fitC);
    fitMax=sort(fitT,index.return=TRUE,decreasing = TRUE)
    tempX=rbind(G,child);
    G=tempX[fitMax$ix[1:nPop],]
    return(G)
  }
  
  GA<-function(G, nGen,p,X,Y){
    optHist=matrix(0,nrow=nGen,ncol=1)
    G.hist = matrix(0,nrow=nGen,ncol = ncol(G))
    nPop = nrow(G)
    nVar = ncol(G)
    fitG = rep(0,nPop)
    fitC = fitG
    for (i.pop in 1:nPop){
      dat.temp = X[,which(G[i.pop,]==1)]
      fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
    }
    optHist[1]=max(c(fitG))
    G.hist[1,] = G[which.max(fitG),]
    for (i_gen in 2:nGen) {
      child<-GA_recomb(G)
      child<-GA_mutate(child,p)
      # assuming same numbers of set of genes
      for (i.pop in 1:nPop){
        dat.temp = X[,which(G[i.pop,]==1)]
        fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
        if (sum(child[i.pop,])==0){
          child[i.pop,sample(1:ncol(child.all),sample(1:nVar,1))]=1
        }
        dat.temp = X[,which(child[i.pop,]==1)]
        fitC[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
      }
      G<-GA_survive(G, child, fitG, fitC)
      optHist[i_gen]=max(c(fitG,fitC))
      G.hist[i_gen, ] = G[1,]
    }
    return(list(G = G, optHist = optHist, G.hist = G.hist))
  }
  
  set.seed(19)
  nVar = 10
  nData = 50
  X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
  y=X%*%c(-2,0,2,0,2,0,-3,0,-2,0)+rnorm(nData,mean=0,sd=8);
  summary(lm(y~X[,seq(1,9,2)]))
  summary(lm(y~X))
  G = matrix(sample(0:1,300,replace = T),nrow=30)
  res<-GA(G,100,0.1,X,y)
  head(res$G)
  
  # GA - challenging task
  C = combn(3,2)
  temp.lab =toString(paste("X",C[ , i.comb], sep = ""))
  gsub(",", "*", temp.lab) 
  
  n.comb = ncol(C)
  var.labels = c()
  for (i.comb in 1: n.comb){
    temp.label = toString(paste("X",C[ , i.comb], sep = ""))
    var.labels = c(var.labels, gsub(",", "*", temp.label) )
  }
  
  mk.labels <- function(C){
    var.labels = c()
    n.comb = ncol(C)
    for (i.comb in 1: n.comb){
      temp.label = toString(paste("X",C[ , i.comb], sep = ""))
      var.labels = c(var.labels, gsub(",", "*", temp.label) )
    }
    return(var.labels)
  }
  var.labels = paste("X", 1:n.var, sep = "")
  
  # interaction terms
  for (i.interaction  in 2:max.interaction ){
    combination = combn(n.var, i.interaction)
    var.labels = c(var.labels, mk.labels(combination))
  }
  model.def = paste("Y ~", gsub(",", "+", toString(var.labels[c(1,1,1,1,1,0,0) == 1]))) 
  X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
  y=X%*%c(1,1,-2)+rnorm(nData,mean=0,sd=3);
  dat = data.frame(y=y,X1=X[,1],X2=X[,2],X3=X[,3])
  
  model.lm = lm(model.def, dat)
  

データ解析基礎論 W08

dat <- read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.lm <- lm(sales~.,data = dat)
summary(dat.lm)
anova(dat.lm)
sse = anova(dat.lm)[[2]][4]
ssr = sum(anova(dat.lm)[[2]][1:3])
df.err = anova(dat.lm)[[1]][4]
df.model = sum(anova(dat.lm)[[1]][1:3])
F.val = (ssr/3) / (sse/46)
1 - pf(F.val, df.model, df.err)

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/dktb312.csv")
tapply(dat$result,dat$method,mean)

dat2<-data.frame(result=dat$result,
                 c1=c(rep(0,8),rep(1,8),rep(0,16)),
                 c2=c(rep(0,16),rep(1,8),rep(0,8)),
                 c3=c(rep(0,24),rep(1,8)))
dat2.lm<-lm(result~.,data=dat2)


dat3<-data.frame(result=dat$result, 
                 c1=c(rep(-3,8), rep(1,24)), 
                 c2=c(rep(0,8),rep(-2,8),rep(1,16)),
                 c3=c(rep(0,16),rep(-1,8),rep(1,8)))
dat3.lm<-lm(result~c1+c2+c3,data=dat3
            
            
dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
plot(dat$result~dat$duration,data=dat[dat$method=="method.X",])
result<-dat$result[dat$method=="method.X"]
CL=c(rep(-3,5),rep(-1,5),rep(1,5),rep(3,5))
CQ=c(rep(-1,5),rep(1,5),rep(1,5),rep(-1,5))
CC=c(rep(-3,5),rep(1,5),rep(-1,5),rep(3,5))

trend.lm<-lm(result~CL+CQ+CC)
summary(trend.lm)

dat.x = dat[dat$method=="method.X",]
trend.lm2 <- lm(result~duration, contrasts=list(duration = "contr.poly"),data=dat.x)

dat.x$result[16:20]=dat.x$result[16:20]-3
plot(tapply(dat.x$result,dat.x$duration,mean),
     pch=20,col="red",lwd=3, type="o",cex=3,ylab="mean",xlab="time")

trend.lm2 <- lm(result~duration, contrasts=list(duration = "contr.poly"),data=dat.x)

subj.gender = c(rep("male",30),rep("female",30))
pic.gender = rep(c(rep("male",15),rep("female",15)),2)
# 15m-m, 15m-f, 15f-m, 15f-f 
eye.fix = c(round(rnorm(15,50,5)),
            round(rnorm(15,70,5)),
            round(rnorm(15,65,5)),
            round(rnorm(15,50,5)))
datE = data.frame(eye.fix = eye.fix, subj.gender= subj.gender,pic.gender=pic.gender)
interaction.plot(datE$subj.gender, datE$pic.gender, datE$eye.fix, 
                 trace.label = "Gender of Stimuli", xlab = "Gender of Participants",
                 ylab = "Number of fixation",lwd = 3,type = "b",pch = c(17,20),cex =3,
                 col = c('skyblue','coral'),legend =T)
datE.lm <- lm(eye.fix~subj.gender*pic.gender,data=datE)
summary(datE.lm)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") 
dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat)))
plot(dat.lm01,which=1)
plot(dat.lm01,which=2)
par(mfrow=c(2,2))
plot(dat.lm01)

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/dktb312.csv")
levels(dat$method)
dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)])
dat.lm<-lm(result~method,data=dat)
par(mfrow=c(2,2))
plot(dat.lm)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv")
plot(dat)
dat.lm<-lm(sales~., data=dat)