# 認知情報解析演習　06

Traveling Salesman Problem

```TSP.inv <- function(x, n){
x.length = length(x)
start.idx = sample(1:x.length,1)
end.idx = min(x.length, (start.idx + n -1))
x[start.idx:end.idx]=x[end.idx:start.idx]
return(x)
}

TSP.switch <- function(x){
x.length = length(x)
switch.idx = sample(1:x.length,2)
x[switch.idx] = x[rev(switch.idx)]
return(x)
}

TSP.trans <-function(x){
x.length = length(x)
trans.idx = sample(1:x.length,2)
new.x = x[-trans.idx[1]]
new.x = append(new.x, x[trans.idx[1]],after = (trans.idx[2]-1))
return(new.x)
}

TSP.init<-function(n_city=10) {
return(matrix(runif(n_city*2,1,100),nrow=n_city,ncol=2))
}

cities = TSP.init(10)

TSP.totDist<-function(route, cities) {
route=c(route,route[1]);
n.cities=nrow(cities);
totDist=0;
for (i.cities in 1:n.cities) {
totDist = totDist + dist(cities[route[i.cities:(i.cities + 1)],])
}
return(totDist)
}

TSP.totDist(route,cities)

TSP.demo<-function(n.city=20, maxItr=1000, parm.set = c(1,0.99, 50)) {
loc=TSP.init(n.city);
route=1:n.city
## param. for simulated annealing - change values if necessary
C=parm.set[1];
eta=parm.set[2];
TEMP=parm.set[3];
##
optDist=TSP.totDist(route,loc)
optHist=matrix(0,nrow=maxItr,ncol=(length(route)+1))
optHist[1,]=c(route,optDist)
for (i_loop in 2:maxItr) {
rand.op=sample(c('inv','sw','trans'),1,prob=c(0.75,0.125,0.125))
if (rand.op=='inv') {
new.route=TSP.inv(route,sample(2:n.city,1))
} else if (rand.op=='sw') {
new.route=TSP.switch(route)
} else {
new.route=TSP.trans(route)
}
new.dist=TSP.totDist(new.route,loc)
delta=new.dist-optDist
prob=1/(1+exp(delta/(C*TEMP)))
if (runif(1) < prob) {
route=new.route;optDist=new.dist;
}
optHist[i_loop,]=c(route,optDist);
TEMP=min(TEMP*eta,0.1)
}
par(mfrow=c(1,2))
plot(rbind(loc,loc[1,]),type='o',pch=20,cex=2.5, col='red',
xlab='location @X',ylab='location @Y',main='Initial route')
plot(loc[optHist[1000,c(1:n.city,1)],],type='o',pch=20, col='magenta',
cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
return(optHist)
}
```
```# regression with SimAnneal.
reg.ERR<-function(b,X,y){
yhat<-X%*%b
return(sum((y-yhat)^2))
}
##
SA.reg <- function(maxItr,parm.set=c(C=1,eta=0.995,TEMP=100,sigma=100)){
set.seed(20)
nData = 100
x=matrix(rnorm(9*nData,mean=10,sd=2),ncol=9);X=cbind(rep(1,nData),x)
y=X%*%c(10,2,5,-3,-5,0,0,0,0,0)+rnorm(nData,mean=0,sd=2);
lm.sum = summary(lm(y~X[,2:10]))
C = parm.set[1]
eta = parm.set[2]
TEMP= parm.set[3]
sigma = parm.set[4]
b = rnorm(10,0,100)
optERR=reg.ERR(b,X,y)
optHist=matrix(0,nrow=maxItr,ncol=(10+1))
optHist[1,]=c(b,optERR)
for (i_loop in 2:maxItr) {
new.b = b + rnorm(10,0,sigma*TEMP)
new.ERR=Fun.reg(new.b, X,y)
delta=new.ERR-optERR
prob=1/(1+exp(delta/(C*TEMP)))
if (runif(1) < prob) {
b=new.b;
optERR=new.ERR;
}
optHist[i_loop,]=c(b,optERR);
TEMP=max(TEMP*eta,0.01)
}
print(optHist[maxItr,])
print(coef(lm.sum)[,1])
return(list(optHist=optHist, gt = coef(lm.sum)))
}
res=SA.reg(1e5,c(80,0.999,1,1))

```

# データ解析基礎論A W07

```dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
dat.lm <- lm(ani~otouto, data=dat)
summary(dat.lm)
t.test(dat\$ani, dat\$otouto, var.equal=T)

dat2 <- data.frame(score=c(dat\$ani,dat\$otouto),order=c(rep("ani",10),rep("otouto",10)))
plot(dat2\$score~as.numeric(dat2\$order),pch=20,xlab="order",
ylab="score",xlim=c(0.5,2.5),cex=2,xaxt="n")
axis(1,c(1,2),c("ani","otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

dat.D = dat\$ani - dat\$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)
plot(dat.D~rep(1,10),pch=20,xlab="",ylab="Difference",cex=3)
dat.D.lm<-lm(dat.D~1)
abline(dat.D.lm,col='red',lwd=3)

```

# 認知情報解析演習　04

```fun01 <- function(x1,x2){
return(x1^2 - x1*x2 -4*x1 +x2^2 -x2 )
}

x1 = seq(-10,10,0.05); x2 = seq(-10,10,0.05)
v <- outer(x1,x2,function(x1,x2) fun01(x1,x2))
contour(x1,x2,v,nlevels= 100, drawlabels = F,col='blue')

a = runif(2,0,10); b = runif(2,0,10);c = runif(2,0,10)

sortABC <-  function(a,b,c,func){
fA = do.call(func,list(a[1],a[2]))
fB = do.call(func,list(b[1],b[2]))
fC = do.call(func,list(c[1],c[2]))
sort.F = sort(c(fA,fB,fC),index.return=T)
temp.set = rbind(a,b,c)
a = temp.set[sort.F\$ix[1],]; fA = sort.F\$x[1]
b = temp.set[sort.F\$ix[2],]; fB = sort.F\$x[2]
c = temp.set[sort.F\$ix[3],]; fC = sort.F\$x[3]
return(list(abc = rbind(a,b,c), fs = c(fA,fB,fC)))
}

nelder.mead01 <- function(a,b,c, func, tol){
# not in the original form
# expansion -> reflection -> contraction -> shrink
# minimizes f with 2 variables
repeat {
sort.abc = sortABC(a,b,c,func)
a = sort.abc\$abc[1,];b = sort.abc\$abc[2,];c = sort.abc\$abc[3,]
fA = sort.abc\$fs[1];fB = sort.abc\$fs[2];fC = sort.abc\$fs[3];
max.diffs = max(abs(outer(c(fA,fB,fC),c(fA,fB,fC),'-')))
if (max.diffs < tol){
return(list(abc=rbind(a,b,c), fs = c(fA,fB,fC)));
break
}
points(a[1],a[2],pch = 20, col ='red')
points(b[1],b[2],pch = 20, col ='cyan')
points(c[1],c[2],pch = 20, col ='green')
m = (a + b)/2
e = m + 2*(m - c)
fE = do.call(func,list(e[1],e[2]))
if (fE < fB){
c = e; fC = fE
} else {
r = 2*m -c
fR = do.call(func,list(r[1],r[2]))
if (fR < fC){
c = r; fC = fR
}
if (fR >= fB){
s = (c + m)/2; fS = do.call(func,list(s[1],s[2]))
if (fS < fC){
c = s; fC =fS
} else {
b = m; fB = do.call(func,list(m[1],m[2]))
c = (a + c)/2; fC = do.call(func,list(c[1],c[2]))
}
}
}
}
}
```
```# simulated annealing
# version 1
SimAneal01<-function(func, initXY, maxItr=1000, C=1, eta=0.99, width=10) {
x=initXY
opt.value = do.call(func,list(x))
n.var = length(x)
opt.hist=matrix(0, nrow=maxItr, ncol=5)
opt.hist[1,]=c(x,x,opt.value,opt.value,0)
for (i_loop in 2:maxItr) {
accept = 0
temp.x = x + rnorm(n.var, mean = 0, sd=width)
temp.value= do.call(func,list(temp.x))
delta=temp.value-opt.value;
prob=1/(1+exp(delta/(C*width)))
if (runif(1) < prob) {
x = temp.x;
opt.value = temp.value;
accept = 1
}
opt.hist[i_loop,]=c(x, temp.x, opt.value, temp.value, accept);
width=width*eta
}
return(data.frame(opt.hist))
}

set.seed(50)
n.iter =500
fun01<-function(x){x^2+2*x}
res <- SimAneal01(fun01, 3, n.iter, 1, 0.985, 1)

# version 2
funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
minSA<-function(initXY,maxItr=1000,C=1,eta=0.99,width) {
x=initXY[1];y=initXY[2];z=funZ(x,y);
opHist=matrix(0,nrow=maxItr,ncol=3)
opHist[1,]=c(x,y,z)
for (i_loop in 2:maxItr) {
xTemp=x+rnorm(1,mean=0,sd=width)
yTemp=y+rnorm(1,mean=0,sd=width)
zTemp=funZ(xTemp, yTemp);
delta=zTemp-z;
prob=1/(1+exp(delta/(C*width)))
if (runif(1) < prob) {
x=xTemp;y=yTemp;z=zTemp;
}
opHist[i_loop,]=c(x,y,z);
width=width*eta
}
return(opHist)
}

funZ<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
xmin=-5;xmax=5;n_len=100;
x<-seq(xmin, xmax,length.out=n_len);
y<-x;z<-outer(x,y,funZ)

res<-minSA(c(0,0),1000,1,0.99,10)
lines(res[,1:2],type='o',lwd=2,col='green',pch=20)
```

# データ解析基礎論A 06

```ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)
t.test(ssize, mu=24)

t.test(dat\$h[dat\$gender=="M"],mu=171)
t.test(dat\$h[dat\$gender=="F"],mu=158)

A=c(12,19,10,10,14,18,15,11,16)
B=c(15,20,16,14,17,16,12,12,19)
d=A-B
t.test(d, mu=0)

X1=c(78,70,66,76,78,76,88,76)
X2=c(76,72,60,72,70,72,84,70)
t.value=(mean(X1)-mean(X2))/sqrt((var(X1)+var(X2))/8)

t.test(X1,X2,var.equal=T)
t.test(X1,X2)

###
may = dat\$height[dat\$mob=="may"]
dec = dat\$height[dat\$mob=="dec"]
dat3 = data.frame(height = c(may,dec),
mob = c(rep('may',length(may)),rep('dec',length(dec))))
boxplot(height~mob, data=dat3,col=c('skyblue','orange'),
ylab = "height", xlab ="mob")

spring = dat\$height[dat\$mob =="march"|
dat\$mob =="April"|
dat\$mob =="may"]
fall = dat\$height[dat\$mob =="sep"|
dat\$mob =="oct"|
dat\$mob =="nov"]
dat4 = data.frame(height = c(spring,fall),
season = c(rep('spring',length(spring)),
rep('fall',length(fall))))
boxplot(height~season, data=dat4, col=c('skyblue','orange'),
ylab = "height", xlab ="season")
```

# 認知情報解析 03

```# objective: find x that minimizes y: y = x^2 +2*x

# simple gradient descent (GD)
tol = 1e-7;lr = 0.1;
x = 10; x.hist = x
repeat{
grad = 2*x + 2
if (abs(grad) <= tol) { break }
x = x - lr*grad
x.hist = c(x.hist,x)
}
x.temp = seq(-10,10,length.out = 100)
plot(x.temp, x.temp^2+2*x.temp,type='l',lwd = 3, ylim = c(-5,120),
ylab = "y",xlab="x")
lines(x.hist,x.hist^2+2*x.hist,type='o',col='red',lwd=2,pch=19)
points(x.hist,rep(-4,length(x.hist)),col='red',pch="I")

# GD w/ momentum
tol = 1e-7;lr = 0.1; gamma = 0.36
x = 10; x.histM = x; v = 0;
repeat{
grad = 2*x + 2
if (abs(grad) <= tol) { break }
v = gamma*v - lr*grad
x = x + v
x.histM = c(x.histM,x)
}
lines(x.histM,x.histM^2+2*x.histM,type='o',col='blue',pch=19)
points(x.histM,rep(-2,length(x.histM)),col='blue',pch="I")
legend("topleft",c("standard GD","GD w/ moment."), pch=1,lwd=2,col=c('red','blue'))
c(length(x.hist),length(x.histM))
```

# データ解析基礎論a W05

```
# Male
mean.M <-mean(dat\$h[dat\$gender=="M"])
stddev = 10
n.M = length(dat\$h[dat\$gender=="M"])
z.value=(mean.M-171)/(sqrt(stddev^2/n.M))
(1-pnorm(abs(z.value)))*2
# Female
mean.F <-mean(dat\$h[dat\$gender=="F"])
stddev = 5
n.F = length(dat\$h[dat\$gender=="F"])
z.value=(mean.F-158)/(sqrt(stddev^2/n.F))
(1-pnorm(abs(z.value)))*2
```

# データ解析基礎論A 練習課題WA04

データ解析基礎論A 練習課題（提出不要）

マーケティングリサーチで商品Aと商品Bをレーティングしてもらい、そしてどちらか一方をレーティングに基づいて選択してもらった結果が以下のデータです。

```   S1 S2 S3 S4 S5 S6 S7 S8 S9
A  12 19 10 10 14 18 15 11 16
B  15 20 16 14 17 16 12 12 19
-----------------------------------
B  B  B  B  B  A  A  B  B
```

つまり、９人中７人が商品Bを選択しました。

A: ９人中７人以上がBを選択する確率を計算してください。
B: ９人中７人以上が一方の商品を選択する確率を計算してください。

ある通りの平日午前10~11時の1時間の車の交通量は800台で標準偏差が40台だとします。また、この通りの同じ曜日の同じ時間帯の交通量は正規分布に従うと仮定して、以下の数値を求めてください；
A) 交通量が750台から850台になる確率
B）交通量が700台以下となる確率
C）平均値の800を中心に95%の範囲（下限、上限）
D）99%の上限（99%の確率でX台以下となるようなX）

A) 運動部Xの赤血球の量は一般成人と同等であると仮定した場合、部員の赤血球の平均値が165ユニット以上となる確率を求めてください。
B）運動部Xの赤血球の量は一般成人と同等であると仮定したが、Aで得られた確率がどの程度だとこの仮説が妥当だと思いますか？

# 認知情報解析　02

```# ケーキの分配ゲーム
n.cake = 10
# 利得行列
pay.mat = matrix(0,(n.cake+1),(n.cake+1))
for (i.cake in 1:n.cake){
pay.mat[(i.cake+1),1:(n.cake-i.cake+1)] =i.cake
}
# 初期化（各戦略の確率や時間など）
p.cake = runif(n.cake+1)
p.cake = p.cake/sum(p.cake)

max.time = 50
dt = 0.01
t = seq(0,max.time,dt)
n.iter = length(t)
p.hist = matrix(0,nrow = n.iter, ncol = (n.cake+1))
p.hist[1,] = p.cake

# メインのスクリプト
for (i.time in 2:n.iter){
W = colSums(p.cake*t(pay.mat))
W.ave = sum(outer(p.cake,p.cake)*pay.mat)
p.cake = p.cake + p.cake*(W - W.ave)/W.ave * dt
p.hist[i.time,] = p.cake
}

# 結果の可視化
plot(p.hist[,1],ylim=c(0,1),type='l',lwd=2,ylab = 'Proportion',xlab="time")
for (i.strat in 2:(n.cake+1)){
lines(p.hist[,i.strat],col=i.strat,lwd=2)
}
legend("topleft",paste("request = ",0:10),col=1:(n.cake+1),lwd =2,cex=0.75)
```
```# 中央極限定理の実験
ss.female = rnorm(6000,mean = 24, sd = 0.5)
ss.male = rnorm(6000,mean = 26, sd = 0.5)
pop = c(ss.female,ss.male)
parm.mu = mean(pop)
parm.sd = sd(pop)

n.per.one.sample = 10;
n.rep = 100000;

samp.data = matrix(sample(pop,n.per.one.sample*n.rep,replace = T),
ncol = n.per.one.sample)
samp.mean = rowMeans(samp.data)

par(mfrow=c(1,3))
# fig 1
hist(pop,  col='skyblue',breaks = 50, probability = T,
main = "Population Dist.",xlab = "shoesizd")
pop.dens<-density(pop)
lines(pop.dens,col='orange',lwd=4)

# fig 2
rand.p = sample(1:n.rep,300)
dat.dens = density(samp.data[rand.p[1],])
plot(dat.dens, col='gray',xlim=c(22,28),ylim=c(0,0.5),
main = "Sample Dist. (N = 10)",xlab = "shoesizd")
for (i.d in 2:300){
dat.dens = density(samp.data[rand.p[i.d],])
lines(dat.dens, col='gray')
}

# fig 3
mean.dens<-density(samp.mean)
hist(samp.mean, col='skyblue',breaks = 50, probability = T,
main = 'Dist of Sample Means',xlab = "shoesizd")
lines(mean.dens,col='red',lwd=4)
x = seq(22,27,0.01)
y = dnorm(x,parm.mu, parm.sd/sqrt(n.per.one.sample))
lines(x, y ,col='darkblue',lty=3,lwd=3)
legend("topright",c("Empirical", "Theoretical"),
lty=c(1,3),col=c("red","blue"),lwd=4,cex=1)
```