# データ解析基礎論A Week03

```dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt",header=T)
summary(dat)
boxplot(dat\$shoesize, col = "skyblue", main = "Dist. of Shoesize",
ylab = "Size of shoe (in cm)")
boxplot(dat\$h, col = "coral", main = "Dist. of Height", ylab = "Height (in cm)")

dat.meter = dat[,1:2]*0.01
dat.h.ext5 = dat\$h+5
dat.ss.ext1 = dat\$shoesize+1
hANDshoe = dat\$h+dat\$shoesize
dat.h.meter.ext5 = dat\$h*0.01+0.05
```

# ch03 neural networks

```step.func <- function(x){
return(as.numeric(x > 0))
}
x = seq(-5, 5, 0.1)
y = step.func(x)
plot(x,y, ylab = 'y', xlab = 'a', type ="l", lwd =2)

sigmoid.func <- function(x){
return(1/(1+exp(-x)))
}

y = sigmoid.func(x)
plot(x,y, ylab = 'y', xlab = 'a', type ="l", lwd =2)

y.step = step.func(x)
y.sigm = sigmoid.func(x)
plot(x,y.step, ylab = 'y', xlab = 'a', type ="l", lwd =2)
lines(x,y.sigm, lwd =2, lty = 2)

relu.func <- function(x){
return(pmax(0,x))
}

y.relu = relu.func(x)
plot(x,y.relu, ylab = 'y', xlab = 'a', type ="l", lwd =2)

A = matrix(1:4, nrow = 2, byrow = T)
B = matrix(5:8, nrow = 2, byrow = T)

A = matrix(1:6, nrow = 3, byrow = T)
B = matrix(7:8, nrow = 2, byrow = T)

x = c(1,0.5)
W1 = matrix((1:6)*0.1, nrow = 2)
B1 = (1:3)*0.1
A1 = x%*%W1 + B1
Z1 = sigmoid.func(A1)

W2 = matrix((1:6)*0.1, nrow = 3)
B2 = c(0.1, 0.2)
A2 = Z1%*%W2 + B2
Z2 = sigmoid.func(A2)

W3 = matrix((1:4)*0.1, nrow = 2)
B3 = c(0.1, 0.2)
A3 = Z2%*%W3+ B3
Z3 = A3

# function to initialize 3L network
init.3L.network <- function(){
W1 = matrix((1:6)*0.1, nrow = 2)
B1 = (1:3)*0.1
W2 = matrix((1:6)*0.1, nrow = 3)
B2 = c(0.1, 0.2)
W3 = matrix((1:4)*0.1, nrow = 2)
B3 = c(0.1, 0.2)
return(list(W1 = W1, B1 = B1, W2 = W2, B2 = B2, W3 = W3, B3 = B3))
}
# feedforward process
forward.3L <- function(network, x){
A1 = x%*%network\$W1 + network\$B1
Z1 = sigmoid.func(A1)
A2 = Z1%*%network\$W2 + network\$B2
Z2 = sigmoid.func(A2)
A3 = Z2%*%network\$W3 + network\$B3
Z3 = sigmoid.func(A3)
A3 = Z3
return(A3)
}

network<-init.3L.network()
y = forward.3L(network, c(1, 0.5))

a = c(1010,1000,990)
exp(a)/sum(exp(a))

softmax.func <- function(x){
max.x = max(x)
return(exp(x-max.x)/sum(exp(x-max.x)))
}

train <- data.matrix(train)
train.x <- train[,-1]
train.y <- train[,1]
train.x <- t(train.x/255)
"trNetwork.Rdata")
network=trNetwork

n.train = ncol(train.x)
correct.cl = 0
conf.matrix = matrix(0,10,10)
for (i.loop in 1:n.train){
y = forward.3L(network,train.x[,i.loop])
max.y = max.col(y)
conf.matrix[max.y, (train.y[i.loop]+1)] = conf.matrix[max.y, (train.y[i.loop]+1)] + 1
}
accuracy = sum(diag(conf.matrix))/n.train

# batch
batch_size = 200
conf.matrix = matrix(0,10,10)
for (i.batch in seq(1,n.train, batch_size)){
y = forward.3L(network, train.x[,(i.batch:(i.batch+batch_size-1))])
pred = max.col(y)
conf.matrix = conf.matrix+table(pred,
(train.y[i.batch:(i.batch+batch_size-1)]+1))
}
accuracy = sum(diag(conf.matrix))/n.train

```

# データ解析基礎論A グラフの基礎

データ解析基礎論A 第2週　グラフの基礎

```# descriptive statistics
summary(dat)
mean(dat\$shoesize)
var(dat[,1:2])
cov(dat[,1:2])
cor(dat[,1:2])

# basics - plotting
x=seq(-3,3,0.1);y=x^2;
plot(x,y)
plot(x,y,col='red')
plot(x,y,pch=20)
plot(x,y,type='l')
plot(x,y,type='l',lty=4,lwd=3)
plot(x,y,main="THIS IS THE TITLE", xlab="Label for X-axis",ylab="Label for Y-axis")
plot(x,y,main="TITLE", xlab="X here",ylab="Y here",xlim=c(-3.5,3.5),ylim=c(-0.5, 10))
plot(x,y,col='blue',type='o',lty=2,pch=19,lwd=3,main="Y=X*X", xlab="X",ylab="X*X",
xlim=c(-3.5,3.5),ylim=c(-0.5, 10))

# histogram
hist(dat\$h,main="Histogram of Height",xlab="Height",col='blue',xlim=c(140,190))
dens<-density(dat\$h);
hist(dat\$h,main="Histogram of Height",xlab="Height",xlim=c(140,190),probability=T)
lines(dens,lwd=2,col='red',lty=2)

# boxplot
boxplot(dat\$h,main="Boxplot of Height",ylab="Height",col='cyan',ylim=c(140,190))
boxplot(dat\$h~dat\$gender,main="Distribution of Height by Gender",ylab="Gender",
xlab="Height",col=c('blue','cyan'),ylim=c(140,190),horizontal=T)
boxplot(dat\$h~dat\$gender+dat\$affil, main="Distribution of Height by Gender and Affiliation",
ylab="Gender x Affiliation", xlab="Height", col=c('blue','cyan','red','magenta'),
ylim=c(140,190),horizontal=T)

# barplot
install.packages("gplots")
library(gplots)

means <- tapply(dat\$h, dat\$gender, mean)
sds<-tapply(dat\$h,dat\$gender,sd)
ns<-tapply(dat\$h,dat\$gender,length)
sems = sds/sqrt(ns)
barplot2(means, plot.ci=T,
ci.l = means - sems,
ci.u = means + sems,
ylim = c(140,180),
names.arg = c("Female","Male"),
xpd = F,
ylab = "height",
xlab = "gender")

# another barplot
means <- tapply(dat\$h,list(dat\$gender,dat\$affil),mean)
sds <- tapply(dat\$h,list(dat\$gender,dat\$affil),sd)
ns <- tapply(dat\$h,list(dat\$gender,dat\$affil),length)
sem = sds/sqrt(ns)
barplot2(means[1:4], plot.ci=T, ci.l=means[1:4]-sem[1:4],
ci.u=means[1:4] + sem[1:4], ylim=c(150,175),
names.arg=c("Female,CS","Male,CS","Female,PSY","Male,PSY"),
xpd=F,ylab="height",xlab="gender & affiliation")

# histogram again
par(mfrow=c(1,2))
hist(dat[dat\$gender=='F',]\$h,main="Dist. of Height for Female Participants",
xlab="Height",xlim=c(140,190),probability=T)
dens.F=density(dat[dat\$gender=='F',]\$h);lines(dens.F,col='blue',lwd=2)
hist(dat[dat\$gender=='M',]\$h,main="Dist. of Height for Male Participants",
xlab="Height",xlim=c(140,190),probability=T,ylim=c(0,0.08))
dens.M=density(dat[dat\$gender=='M',]\$h);lines(dens.M,col='green',lwd=2)

par(mfrow=c(2,1))

par(mfrow=c(1,1))
plot(dens.F,col='blue',lwd=2,main="Dist. of Height by gender",xlab='Height',
ylab='density',xlim=c(140,190))
lines(dens.M,col='green',lwd=2)
legend("topleft", c('Female','Male'),col=c('blue','green'),cex=1.5,lwd=2)

# inserting text
text(157.5,0.04,'Female',col='blue',cex=2)
text(170,0.04,'Male',col='green',cex=2)

# scatterplot
plot(dat\$shoesize,dat\$h,main="Relationship b/w shoesize and height",xlab='shoe size',
ylab='height',pch=19,col='red')
text(22,175,paste("r =",substr(cor(dat\$shoesize,dat\$h),1,5)),cex=1.5)
abline(h=mean(dat\$h),col='blue');
abline(v=mean(dat\$shoesize),col='green');
text(21.5,165,'mean height',col='blue')
text(25.7,145,'mean shoesize',col='green')
abline(lm(dat\$h~dat\$shoesize),lty=2,lwd=2)

plot(dat[dat\$gender=='F',]\$shoesize,dat[dat\$gender=='F',]\$h,
main="Relationship b/w shoesize and height",xlab='shoesize',ylab='height',
cex.lab=1.5,pch=19,col='blue',xlim=c(20,29),ylim=c(140,190))
lines(dat[dat\$gender=='M',]\$shoesize,dat[dat\$gender=='M',]\$h,type='p',pch=15,col='green')
legend("topleft", c('Female','Male'), pch =c(19,15),col=c('blue','green'),cex=1.5)
plot(dat.reg,pch=20,col=c('blue'))

# intro to central limit theorem
ckCLT=function(n_rep,n_sample){
dat<-matrix(rnorm(n_rep*n_sample),nrow=n_rep,ncol=n_sample);
means<-rowMeans(dat)}
n_rep=10^6
n5=ckCLT(n_rep,5)
hist(n5,main="Dist. of sample meanx",xlab="sample mean",xlim=c(-3,3),probability=T)
den5=density(n5);lines(den5,col='blue',lwd=2)

n10=ckCLT(n_rep,10)
n25=ckCLT(n_rep,25)
n100=ckCLT(n_rep,100)
plot(den5,col='blue',lwd=2,,main="Dist. of sample meanx",xlab="sample mean",
xlim=c(-2,2),ylim=c(0,4))
den10=density(n10);lines(den10,col='red',lwd=2)
den25=density(n25);lines(den25,col='black',lwd=2)
den100=density(n100);lines(den100,col='green',lwd=2)
legend("topleft", c('N=5','N=10','N=25','N=100'),col=c('blue','red','black','green'),
cex=1.5,lwd=2)

```

# データ解析基礎論a WAA01

データ解析基礎論A　Weekly Assignment WAA01

WAA01.1

```> dat
id gender score
1      M    73
2      M    64
3      M    78
4      F    74
5      F    84
6      F    78
```

WAA01.2
gender == “M” や　gender == “F” などを用いて、上記のデータ男性と女性のscoreの平均値を求めてください。

WAA01.3
このファイルをダウンロードして、scoreが100の男性と女性の数を調べてください。

```解答例
WAA01.1

dat<-data.frame(id = 1:6, gender = c(rep("M",3),rep("F",3)),
score = c(73,64,78,74,84,78))

id = 1:6
gender = c(rep("M",3),rep("F",3))
score = c(73,64,78,74,84,78)
dat2 <- data.frame(id = id, gender = gender, score = score)

WAA01.2

mean(dat\$score[which(dat\$gender=="M")])
mean(dat\$score[which(dat\$gender=="F")])

dat.male = dat[which(dat\$gender=="M"),]
mean(dat.male\$score)
dat.female = dat[which(dat\$gender=="F"),]
mean(dat.female\$score)

WAA01.3

which(dat3[which(dat3\$gender == "M"),]\$score == 100)
which(dat3[which(dat3\$gender == "F"),]\$score == 100)

dat3.male = dat3[which(dat3\$gender == "M"), ]
dat3.female = dat3[which(dat3\$gender == "F"), ]
dat3.male\$id[which(dat3.male\$score == 100)]
dat3.female\$id[which(dat3.female\$score == 100)]
```

# 認知情報解析 2017 – Ch 02

Chapter 2: Perceptron

```# with THRESHOLD (theta)
AND.gate <- function(x1, x2){
w1 = 0.5
w2 = 0.5
theta = 0.7
y.temp = w1*x1 + w2*x2
if (y.temp <= theta){
y = 0
} else {
y = 1
}
return(y)
}

AND.gate <- function(x1, x2){
w1 = 0.5; w2 = 0.5; theta = 0.7
return(as.numeric(w1*x1 + w2*x2 > theta))
}

NAND.gate <- function(x1, x2){
w1 = -0.5; w2 = -0.5; theta = -0.7
return(as.numeric(w1*x1 + w2*x2 > theta))
}

OR.gate <- function(x1, x2){
w1 = 0.5; w2 = 0.5; theta = 0.3
return(as.numeric(w1*x1 + w2*x2 > theta))
}

# with BIAS (b)
AND.gate <- function(x1, x2){
w1 = 0.5
w2 = 0.5
b = -0.7
y.temp = w1*x1 + w2*x2 + b
if (y.temp <= 0){
y = 0
} else {
y = 1
}
return(y)
}

AND.gate <- function(x1, x2){
w1 = 0.5; w2 = 0.5; b = -0.7
return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

NAND.gate <- function(x1, x2){
w1 = -0.5; w2 = -0.5; b = 0.7
return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

OR.gate <- function(x1, x2){
w1 = 0.5; w2 = 0.5; b = -0.3
return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

NOR.gate <- function(x1, x2){
w1 = -0.5; w2 = -0.5; b = 0.3
return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

plot.logic <- function(logic.oper){
x1 = c(0,0,1,1);
x2 = c(0,1,0,1);
if (logic.oper == "and") {
w1 = 0.5; w2 = 0.5; theta = 0.7;
true.point = AND.gate(x1,x2)
} else if (logic.oper == "or") {
w1 = 0.5; w2 = 0.5; theta = 0.3;
true.point = OR.gate(x1,x2)
} else if (logic.oper == "nand") {
w1 = -0.5; w2 = -0.5; theta = -0.7;
true.point = NAND.gate(x1,x2)
} else if (logic.oper == "nor"){
w1 = -0.5; w2 = -0.5; theta = -0.3;
true.point = NOR.gate(x1,x2)
} else {warning("incompatible operator");stop() }
plot(c(0,0,1,1),c(0,1,0,1),xlim = c(-0.5, 1.5), ylim = c(-0.5, 1.5),
pch = 20, cex= 2, col = true.point+1)
abline(a = theta/w1, b = -w1/w2, lwd = 3)
}

XOR.gate <- function(x1, x2){
gate1 <- NAND.gate(x1,x2)
gate2 <- OR.gate(x1,x2)
y <- AND.gate(gate1,gate2)
return(y)
}

plot.XOR <- function(){
x1 = c(0,0,1,1);
x2 = c(0,1,0,1);
w11 = -0.5; w21 = -0.5; theta1 = -0.7
w12 = 0.5; w22 = 0.5; theta2 = 0.3
true.point = XOR.gate(x1, x2)
plot(c(0,0,1,1),c(0,1,0,1),xlim = c(-0.5, 1.5), ylim = c(-0.5, 1.5),
pch = 20, cex= 2, col = true.point+1)
abline(a = theta1/w11, b = -w11/w21, lwd = 3)
abline(a = theta2/w12, b = -w12/w22, lwd = 3)
}
```

# データ解析基礎論a 2017 W01

データ解析基礎論a 2017 W01で使用するコードなど

```data01<-data.frame(score = c(2,4,3,4),
dose = c(rep(10,2),rep(100,2)),
condition = rep(c('exp','control'),2))