# データ解析基礎論a 分散分析３

```source("http://www.matsuka.info/univ/course_folder/cutil.R")

dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat.aov2<-aov(words~method+Error(subj+subj:method),data=dat)
dat.aov.BTW <-aov(words~method,data=dat)

summary(dat.aov)
RB.qtukey(dat.aov,dat, 0.05)

interaction.plot(dat\$duration,  # x軸
dat\$method,    # まとめる変数
dat\$result,    # y軸
pch=c(20,20),
col=c("skyblue","orange"),
ylab="score",
xlab="Duration",
lwd=3,
type='b',
cex=2,
trace.label="Method")

dat.aov <- aov(result~method*duration+Error(s+s:duration),dat)
TSME<-SPF.tsme(dat.aov,dat,"result")

dat.aov<-aov(result~method+duration+method:duration + Error(s+method:s+duration:s+method:duration:s), data=dat)
TSME<-RBF.tsme(dat.aov, dat, "result")
```

# 2019年度　データ解析基礎論a 分散分析２

```source("http://www.matsuka.info/univ/course_folder/cutil.R")

# anova
dat\$method=factor(dat\$method, levels(dat\$method)[c(1,3,4,2)])
dat.aov<-aov(result~method, data=dat)
summary(dat.aov)

# multiple comparison
# do not use these command
# use simpler one  - "cu.bonF1F"
dat.means<-tapply(dat\$result,dat\$method,mean)
new.alpha = 1-(1-0.05)^(1/5)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
t.value=bunshi/bunbo
2*(1-pt(t.value,28))

# cu.bonF1F
cu.bonF1F(dat.aov,dat,c(-3,1,1,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,1,0,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,1,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,0,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(0,-2,1,1),new.alpha)

# cu.scheffe1F
cu.scheffe1F(dat.aov,dat,c(-3,1,1,1))

# 2 way ANOVA
interaction.plot(dat\$gender,
dat\$affil,
dat\$shoesize,
pch=c(20,20),
col=c("skyblue","orange"),
xlab="gender", ylab="shoesize",
lwd=3,type='b',cex=2,
trace.label="Affiliation")
dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)

# testing simple main effect
# do not use these command
# use simpler one - CRF.tsme
means<-tapply(dat\$shoesize, list(dat\$gender,dat\$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov)   # ANOVA table
MSe=dat.aov.sum[[1]][4,3]      # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1]      # DF for error or dfE=16
dfG=1                          # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe   # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe   # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE)  # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE)
SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1				          # DF for affil
F_affil_F=SS_affil_F/dfA/MSe         # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe         # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE)      # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE)      # p-value for affiliation effect | M

# testint simple main effect w/ CRF.tsme
tsme = CRF.tsme(dat.aov, dat)

# another 2-way ANOVA
interaction.plot(dat\$duration,dat\$method,dat\$result, pch=c(20,20),
col=c("skyblue","orange"), ylab="score", xlab="Duration",
lwd=3,type='b',cex=2,trace.label="Method")
mod1=aov(result~method+duration,data=dat)
mod1.sum=print(summary(mod1))

mod2=aov(result~method*duration,data=dat)
mod2.sum=print(summary(mod2))
CRF.tsme(mod2, dat)

# plotting
means<-tapply(dat\$shoesize,list(dat\$gender, dat\$affil),mean)
Ns<-tapply(dat\$shoesize,list(dat\$gender, dat\$affil),length)
sds<-tapply(dat\$shoesize,list(dat\$gender, dat\$affil),sd)
sems<-sds/sqrt(Ns)
plot(c(0,1),means[,1],type='o',col='skyblue', xlim=c(-0.1,1.1), lwd=2, cex=2, pch=20,
ylim=c(min(means)*0.975, max(means)*1.025), xlab="gender", ylab="shoesize", xaxt="n")
axis(1,c(0,1),c("female","male"))
lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20)
lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2)
lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2)
lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2)
lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2)
legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)
```

# 2019 データ解析基礎論A 分散分析１

```f=c(24.1,23.9,24.4,24.4,23.5)
m=c(25.6,26.1,25.8,25.9,26)

# ANOVA w/o aov
G.mean=mean(c(f,m))
ss.total=sum((c(f,m)-G.mean)^2)
ss.tr=sum((mean(f)-G.mean)^2)*5+sum((mean(m)-G.mean)^2)*5
ss.error=sum((f-mean(f))^2)+sum((m-mean(m))^2)
ss.tr+ss.error
df.tr = 2 - 1
df.error = (4-1)*2
ms.tr = ss.tr /df.tr
ms.error = ss.error /df.error
1-pf(f.value,1,8)

# ANOVA w/ aov
dat<-data.frame(ssize=c(f,m),gender=c(rep("f",5),rep("m",5)))
dat.aov<-aov(ssize ~ gender, data=dat)
summary(dat.aov)

dat.aov<-aov(shoesize~club, dat)

# TukeyHSD w/o TukeyHSD command
qT<-qtukey(0.95, 3, 67)
HSD<-qT*sqrt((2.243*(1/23+1/24))/2)
means<-tapply(dat\$shoesize,dat\$club,mean)
abs(outer(means,means,"-"))
abs(outer(means,means,"-"))>HSD

# TukeyHSD
TukeyHSD(dat.aov)

dat.aov<-aov(result~method, data=dat)
summary(dat.aov)
source("http://www.matsuka.info/univ/course_folder/cutil.R")

# multiple t-test
dat.means<-tapply(dat\$result,dat\$method,mean)
new.alpha = 1-(1-0.05)^(1/5)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
t.value=bunshi/bunbo
2*(1-pt(t.value,28)) > new.alpha

# bonferroni w/ cUTIL
cu.bonF1F(dat.aov,dat,c(-3,1,1,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,1,0,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,1,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,0,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(0,-2,1,1),new.alpha)

# scheffe w/ cUTIL
cu.scheffe1F(dat.aov,dat,c(-3,1,1,1))
cu.scheffe1F(dat.aov,dat,c(-1,0,1,0))
cu.scheffe1F(dat.aov,dat,c(-1,0,0,1))
```

# データ解析基礎論　回帰分析３

```dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
dat.lm<-lm(ani~otouto, dat)

plot(hatvalues(dat.lm))
dat.lm\$residuals
x = dat\$otouto
mean.x = mean(x)

h = 1/length(x)+(x-mean.x)^2/sum((x - mean.x)^2)
hatvalues(dat.lm)

e.std = e/(slm\$sigma*sqrt(1-h))
rstandard(dat.lm)

(e^2*h)/(2*slm\$sigma^2*(1-h)^2)
cooks.distance(dat.lm)

plot(dat)
dat.lm<-lm(sales~., data=dat)
install.packages("DAAG")
library(DAAG)
vif(dat.lm)
lm.price<-lm(price~material+design+dump, data=dat)
p.rsq = summary(lm.price)\$r.squared
vif.p = 1/(1-p.rsq)
summary(dat.lm)

plot(dat,pch=20,cex=2)
dat.lm1<-lm(absence~interest,dat)
dat.lm2<-lm(study~interest+knowledge,dat)
```

# 2019 データ解析基礎論A 回帰分析2

```# two sample t-test
boxplot(dat\$ani, dat\$otouto, col = c("skyblue", "orange"),ylab = "Score",
axnt ="n")
axis(1, at = c(1,2),labels = c("Ani", "Otouto"))
t.test(dat\$ani, dat\$otouto, var.equal=T)
dat2<-data.frame(score=c(dat\$ani,dat\$otouto),
order=c(rep("ani",10),rep("otouto",10)))

plot(dat2\$score~as.numeric(dat2\$order), pch=20, xlab="order",
ylab="score", xlim=c(0.5,2.5), cex=2, xaxt="n")
axis(1, at = c(1,2),labels = c("Ani", "Otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

# one sample t-test
dat.D = dat\$ani - dat\$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)

plot(dat.D~rep(1,10), pch=20, xlab="", ylab="Difference", cex=3, xaxt="n")
dat.D.lm<-lm(dat.D~1)
abline(dat.D.lm,col='red',lwd=3)

# LM with 3 or more categories
dat2<-data.frame(result=dat\$result, method=dat\$method,
c1=c(rep(0,8),rep(1,8),rep(0,16)),
c2=c(rep(0,16),rep(1,8),rep(0,8)),
c3=c(rep(0,24),rep(1,8)))

dat2.lm<-lm(result~c1+c2+c3,data=dat2)
dat.lm <- lm(result~method, dat)

dat3<-data.frame(result=dat\$result,
c1=c(rep(-3,8), rep(1,24)),
c2=c(rep(0,8),rep(-2,8),rep(1,16)),
c3=c(rep(0,16),rep(-1,8),rep(1,8)))

# trend analysis
plot(dat\$result~dat\$duration,data=dat[dat\$method=="method.X",])
result<-dat\$result[dat\$method=="method.X"]
CL=c(rep(-3,5),rep(-1,5),rep(1,5),rep(3,5))
CQ=c(rep(-1,5),rep(1,5),rep(1,5),rep(-1,5))
CC=c(rep(-3,5),rep(1,5),rep(-1,5),rep(3,5))
trend.lm<-lm(result~CL+CQ+CC)
trend.lm2 <- lm(result~duration, contrasts=list(duration = "contr.poly"),data=dat.x)
summary(trend.lm2)

# trend analysis - numeric predictor
set.seed(15)
x = runif(50,0,30)
y = x^0.5 + rnorm(50,9,0.2)
dat.xy = data.frame(y=y,x=x, xsq = x^2)
plot(x,y, xlab = "Hours Studies", ylab="score",
pch=20, cex=2, col="orange")
summary(lm(y~x+xsq, data=dat.xy))

# regression diagnostic
dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat)))
plot(dat.lm01,which=1)
plot(dat.lm01,which=2)

norm.vars=rnorm(300)
qqnorm(norm.vars)
qqline(norm.vars,col='red',lwd=2)

unif.vars=runif(300)
qqnorm(unif.vars)
qqline(unif.vars,col='green',lwd=2)

plot(sort(unif.vars),sort(unif.vars))
plot(sort(norm.vars),sort(unif.vars))

par(mfrow=c(2,2))
plot(dat.lm01)

plot(dat)
dat.lm<-lm(sales~., data=dat)
install.packages("DAAG")
library(DAAG)
vif(dat.lm)
lm.price<-lm(price~material+design+dump, data=dat)
p.rsq = summary(lm.price)\$r.squared
vif.p = 1/(1-p.rsq)
```

# 2019データ解析基礎論a 回帰分析

```dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(dat\$otouto,dat\$ani,pch=20, cex =3,
xlab = "score of younger brother",
ylab = "score of elder brother")

dat.lm <- lm(ani~otouto, data=dat)
summary(dat.lm)
abline(dat.lm, col = 'red',lwd = 2.5)

dat.reg1<-lm(sales~material,data=dat)
dat.reg2<-lm(sales~price,data=dat)
dat.reg3<-lm(sales~design,data=dat)

dat.regALL<-lm(sales~material+price+design,data=dat)
dat.regALL<-lm(sales~.,data=dat)

t.test(dat\$ani, dat\$otouto, var.equal=T)
dat2<-data.frame(score=c(dat\$ani,dat\$otouto),order=c(rep("ani",10),rep("otouto",10)))
plot(dat2\$score~as.numeric(dat2\$order), pch=20, xlab="order",
ylab="score", xlim=c(0.5,2.5), cex=2,xaxt="n")
axis(1,c(1,2),c("ani","otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

dat.D = dat\$ani - dat\$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)
plot(dat.D~rep(1,10),pch=20,xlab="",ylab="Difference",cex=3)
dat.D.lm<-lm(dat.D~1)
abline(dat.D.lm,col='red',lwd=3)
```

# 2019年度　データ解析基礎論a T検定

```x.temp = 0:11
m = dbinom(x.temp, 11, prob=0.5)
names(m) <- paste(0:11)
barplot(m, col = c(rep("red",3),rep("red",6),rep("red",3)))

ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)ssize.mean = mean(ssize)
ssize.var = var(ssize)
N = 10
t.value=(ssize.mean-24)/(sqrt(ssize.var/N))

x.temp = seq(-4,4,0.01)
y.temp = dt(x.temp,df = 9)
plot(x.temp,y.temp, type='l',lwd=3,ylab = "Density", xlab = 't-value')
abline(v = t.value, col='red',lwd=3,lty=2)
abline(v = -t.value, col='red',lwd=3,lty=2)

A=c(12,19,10,10,14,18,15,11,16)
B=c(15,20,16,14,17,16,12,12,19)
d=A-B
tValue<-mean(d)/sqrt(var(d)/length(d))
(1-pt(abs(tValue), df=8))*2
x.temp = seq(-4,4,0.01)
y.temp = dt(x.temp,df = 8)
plot(x.temp,y.temp, type='l',lwd=3,ylab = "Density", xlab = 't-value')
abline(v = t.value, col='red',lwd=3,lty=2)
abline(v = -t.value, col='red',lwd=3,lty=2)

X1=c(78,70,66,76,78,76,88,76)
X2=c(76,72,60,72,70,72,84,70)
t.value=(mean(X1)-mean(X2))/sqrt((var(X1)+var(X2))/8)
2*(1-pt(abs(t.value),14))
```

# 2019年度　データ解析基礎論B W06

```x.temp = 0:9
mass= matrix(dbinom(x.temp,9,0.5),nrow=1)
colnames(mass) <- paste(0:9)
barplot(mass)

sum(dbinom(7:9,9,0.5))
2*(0.5-pnorm(750,800,40))
pnorm(700,800,40)
qnorm(c(0.025, 0.975),800,40)
qnorm(0.99,800,40)

zA=(165-150)/(sqrt(15^2/10))
1-pnorm(zA)
(1-pnorm(zA))*2

mean.M <-mean(dat\$h[dat\$gender=="M"])
sigma = 10
n.M = length(dat\$h[dat\$gender=="M"])
z.value=(mean.M-171)/(sqrt(sigma))

ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)
ssize.mean = mean(ssize)
ssize.var = var(ssize)
N = 10
t.value=(ssize.mean-24)/(sqrt(ssize.var/N))
(1-pt(abs(t.value),df=9))*2
t.test(ssize, mu=24)
```

# 2019 データ解析基礎論A DAA04

```dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
plot(dat\$shoesize, dat\$h, main="Relationship b/w shoesize and height",
xlab = "shoesize", ylab="height", pch=19, col="red")
txt = paste("r =", round(cor(dat\$shoesize,dat\$h), 4))
text(22, 175, txt, cex = 1.5)
abline(h = mean(dat\$h), col='blue');
abline(v = mean(dat\$shoesize), col='darkgreen');
text(x = 21, y = mean(dat\$h)+3,
paste("mean height =", round(mean(dat\$h),2)),

text(x = mean(dat\$shoesize)+0.2, y = 145,
paste("mean shoesize =", round(mean(dat\$shoesize),2)),

abline(lm(dat\$h~dat\$shoesize), lty=2, lwd=2)

plot(dat[dat\$gender=='F',]\$shoesize, dat[dat\$gender=='F',]\$h,
main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
plot(dat[dat\$gender=='M',]\$shoesize, dat[dat\$gender=='M',]\$h,
main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
cex.lab=1.5, pch=15, col='green', xlim=c(20,29), ylim=c(140,190))

plot(dat[dat\$gender=='F',]\$shoesize, dat[dat\$gender=='F',]\$h,
main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
points(dat[dat\$gender=='M',]\$shoesize,dat[dat\$gender=='M',]\$h,
pch = 15, col = 'green')
legend("topleft", c('Female','Male'), pch =c(19,15),
col = c('blue','green'), cex = 1.5)

plot(dat, pch=20, col='blue')

plot(dat.pca, pch = rownames(dat.pca), cex = 1.7, col = "blue")

summary(dat)
dat.meter = dat[,1:2]*0.01
dat.h.ext5 = dat\$h+5
hANDshoe = dat\$h+dat\$shoesize
dat.h.meter.ext5 = dat\$h*0.01+0.05

```

# 2019データ解析基礎論A DAA03可視化２

```v1 = seq(-3,3,0.1)
v2 = v1^2
plot(v1, v2, col = "blue", type = "o", lty = 2, pch = 19,
cex.lab = 1.5, lwd = 3, main = "Y=X*X", xlab = "X",
ylab="X*X", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))

# histogram
hist(dat\$h)
hist(dat\$h, breaks = 20, main = "Histogram of Height",
xlab = "Height", col = 'blue', xlim = c(140, 190))
dens<-density(dat\$h);
hist(dat\$h, main = "Histogram of Height", xlab = "Height",
xlim = c(140,190), probability = T)
lines(dens, lwd = 2, col = "red", lty=2)
plot(v1, v2, col = "blue", type = "l", pch = 19, cex.lab = 1.5,
lwd = 3, xlab = "X", ylab="f(X)", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))
lines(v1, v1^3, col='red',lwd = 3)
legend("bottomright", c("x^2","x^3"), col=c('blue','red'), lwd=2)
legend(-3,0.5, c("x^2","x^3"), col=c('blue','red'), lwd=2)

boxplot(dat\$h,main="Boxplot of Height", ylab="Height", col='cyan', ylim=c(140,190))
boxplot(dat\$h,main="Boxplot of Height", xlab="Height", col="orange", horizontal=T)

boxplot(dat\$h ~ dat\$gender,
main="Distribution of Height by Gender",
ylab="Gender", xlab="Height", col=c('blue','cyan'),
ylim=c(140,190), horizontal=T)

boxplot(dat\$h ~ dat\$gender + dat\$affil,
main="Distribution of  Height by Gender and Affiliation",
ylab="Gender x Affiliation", xlab="Height",
col=c('blue','cyan','red','magenta'), ylim=c(140,190),horizontal=T)

interaction.plot(dat\$gender,
dat\$affil,
dat\$h,
pch=c(20,20),
col=c("skyblue","orange"),
xlab="gender", ylab="height",
lwd=3,type='b',cex=2,
trace.label="Affiliation")

par(mfrow=c(1,2))
hist(dat[dat\$gender=="F",]\$h, main="Dist. of Height for Female Participants",
xlab="Height", xlim=c(140,190), probability=T)

dens.F = density(dat[dat\$gender=='F',]\$h)

lines(dens.F, col='blue',lwd=2)

#２つ目のhistogram
hist(dat[dat\$gender=="M",]\$h, main="Dist. of Height for Male
Participants", xlab="Height", xlim=c(140,190), probability=T,ylim=c(0,0.08))

dens.M = density(dat[dat\$gender=='M',]\$h)

lines(dens.M, col='green', lwd=2)

par(mfrow=c(1,1))
plot(dens.F,col='blue',lwd=2, ylab='density', xlim=c(140,190),
main="Dist. of Height by gender",xlab='Height')
lines(dens.M,col='green',lwd=2)
legend("topleft", c('Female','Male'), col=c('blue','green'), cex=1.5,lwd=2)
text(157.5, 0.04, 'Female', col='blue', cex=2)
text(170, 0.04,'Male', col='green', cex=2)
plot(dat\$shoesize, dat\$h, main="Relationship b/w shoesize and height",
xlab = "shoesize", ylab="height", pch=19, col="red")

txt = paste("r =", round(cor(dat\$shoesize,dat\$h), 3))
text(22, 175, txt, cex = 1.5)

plot(dat[dat\$gender=='F',]\$shoesize, dat[dat\$gender=='F',]\$h,
main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
lines(dat[dat\$gender=='M',]\$shoesize,dat[dat\$gender=='M',]\$h,
type = 'p', pch = 15, col = 'green')
legend("topleft", c('Female','Male'), pch =c(19,15),
col = c('blue','green'), cex = 1.5)
```