データ解析基礎論a 分散分析3

source("http://www.matsuka.info/univ/course_folder/cutil.R")

dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt")
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat.aov2<-aov(words~method+Error(subj+subj:method),data=dat)
dat.aov.BTW <-aov(words~method,data=dat)

summary(dat.aov)
RB.qtukey(dat.aov,dat, 0.05)

dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration,  # x軸
                 dat$method,    # まとめる変数    
                 dat$result,    # y軸 
                 pch=c(20,20), 
                 col=c("skyblue","orange"),
                 ylab="score",
                 xlab="Duration",
                 lwd=3, 
                 type='b',
                 cex=2, 
                 trace.label="Method")

dat.aov <- aov(result~method*duration+Error(s+s:duration),dat)
TSME<-SPF.tsme(dat.aov,dat,"result")

dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")
dat.aov<-aov(result~method+duration+method:duration + Error(s+method:s+duration:s+method:duration:s), data=dat)
TSME<-RBF.tsme(dat.aov, dat, "result")

2019年度 データ解析基礎論a 分散分析2

source("http://www.matsuka.info/univ/course_folder/cutil.R")
adj.alpha(5,0.05)

# anova
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv")
dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)])
dat.aov<-aov(result~method, data=dat)
summary(dat.aov)

# multiple comparison 
# do not use these command
# use simpler one  - "cu.bonF1F"
dat.means<-tapply(dat$result,dat$method,mean)
new.alpha = 1-(1-0.05)^(1/5)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
t.value=bunshi/bunbo
2*(1-pt(t.value,28))


# cu.bonF1F
new.alpha<-adj.alpha(5,0.05) 
cu.bonF1F(dat.aov,dat,c(-3,1,1,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,1,0,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,1,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,0,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(0,-2,1,1),new.alpha)

# cu.scheffe1F
new.f<-adj.f(4,3,28,0.05)
cu.scheffe1F(dat.aov,dat,c(-3,1,1,1))


# 2 way ANOVA 
dat <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/datW03R.txt")
interaction.plot(dat$gender,
                 dat$affil,
                 dat$shoesize, 
                 pch=c(20,20), 
                 col=c("skyblue","orange"), 
                 xlab="gender", ylab="shoesize", 
                 lwd=3,type='b',cex=2,
                 trace.label="Affiliation")
dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)

# testing simple main effect
# do not use these command
# use simpler one - CRF.tsme
means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov)   # ANOVA table
MSe=dat.aov.sum[[1]][4,3]      # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1]      # DF for error or dfE=16
dfG=1                          # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe   # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe   # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE)  # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE)  
SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1				          # DF for affil
F_affil_F=SS_affil_F/dfA/MSe         # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe         # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE)      # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE)      # p-value for affiliation effect | M

# testint simple main effect w/ CRF.tsme
tsme = CRF.tsme(dat.aov, dat)
             
# another 2-way ANOVA
dat <-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
interaction.plot(dat$duration,dat$method,dat$result, pch=c(20,20), 
                 col=c("skyblue","orange"), ylab="score", xlab="Duration",
                 lwd=3,type='b',cex=2,trace.label="Method")
mod1=aov(result~method+duration,data=dat)
mod1.sum=print(summary(mod1))

mod2=aov(result~method*duration,data=dat)
mod2.sum=print(summary(mod2))
CRF.tsme(mod2, dat)


# plotting 
dat<-read.csv("http://www.matsuka.info/univ/course_folder/datW03R.txt",header=T)
means<-tapply(dat$shoesize,list(dat$gender, dat$affil),mean)
Ns<-tapply(dat$shoesize,list(dat$gender, dat$affil),length)
sds<-tapply(dat$shoesize,list(dat$gender, dat$affil),sd)
sems<-sds/sqrt(Ns)
plot(c(0,1),means[,1],type='o',col='skyblue', xlim=c(-0.1,1.1), lwd=2, cex=2, pch=20,
     ylim=c(min(means)*0.975, max(means)*1.025), xlab="gender", ylab="shoesize", xaxt="n")
axis(1,c(0,1),c("female","male"))
lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20)
lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2)
lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2)
lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2)
lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2)
legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)

2019 データ解析基礎論A 分散分析1

f=c(24.1,23.9,24.4,24.4,23.5)
m=c(25.6,26.1,25.8,25.9,26)

# ANOVA w/o aov
G.mean=mean(c(f,m))
ss.total=sum((c(f,m)-G.mean)^2)
ss.tr=sum((mean(f)-G.mean)^2)*5+sum((mean(m)-G.mean)^2)*5
ss.error=sum((f-mean(f))^2)+sum((m-mean(m))^2)
ss.tr+ss.error
df.tr = 2 - 1
df.error = (4-1)*2
ms.tr = ss.tr /df.tr
ms.error = ss.error /df.error
1-pf(f.value,1,8)

# ANOVA w/ aov
dat<-data.frame(ssize=c(f,m),gender=c(rep("f",5),rep("m",5)))
dat.aov<-aov(ssize ~ gender, data=dat)
summary(dat.aov)

dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
dat.aov<-aov(shoesize~club, dat)

# TukeyHSD w/o TukeyHSD command
qT<-qtukey(0.95, 3, 67)
HSD<-qT*sqrt((2.243*(1/23+1/24))/2)
means<-tapply(dat$shoesize,dat$club,mean)
abs(outer(means,means,"-"))
abs(outer(means,means,"-"))>HSD

# TukeyHSD
TukeyHSD(dat.aov)


dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv")
dat.aov<-aov(result~method, data=dat)
summary(dat.aov)
source("http://www.matsuka.info/univ/course_folder/cutil.R")
adj.alpha(5,0.05)

# multiple t-test
dat.means<-tapply(dat$result,dat$method,mean)
new.alpha = 1-(1-0.05)^(1/5)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
t.value=bunshi/bunbo
2*(1-pt(t.value,28)) > new.alpha

# bonferroni w/ cUTIL
cu.bonF1F(dat.aov,dat,c(-3,1,1,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,1,0,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,1,0),new.alpha)
cu.bonF1F(dat.aov,dat,c(-1,0,0,1),new.alpha)
cu.bonF1F(dat.aov,dat,c(0,-2,1,1),new.alpha)

# scheffe w/ cUTIL
new.f<-adj.f(4,3,28,0.05)
cu.scheffe1F(dat.aov,dat,c(-3,1,1,1))
cu.scheffe1F(dat.aov,dat,c(-1,0,1,0))
cu.scheffe1F(dat.aov,dat,c(-1,0,0,1))

データ解析基礎論 回帰分析3

dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
dat.lm<-lm(ani~otouto, dat)

plot(hatvalues(dat.lm))
dat.lm$residuals
x = dat$otouto
mean.x = mean(x)

h = 1/length(x)+(x-mean.x)^2/sum((x - mean.x)^2)
hatvalues(dat.lm)

e.std = e/(slm$sigma*sqrt(1-h))
rstandard(dat.lm)

(e^2*h)/(2*slm$sigma^2*(1-h)^2)
cooks.distance(dat.lm)


dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv")
plot(dat)
dat.lm<-lm(sales~., data=dat) 
install.packages("DAAG")
library(DAAG)
vif(dat.lm)
lm.price<-lm(price~material+design+dump, data=dat)
p.rsq = summary(lm.price)$r.squared
vif.p = 1/(1-p.rsq) 
summary(dat.lm)

dat<-read.table("http://www.matsuka.info/data_folder/tdkPATH01.txt",header=TRUE)
plot(dat,pch=20,cex=2)
dat.lm1<-lm(absence~interest,dat)
dat.lm2<-lm(study~interest+knowledge,dat)
dat.lm3<-lm(grade~knowledge+study+absence,dat)

2019 データ解析基礎論A 回帰分析2

# two sample t-test
dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
boxplot(dat$ani, dat$otouto, col = c("skyblue", "orange"),ylab = "Score",
        axnt ="n")
axis(1, at = c(1,2),labels = c("Ani", "Otouto"))
t.test(dat$ani, dat$otouto, var.equal=T)
dat2<-data.frame(score=c(dat$ani,dat$otouto),
                 order=c(rep("ani",10),rep("otouto",10)))

plot(dat2$score~as.numeric(dat2$order), pch=20, xlab="order",
     ylab="score", xlim=c(0.5,2.5), cex=2, xaxt="n")
axis(1, at = c(1,2),labels = c("Ani", "Otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

# one sample t-test
dat.D = dat$ani - dat$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)

plot(dat.D~rep(1,10), pch=20, xlab="", ylab="Difference", cex=3, xaxt="n")
dat.D.lm<-lm(dat.D~1)
abline(dat.D.lm,col='red',lwd=3)

# LM with 3 or more categories
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv")
dat2<-data.frame(result=dat$result, method=dat$method,
                 c1=c(rep(0,8),rep(1,8),rep(0,16)),
                 c2=c(rep(0,16),rep(1,8),rep(0,8)),
                 c3=c(rep(0,24),rep(1,8)))

dat2.lm<-lm(result~c1+c2+c3,data=dat2)
dat.lm <- lm(result~method, dat)

dat3<-data.frame(result=dat$result, 
                 c1=c(rep(-3,8), rep(1,24)), 
                 c2=c(rep(0,8),rep(-2,8),rep(1,16)),
                 c3=c(rep(0,16),rep(-1,8),rep(1,8)))

# trend analysis
dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
plot(dat$result~dat$duration,data=dat[dat$method=="method.X",])
result<-dat$result[dat$method=="method.X"]
CL=c(rep(-3,5),rep(-1,5),rep(1,5),rep(3,5))
CQ=c(rep(-1,5),rep(1,5),rep(1,5),rep(-1,5))
CC=c(rep(-3,5),rep(1,5),rep(-1,5),rep(3,5))
trend.lm<-lm(result~CL+CQ+CC)
trend.lm2 <- lm(result~duration, contrasts=list(duration = "contr.poly"),data=dat.x)
summary(trend.lm2)

# trend analysis - numeric predictor
set.seed(15)
x = runif(50,0,30)
y = x^0.5 + rnorm(50,9,0.2)
dat.xy = data.frame(y=y,x=x, xsq = x^2)
plot(x,y, xlab = "Hours Studies", ylab="score",
     pch=20, cex=2, col="orange")
summary(lm(y~x+xsq, data=dat.xy))

# regression diagnostic
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") 
dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat)))
plot(dat.lm01,which=1)
plot(dat.lm01,which=2)

norm.vars=rnorm(300)
qqnorm(norm.vars)
qqline(norm.vars,col='red',lwd=2) 

unif.vars=runif(300)
qqnorm(unif.vars)
qqline(unif.vars,col='green',lwd=2)

plot(sort(unif.vars),sort(unif.vars))
plot(sort(norm.vars),sort(unif.vars))

par(mfrow=c(2,2))
plot(dat.lm01)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv")
plot(dat)
dat.lm<-lm(sales~., data=dat) 
install.packages("DAAG")
library(DAAG)
vif(dat.lm)
lm.price<-lm(price~material+design+dump, data=dat)
p.rsq = summary(lm.price)$r.squared
vif.p = 1/(1-p.rsq) 

2019データ解析基礎論a 回帰分析

dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(dat$otouto,dat$ani,pch=20, cex =3,
     xlab = "score of younger brother",
     ylab = "score of elder brother")

dat.lm <- lm(ani~otouto, data=dat)
summary(dat.lm)
abline(dat.lm, col = 'red',lwd = 2.5)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.reg1<-lm(sales~material,data=dat)
dat.reg2<-lm(sales~price,data=dat)
dat.reg3<-lm(sales~design,data=dat)

dat.regALL<-lm(sales~material+price+design,data=dat)
dat.regALL<-lm(sales~.,data=dat)

dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
t.test(dat$ani, dat$otouto, var.equal=T)
dat2<-data.frame(score=c(dat$ani,dat$otouto),order=c(rep("ani",10),rep("otouto",10)))
plot(dat2$score~as.numeric(dat2$order), pch=20, xlab="order",
     ylab="score", xlim=c(0.5,2.5), cex=2,xaxt="n")
axis(1,c(1,2),c("ani","otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

dat.D = dat$ani - dat$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)
plot(dat.D~rep(1,10),pch=20,xlab="",ylab="Difference",cex=3)
dat.D.lm<-lm(dat.D~1)
abline(dat.D.lm,col='red',lwd=3)

2019年度 データ解析基礎論a T検定

x.temp = 0:11
m = dbinom(x.temp, 11, prob=0.5)
names(m) <- paste(0:11)
barplot(m, col = c(rep("red",3),rep("red",6),rep("red",3)))

ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)ssize.mean = mean(ssize)
ssize.var = var(ssize)
N = 10
t.value=(ssize.mean-24)/(sqrt(ssize.var/N))

x.temp = seq(-4,4,0.01)
y.temp = dt(x.temp,df = 9)
plot(x.temp,y.temp, type='l',lwd=3,ylab = "Density", xlab = 't-value')
abline(v = t.value, col='red',lwd=3,lty=2)
abline(v = -t.value, col='red',lwd=3,lty=2)

A=c(12,19,10,10,14,18,15,11,16)
B=c(15,20,16,14,17,16,12,12,19)
d=A-B
tValue<-mean(d)/sqrt(var(d)/length(d))
(1-pt(abs(tValue), df=8))*2
x.temp = seq(-4,4,0.01)
y.temp = dt(x.temp,df = 8)
plot(x.temp,y.temp, type='l',lwd=3,ylab = "Density", xlab = 't-value')
abline(v = t.value, col='red',lwd=3,lty=2)
abline(v = -t.value, col='red',lwd=3,lty=2)

X1=c(78,70,66,76,78,76,88,76)
X2=c(76,72,60,72,70,72,84,70)
t.value=(mean(X1)-mean(X2))/sqrt((var(X1)+var(X2))/8)
2*(1-pt(abs(t.value),14))

2019年度 データ解析基礎論B W06

x.temp = 0:9
mass= matrix(dbinom(x.temp,9,0.5),nrow=1)
colnames(mass) <- paste(0:9)
barplot(mass)

sum(dbinom(7:9,9,0.5))
2*(0.5-pnorm(750,800,40))
pnorm(700,800,40)
qnorm(c(0.025, 0.975),800,40)
qnorm(0.99,800,40)

zA=(165-150)/(sqrt(15^2/10))
1-pnorm(zA)
(1-pnorm(zA))*2

dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
mean.M <-mean(dat$h[dat$gender=="M"])
sigma = 10
n.M = length(dat$h[dat$gender=="M"])
z.value=(mean.M-171)/(sqrt(sigma))


ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)
ssize.mean = mean(ssize)
ssize.var = var(ssize)
N = 10
t.value=(ssize.mean-24)/(sqrt(ssize.var/N))
(1-pt(abs(t.value),df=9))*2
t.test(ssize, mu=24)

2019 データ解析基礎論A DAA04

dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
plot(dat$shoesize, dat$h, main="Relationship b/w shoesize and height",
     xlab = "shoesize", ylab="height", pch=19, col="red")
txt = paste("r =", round(cor(dat$shoesize,dat$h), 4))
text(22, 175, txt, cex = 1.5)
abline(h = mean(dat$h), col='blue');
abline(v = mean(dat$shoesize), col='darkgreen');
text(x = 21, y = mean(dat$h)+3,
        paste("mean height =", round(mean(dat$h),2)),
        col="blue",adj = 0)

text(x = mean(dat$shoesize)+0.2, y = 145,
        paste("mean shoesize =", round(mean(dat$shoesize),2)),
        col="darkgreen",adj = 0)

abline(lm(dat$h~dat$shoesize), lty=2, lwd=2)

plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h,
     main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
     cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
plot(dat[dat$gender=='M',]$shoesize, dat[dat$gender=='M',]$h,
     main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
     cex.lab=1.5, pch=15, col='green', xlim=c(20,29), ylim=c(140,190))

plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h,
     main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
     cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
points(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h,
       pch = 15, col = 'green')
legend("topleft", c('Female','Male'), pch =c(19,15),
       col = c('blue','green'), cex = 1.5)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
plot(dat, pch=20, col='blue')

dat.pca<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
plot(dat.pca, pch = rownames(dat.pca), cex = 1.7, col = "blue")

dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt",header=T)
summary(dat)
dat.meter = dat[,1:2]*0.01
dat.h.ext5 = dat$h+5
hANDshoe = dat$h+dat$shoesize
dat.h.meter.ext5 = dat$h*0.01+0.05

2019データ解析基礎論A DAA03可視化2

v1 = seq(-3,3,0.1)
v2 = v1^2
plot(v1, v2, col = "blue", type = "o", lty = 2, pch = 19,
     cex.lab = 1.5, lwd = 3, main = "Y=X*X", xlab = "X",
     ylab="X*X", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))

# histogram
dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
hist(dat$h)
hist(dat$h, breaks = 20, main = "Histogram of Height",
     xlab = "Height", col = 'blue', xlim = c(140, 190))
dens<-density(dat$h);
hist(dat$h, main = "Histogram of Height", xlab = "Height",
     xlim = c(140,190), probability = T)
lines(dens, lwd = 2, col = "red", lty=2)
plot(v1, v2, col = "blue", type = "l", pch = 19, cex.lab = 1.5,
     lwd = 3, xlab = "X", ylab="f(X)", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))
lines(v1, v1^3, col='red',lwd = 3)
legend("bottomright", c("x^2","x^3"), col=c('blue','red'), lwd=2)
legend(-3,0.5, c("x^2","x^3"), col=c('blue','red'), lwd=2)

boxplot(dat$h,main="Boxplot of Height", ylab="Height", col='cyan', ylim=c(140,190))
boxplot(dat$h,main="Boxplot of Height", xlab="Height", col="orange", horizontal=T)

boxplot(dat$h ~ dat$gender,
        main="Distribution of Height by Gender",
        ylab="Gender", xlab="Height", col=c('blue','cyan'),
        ylim=c(140,190), horizontal=T)

dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
boxplot(dat$h ~ dat$gender + dat$affil,
        main="Distribution of  Height by Gender and Affiliation",
        ylab="Gender x Affiliation", xlab="Height",
       col=c('blue','cyan','red','magenta'), ylim=c(140,190),horizontal=T)

interaction.plot(dat$gender,
                 dat$affil,
                 dat$h,
                 pch=c(20,20),
                 col=c("skyblue","orange"),
                 xlab="gender", ylab="height",
                 lwd=3,type='b',cex=2,
                 trace.label="Affiliation")

par(mfrow=c(1,2))
hist(dat[dat$gender=="F",]$h, main="Dist. of Height for Female Participants",
     xlab="Height", xlim=c(140,190), probability=T)

dens.F = density(dat[dat$gender=='F',]$h)

lines(dens.F, col='blue',lwd=2)

#2つ目のhistogram
hist(dat[dat$gender=="M",]$h, main="Dist. of Height for Male
     Participants", xlab="Height", xlim=c(140,190), probability=T,ylim=c(0,0.08))

dens.M = density(dat[dat$gender=='M',]$h)

lines(dens.M, col='green', lwd=2)

par(mfrow=c(1,1))
plot(dens.F,col='blue',lwd=2, ylab='density', xlim=c(140,190),
     main="Dist. of Height by gender",xlab='Height')
lines(dens.M,col='green',lwd=2)
legend("topleft", c('Female','Male'), col=c('blue','green'), cex=1.5,lwd=2)
text(157.5, 0.04, 'Female', col='blue', cex=2)
text(170, 0.04,'Male', col='green', cex=2)
plot(dat$shoesize, dat$h, main="Relationship b/w shoesize and height",
   xlab = "shoesize", ylab="height", pch=19, col="red")

txt = paste("r =", round(cor(dat$shoesize,dat$h), 3))
text(22, 175, txt, cex = 1.5)

plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h,
     main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
     cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
lines(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h,
      type = 'p', pch = 15, col = 'green')
legend("topleft", c('Female','Male'), pch =c(19,15),
       col = c('blue','green'), cex = 1.5)