データ解析基礎論B review02

n.subj = 5
set.seed(10)
groupA1 = rnorm(n.subj, mean = 100, sd =15)
groupA2 = rnorm(n.subj, mean = 100, sd =15)
groupID = c(rep("A1",n.subj),rep("A2",n.subj))
dat = data.frame(score = c(groupA1,groupA2),ID = groupID)
grand.mean = mean(dat$score)
mean.A1 = mean(groupA1)
mean.A2 = mean(groupA2)
plot(dat$ID, dat$score)
points(as.numeric(dat$ID), dat$score, col=c(rep('red',n.subj),rep("blue",n.subj)),pch=20)

idx = 1:(n.subj+n.subj)
plot(idx,dat$score,col=c(rep('red',n.subj),rep("blue",n.subj)),
     pch=20,ylim=c(65,145),ylab ="score",xlab="subjID")
abline(h=grand.mean,lwd=2)
abline(h=mean.A1,lty=2,col='red',lwd=2)
abline(h=mean.A2,lty=2,col='blue',lwd=2)
legend("topright",c("overall mean", "G1 mean","G2 mean"),
       lty=c(1,2,2),col=c('black','red','blue'),lwd=2)

ss.total = sum((dat$score-grand.mean)^2)
ss.group1 = n.subj*(mean.A1-grand.mean)^2
ss.group2 = n.subj*(mean.A2-grand.mean)^2
ss.group = ss.group1+ss.group2
ss.error1 = sum((groupA1-mean.A1)^2)
ss.error2 = sum((groupA2-mean.A2)^2)
ss.error = ss.error1+ss.error2
df.group = 2 - 1
df.error = (n.subj-1)+(n.subj-1)
ms.group = ss.group/df.group
ms.error = ss.error/df.error
f.value = ms.group/ms.error
p.value = 1 - pf(f.value, df.group, df.error)
summary(aov(score~ID, data=dat))
n.rep = 1e5
n.subj = 5
f.hist = rep(0,n.rep)
for (i.rep in 1:n.rep){
  groupA1 = rnorm(n.subj, mean = 100, sd =15)
  groupA2 = rnorm(n.subj, mean = 100, sd =15)
  groupID = c(rep("A1",n.subj),rep("A2",n.subj))
  dat = data.frame(score = c(groupA1,groupA2),ID = groupID)
  grand.mean = mean(dat$score)
  mean.A1 = mean(groupA1)
  mean.A2 = mean(groupA2)
  ss.total = sum((dat$score-grand.mean)^2)
  ss.group = n.subj*(mean.A1-grand.mean)^2+n.subj*(mean.A2-grand.mean)^2
  ss.error = sum((groupA1-mean.A1)^2)+sum((groupA2-mean.A2)^2)
  df.group = 2 - 1
  df.error = (n.subj-1)+(n.subj-1)
  ms.group = ss.group/df.group
  ms.error = ss.error/df.error
  f.hist[i.rep] = ms.group/ms.error
}

hist(f.hist,1000,xlim=c(0,15),probability = T)
f.temp = seq(0,15,0.05)
f.true = df(f.temp,df.group,df.error)
lines(f.temp,f.true,col='red',lty=2,lwd=2)
thres.f = qf(0.95,df.group,df.error)
abline(v = thres.f, col='green')
prop.FP = sum(f.hist > thres.f)/n.rep