データ解析基礎論B W03

# RB
dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt")
colnames(dat)<-c("id",'method','subj','words')
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat.aov.sum = summary(dat.aov)

# Tukey HSD
mse <- dat.aov.sum[[1]][[1]][3,3]
q<-qtukey(0.95,4,df=21)
hsd<-q*sqrt(mse/8)
dat.means=tapply(dat$words, dat$method, mean)
ck.hsd<-abs(outer(dat.means,dat.means,'-'))>hsd

# SPF
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration,  # x軸
                 dat$method,    # まとめる変数    
                 dat$result,    # y軸 
                 pch=c(20,20), 
                 col=c("skyblue","orange"),
                 ylab="score",
                 xlab="Duration",
                 lwd=3, 
                 type='b',
                 cex=2, 
                 trace.label="Method")

source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)

データ解析基礎論B ー 期末テストの解説

# Q2 example
# visualization
interaction.plot(dat.Q2$med,dat.Q2$ingred,dat.Q2$bp,type='o',
   col = c("skyblue", "coral"),lwd =3, cex = 2,
   pch=c(18,19),xlab = "quantitily", ylab = "Blood Pressure",
   trace.label="type of medicine",legend = T)
# descriptive stats
tapply(dat.Q2$bp,list(dat.Q2$med,dat.Q2$ingred),mean)
# ANOVA
dat.Q2.aov = aov(bp~med*ingred,data=dat.Q2)
summary(dat.Q2.aov)
# Linear Regression
dat.Q2.lm = lm(bp~med*ingred, contrasts=list(med=contr.poly), data=dat.Q2)
summary(dat.Q2.lm)
anova(dat.Q2.lm)

# Q3 example 1
# visualization
plot(hours~btype, data= dat.Q3V1)
# ANOVA
dat.Q3V1.aov = aov(hours~btype,data =dat.Q3V1)
summary(dat.Q3V1.aov)
TukeyHSD(dat.Q3V1.aov)
# Linear Regression
dat.Q3V1.lm = lm(hours~btype, data = dat.Q3V1)
summary(dat.Q3V1.aov)

# Q3 example 2
# visualization
plot(dat.Q3V1$hours, dat.Q3V1$A,type = 'p',pch=20,cex =2, 
   xlab = "hours cleaned", ylab= "Blood Type",yaxt ="n")
axis(2, at = c(0,1), labels = c("not A", "A"))
# GLM
dat.Q3V1.glm=glm(A~hours,family="binomial",data=dat.Q3V1 )
# plotting result
x.temp = seq(100,330,1)
glm.cf = coef(dat.Q3V1.glm)
y.temp = 1/(1+exp(-1*(glm.cf[1]+glm.cf[2]*x.temp)))
lines(x.temp,y.temp,col='red',lwd=2)

# Q4 example
plot(score~group,data=dat.Q4)
dat.Q4.G = lm(score~group,data=dat.Q4)

#
dat.Q4.NG1 = lm(score~income, data=dat.Q4)
dat.Q4.NG2 = lm(score~gender, data=dat.Q4)
dat.Q4.NG3 = lm(score~study, data=dat.Q4)
dat.Q4.NG4 = lm(score~height, data=dat.Q4)

dat.Q4.NG5 = lm(score~study+group,data=dat.Q4)
dat.Q4.NG6 = lm(score~study+income,data=dat.Q4)

データ解析基礎論A week12

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/datW03R.txt")

means<-tapply(dat$shoesize,list(dat$gender, dat$affil),mean)
Ns<-tapply(dat$shoesize,list(dat$gender, dat$affil),length)
sds<-tapply(dat$shoesize,list(dat$gender, dat$affil),sd)
sems<-sds/sqrt(Ns)
plot(c(0,1),means[,1],type='o',col='skyblue', xlim=c(-0.1,1.1), lwd=2, 
     cex=2, pch=20, ylim=c(min(means)*0.975, max(means)*1.025), xlab="gender", ylab="shoesize",
     xaxt="n")
axis(1,c(0,1),c("female","male"))
lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20)
lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2)
lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2)
lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2)
lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2)
legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)


interaction.plot(dat$gender,
                 dat$affil,
                 dat$shoesize, 
                 pch=c(20,20), 
                 col=c("skyblue","orange"), 
                 xlab="gender", ylab="shoesize", 
                 lwd=3,type='b',cex=2,
                 trace.label="Affiliation")

dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)

# testing simple main effect of gender
means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov)   # ANOVA table
MSe=dat.aov.sum[[1]][4,3]      # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1]      # DF for error or dfE=16
dfG=1                          # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe   # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe   # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE)  # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE)  # p-value for gender effect given PS

# testing simple main effect of affiliation
SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1				          # DF for affil
F_affil_F=SS_affil_F/dfA/MSe         # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe         # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE)      # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE)      # p-value for affiliation effect | M

# ANOVA 2x4 
dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
interaction.plot(dat$duration,dat$method,dat$result, pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration",lwd=3,type='b',cex=2,trace.label="Method")
dat.aov=aov(result~method*duration,data=dat)

means<-tapply(dat$result,list(dat$method,dat$duration),mean)
ssM_1=5*(sum(means[,1]^2)-0.5*(sum(means[,1])^2))
ssM_2=5*(sum(means[,2]^2)-0.5*(sum(means[,2])^2))
ssM_3=5*(sum(means[,3]^2)-0.5*(sum(means[,3])^2))
ssM_4=5*(sum(means[,4]^2)-0.5*(sum(means[,4])^2))
MSe=mod2.sum[[1]][4,3]
DFe=mod2.sum[[1]][4,1]
DFm=1
fM_1=(ssM_1/DFm)/MSe
1-pf(fM_1,DFm,DFe)

ssD_X=5*(sum(means[1,]^2)-1/4*(sum(means[1,])^2))
ssD_Y=5*(sum(means[2,]^2)-1/4*(sum(means[2,])^2))
DFd=3
fD_X=(ssD_X/DFd)/MSe
fD_Y=(ssD_Y/DFd)/MSe
1-pf(fD_X,DFd,DFe)
1-pf(fD_X,DFd,DFe)

qv=qtukey(0.95,DFd+1,DFe)
hsd=qv*(sqrt(MSe/5))
print(diffM<-outer(means[1,],means[1,],"-"))           
abs(diffM)>hsd    

# useful r functions
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme2017.R")
CRF.tsme(dat.aov, dat)
   

データ解析基礎論A week11

f=c(24.1,23.9,24.4,24.4,23.5)
m=c(25.6,26.1,25.8,25.9,26)

G.mean=mean(c(f,m))
ss.total=sum((c(f,m)-G.mean)^2)
ss.tr=sum((mean(f)-G.mean)^2)*5+sum((mean(m)-G.mean)^2)*5
ss.error=sum((f-mean(f))^2)+sum((m-mean(m))^2)

dat<-data.frame(ssize=c(f,m),gender=c(rep("f",5),rep("m",5)))
dat.aov<-aov(ssize ~ gender,data=dat)
dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
dat.aov<-aov(shoesize~club, dat)

qT<-qtukey(0.95, 3, 67)
HSD<-qT*sqrt((2.243*(1/23+1/24))/2)


dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.txt",
              col.names=c("dump","method","result"))
levels(dat$method)<-c('free','repeat','sentence','image')
dat.aov<-aov(result~method, data=dat)
summary(dat.aov)


dat.means<-tapply(dat$result,dat$method,mean)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))

t.value=bunshi/bunbo
2*(1-pt(t.value,28))

dat.means<-tapply(dat$result,dat$method,mean)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
F.value=(bunshi/bunbo)^2
new.F=3*qf(0.95,3,28)

データ解析基礎論A. Week10

freq<-c(33,37,7,23)
pref<-factor(c('soba','udon','soba','udon'))
region<-factor(c('east','east','west','west'))
dat<-data.frame(pref,region,freq)
dat.table=table(pref,region)
dat.table[cbind(pref,region)]<-freq

ME.lrt=2*sum((freq)*log(freq/25))
dat.loglinCE_A<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglinCE_B<-loglin(dat.table,list(2), fit=T,param=T)
dat.loglinIND<-loglin(dat.table,list(1,2), fit=T,param=T)

ME.lrt-dat.loglinCE_A$lrt				
1-pchisq(ME.lrt-dat.loglinCE_A$lrt,1)		

dat.loglinCE_A$lrt-dat.loglinIND$lrt	
1-pchisq(dat.loglinCE_A$lrt-dat.loglinIND$lrt,1)	

dat.loglinIND$lrt-dat.loglinSAT$lrt            
1-pchisq(dat.loglinIND$lrt-dat.loglinSAT$lrt,1) 	

freq<-c(9,5,2,4,16,10,26,28)
gender<-factor(c(rep('female',4),c(rep('male',4))))
affil<-factor(rep(c('L','L','E','E'),2))
circle<-factor(rep(c('tennis','astro'),4))
dat<-data.frame(gener,affil,circle,freq)

dat.table<-table(gender,affil,circle)
dat.table[cbind(gender,affil,circle)]<-freq

dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T)

dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglin3<-loglin(dat.table,list(1,3), fit=T,param=T)
dat.loglin4<-loglin(dat.table,list(1,2,3), fit=T,param=T)
dat.loglin5<-loglin(dat.table,list(c(1,3),2), fit=T,param=T)
dat.loglin6<-loglin(dat.table,list(c(1,3),c(1,2)), fit=T,param=T)
dat.loglin7<-loglin(dat.table,list(c(1,3),c(1,2),c(2,3)), fit=T,param=T)
dat.loglin8<-loglin(dat.table,list(c(1,2,3)), fit=T,param=T)

データ解析基礎論A week09

dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size", 
     ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
dat.lm<-lm(as.numeric(dat$gender)~dat$shoesize,data=dat)
abline(dat.lm,lwd=4,col='red')

chisq.test(c(72,23,16,49),p=rep(40,4),rescale.p=T)
M=matrix(c(52,48,8,42),nrow=2)
chisq.test(M,correct=F)

plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size", 
     ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size", 
     ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
abline(dat.lm,lwd=4,col='red')

p = seq(0.1,0.9,length.out = 20)
plot(p/(1-p),col="red",pch=20,cex=3)
plot(log(p/(1-p)),col="red",pch=20,cex=3)


dat.lr<-glm(gender~shoesize, family=binomial, data=dat)
x=seq((min(dat$shoesize)-2), (max(dat$shoesize)+2), 0.1)
keisu  = coef(dat.lr)
y.prob=1/(1+exp(-1*(keisu[1]+keisu[2]*x)))
plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size", 
     ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
lines(x,y.prob+1,lty=2,lwd=4,col='green')
abline(dat.lm,lwd=4,col='red')

anova(dat.lr, test ="Chisq")

dat.lr0<-glm(gender~1,family="binomial",data=dat)
dat.lrS<-glm(gender~shoesize,family=binomial,data=dat)
dat.lrH<-glm(gender~h,family="binomial",data=dat)


dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.glmAllAdd=glm(survival~age+Ncigarettes+NdaysGESTATION,family=binomial,data=dat)
dat.glmAllMult=glm(survival~age*Ncigarettes*NdaysGESTATION,family=binomial,data=dat)
stepAIC(dat.glmAllMult)

データ解析基礎論A week07

dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.txt")
dat2<-data.frame(result=dat$result,
                 c1=c(rep(0,8),rep(1,8),rep(0,16)),
                 c2=c(rep(0,16),rep(1,8),rep(0,8)),
                 c3=c(rep(0,24),rep(1,8)))
dat2.lm<-lm(result~.,data=dat2)


dat3<-data.frame(result=dat$result, 
                 c1=c(rep(-3,8), rep(1,24)), 
                 c2=c(rep(0,8),rep(-2,8),rep(1,16)),
                 c3=c(rep(0,16),rep(-1,8),rep(1,8)))
dat3.lm<-lm(result~c1+c2+c3,data=dat3)


dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
plot(dat$result~dat$duration,data=dat[dat$method=="method.x",])
result<-dat$result[dat$method=="method.X"]
CL=c(rep(-3,5),rep(-1,5),rep(1,5),rep(3,5))
CQ=c(rep(-1,5),rep(1,5),rep(1,5),rep(-1,5))
CC=c(rep(-3,5),rep(1,5),rep(-1,5),rep(3,5))

# or
dat2<-dat[dat$method=="method.X",]
dat2.lm<-lm(result~duration, data=dat2,
  contrasts=list(duration="contr.poly"))

result2=result;
result2[16:20]=result2[16:20]-3
plot(tapply(result2,dat$duration[dat$method=="method.X"],mean),
     pch=20,col="red",lwd=3, type="o",cex=3,ylab="mean",xlab="time")
trend2.lm<-lm(result2~CL+CQ+CC)

Cont=contr.poly(4) 

##
subj.gender = c(rep("male",30),rep("female",30))
pic.gender = rep(c(rep("male",15),rep("female",15)),2)
# 15m-m, 15m-f, 15f-m, 15f-f 
eye.fix = c(round(rnorm(15,50,5)),
           round(rnorm(15,70,5)),
           round(rnorm(15,65,5)),
           round(rnorm(15,50,5)))
datE = data.frame(eye.fix = eye.fix, subj.gender= subj.gender,pic.gender=pic.gender)
interaction.plot(datE$subj.gender, datE$pic.gender, datE$eye.fix, 
                 trace.label = "Gender of Stimuli", xlab = "Gender of Participants",
                 ylab = "Number of fixation",lwd = 3,type = "o",pch = c(17,20),cex =3,
                 col = c('skyblue','coral'),legend =T)
datE.lm <- lm(eye.fix~subj.gender*pic.gender,data=datE)

dat<-read.csv("http://www.matsuka.info/data_folder/ancova01.csv")
dat$pretest=dat$pretest*0.1

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") 
dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat)))
plot(dat.lm01,which=1)
plot(dat.lm01,which=2)


par(mfrow=c(2,2))
plot(dat.lm01)

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/dktb312.csv")
dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)])

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/forbesdata.txt") 
boil.lm<-lm(log(pressure)~temp, data=boil)
par(mfrow=c(2,2))
plot(boil.lm)

boil.lm<-lm(log(pressure)~temp, data=boil[-12,])
plot(boil.lm)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv")
plot(dat)
dat.lm<-lm(sales~., data=dat) 
install.packages("DAAG")
library(DAAG)
vif(dat.lm)

データ解析基礎論A Week06

# 2017 week 06 
# 
# regression
dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(ani~otouto,data=dat,xlab="Score of Younger Brother", 
  ylab = "Score of Elder Brother", pch=20,cex=2,
  xlim=c(5,27),ylim = c(5,27))
dat.lm <- lm(ani~otouto, data=dat)
summary(dat.lm)
abline(dat.lm, col = 'red',lwd = 2.5)

# two sample t-test
boxplot(dat,col=c('skyblue','coral'),ylab = "score")
t.test(dat$ani, dat$otouto, var.equal=T)
dat2<-data.frame(score = c(dat$ani, dat$otouto),order=c(rep("ani",10),rep("otouto",10)))
plot(dat2$score~as.numeric(dat2$order),pch=20,xlab="order",
  ylab="score",xlim=c(0.5,2.5),cex=2,xaxt="n")
axis(1,c(1,2),c("ani","otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

# one sample t-test
dat.D = dat$ani - dat$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)
dat.D.lm<-lm(dat.D~1)
plot(dat.D~rep(1,10),pch=20,xlab="",ylab="Difference",cex=3)
summary(dat.D.lm)

# plotting errors
plot(ani~otouto,data=dat,xlab="Score of Younger Brother", 
  ylab = "Score of Elder Brother", pch=20,cex=2,
   xlim=c(5,27),ylim = c(5,27))
dat.lm <- lm(ani~otouto, data=dat)
abline(h=mean(dat$ani),lty=2,col="green",lwd=3)
abline(dat.lm,col='red',lwd=3)
pred.lm<-predict(dat.lm)
for (i.ani in 1:10){
  lines(rep(dat$otouto[i.ani],2),c(dat$ani[i.ani],pred.lm[i.ani]),
    col='blue',lwd=3)
}

# multiple regression
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.regALL<-lm(sales~price+design+material,data=dat)

# ANCOVA
dat<-read.csv("http://www.matsuka.info/data_folder/ancova01.csv")
dat$pretest=dat$pretest*0.1