# RB
dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt")
colnames(dat)<-c("id",'method','subj','words')
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat.aov.sum = summary(dat.aov)
# Tukey HSD
mse <- dat.aov.sum[[1]][[1]][3,3]
q<-qtukey(0.95,4,df=21)
hsd<-q*sqrt(mse/8)
dat.means=tapply(dat$words, dat$method, mean)
ck.hsd<-abs(outer(dat.means,dat.means,'-'))>hsd
# SPF
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration, # x軸
dat$method, # まとめる変数
dat$result, # y軸
pch=c(20,20),
col=c("skyblue","orange"),
ylab="score",
xlab="Duration",
lwd=3,
type='b',
cex=2,
trace.label="Method")
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)
Category Archives: データ解析
データ解析基礎論B ー 期末テストの解説
# Q2 example
# visualization
interaction.plot(dat.Q2$med,dat.Q2$ingred,dat.Q2$bp,type='o',
col = c("skyblue", "coral"),lwd =3, cex = 2,
pch=c(18,19),xlab = "quantitily", ylab = "Blood Pressure",
trace.label="type of medicine",legend = T)
# descriptive stats
tapply(dat.Q2$bp,list(dat.Q2$med,dat.Q2$ingred),mean)
# ANOVA
dat.Q2.aov = aov(bp~med*ingred,data=dat.Q2)
summary(dat.Q2.aov)
# Linear Regression
dat.Q2.lm = lm(bp~med*ingred, contrasts=list(med=contr.poly), data=dat.Q2)
summary(dat.Q2.lm)
anova(dat.Q2.lm)
# Q3 example 1
# visualization
plot(hours~btype, data= dat.Q3V1)
# ANOVA
dat.Q3V1.aov = aov(hours~btype,data =dat.Q3V1)
summary(dat.Q3V1.aov)
TukeyHSD(dat.Q3V1.aov)
# Linear Regression
dat.Q3V1.lm = lm(hours~btype, data = dat.Q3V1)
summary(dat.Q3V1.aov)
# Q3 example 2
# visualization
plot(dat.Q3V1$hours, dat.Q3V1$A,type = 'p',pch=20,cex =2,
xlab = "hours cleaned", ylab= "Blood Type",yaxt ="n")
axis(2, at = c(0,1), labels = c("not A", "A"))
# GLM
dat.Q3V1.glm=glm(A~hours,family="binomial",data=dat.Q3V1 )
# plotting result
x.temp = seq(100,330,1)
glm.cf = coef(dat.Q3V1.glm)
y.temp = 1/(1+exp(-1*(glm.cf[1]+glm.cf[2]*x.temp)))
lines(x.temp,y.temp,col='red',lwd=2)
# Q4 example
plot(score~group,data=dat.Q4)
dat.Q4.G = lm(score~group,data=dat.Q4)
#
dat.Q4.NG1 = lm(score~income, data=dat.Q4)
dat.Q4.NG2 = lm(score~gender, data=dat.Q4)
dat.Q4.NG3 = lm(score~study, data=dat.Q4)
dat.Q4.NG4 = lm(score~height, data=dat.Q4)
dat.Q4.NG5 = lm(score~study+group,data=dat.Q4)
dat.Q4.NG6 = lm(score~study+income,data=dat.Q4)
データ解析基礎論A weekly assignment 12
課題 WAA12はここにあります。
解答例はここにあります。
過去のテストの例はデータ解析基礎論のページにあります。
データ解析基礎論A week12
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/datW03R.txt")
means<-tapply(dat$shoesize,list(dat$gender, dat$affil),mean)
Ns<-tapply(dat$shoesize,list(dat$gender, dat$affil),length)
sds<-tapply(dat$shoesize,list(dat$gender, dat$affil),sd)
sems<-sds/sqrt(Ns)
plot(c(0,1),means[,1],type='o',col='skyblue', xlim=c(-0.1,1.1), lwd=2,
cex=2, pch=20, ylim=c(min(means)*0.975, max(means)*1.025), xlab="gender", ylab="shoesize",
xaxt="n")
axis(1,c(0,1),c("female","male"))
lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20)
lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2)
lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2)
lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2)
lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2)
legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)
interaction.plot(dat$gender,
dat$affil,
dat$shoesize,
pch=c(20,20),
col=c("skyblue","orange"),
xlab="gender", ylab="shoesize",
lwd=3,type='b',cex=2,
trace.label="Affiliation")
dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)
# testing simple main effect of gender
means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov) # ANOVA table
MSe=dat.aov.sum[[1]][4,3] # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1] # DF for error or dfE=16
dfG=1 # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE) # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE) # p-value for gender effect given PS
# testing simple main effect of affiliation
SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1 # DF for affil
F_affil_F=SS_affil_F/dfA/MSe # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE) # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE) # p-value for affiliation effect | M
# ANOVA 2x4
dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
interaction.plot(dat$duration,dat$method,dat$result, pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration",lwd=3,type='b',cex=2,trace.label="Method")
dat.aov=aov(result~method*duration,data=dat)
means<-tapply(dat$result,list(dat$method,dat$duration),mean)
ssM_1=5*(sum(means[,1]^2)-0.5*(sum(means[,1])^2))
ssM_2=5*(sum(means[,2]^2)-0.5*(sum(means[,2])^2))
ssM_3=5*(sum(means[,3]^2)-0.5*(sum(means[,3])^2))
ssM_4=5*(sum(means[,4]^2)-0.5*(sum(means[,4])^2))
MSe=mod2.sum[[1]][4,3]
DFe=mod2.sum[[1]][4,1]
DFm=1
fM_1=(ssM_1/DFm)/MSe
1-pf(fM_1,DFm,DFe)
ssD_X=5*(sum(means[1,]^2)-1/4*(sum(means[1,])^2))
ssD_Y=5*(sum(means[2,]^2)-1/4*(sum(means[2,])^2))
DFd=3
fD_X=(ssD_X/DFd)/MSe
fD_Y=(ssD_Y/DFd)/MSe
1-pf(fD_X,DFd,DFe)
1-pf(fD_X,DFd,DFe)
qv=qtukey(0.95,DFd+1,DFe)
hsd=qv*(sqrt(MSe/5))
print(diffM<-outer(means[1,],means[1,],"-"))
abs(diffM)>hsd
# useful r functions
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme2017.R")
CRF.tsme(dat.aov, dat)
データ解析基礎論A week11
f=c(24.1,23.9,24.4,24.4,23.5)
m=c(25.6,26.1,25.8,25.9,26)
G.mean=mean(c(f,m))
ss.total=sum((c(f,m)-G.mean)^2)
ss.tr=sum((mean(f)-G.mean)^2)*5+sum((mean(m)-G.mean)^2)*5
ss.error=sum((f-mean(f))^2)+sum((m-mean(m))^2)
dat<-data.frame(ssize=c(f,m),gender=c(rep("f",5),rep("m",5)))
dat.aov<-aov(ssize ~ gender,data=dat)
dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
dat.aov<-aov(shoesize~club, dat)
qT<-qtukey(0.95, 3, 67)
HSD<-qT*sqrt((2.243*(1/23+1/24))/2)
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.txt",
col.names=c("dump","method","result"))
levels(dat$method)<-c('free','repeat','sentence','image')
dat.aov<-aov(result~method, data=dat)
summary(dat.aov)
dat.means<-tapply(dat$result,dat$method,mean)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
t.value=bunshi/bunbo
2*(1-pt(t.value,28))
dat.means<-tapply(dat$result,dat$method,mean)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
F.value=(bunshi/bunbo)^2
new.F=3*qf(0.95,3,28)
データ解析基礎論A. Week10
freq<-c(33,37,7,23)
pref<-factor(c('soba','udon','soba','udon'))
region<-factor(c('east','east','west','west'))
dat<-data.frame(pref,region,freq)
dat.table=table(pref,region)
dat.table[cbind(pref,region)]<-freq
ME.lrt=2*sum((freq)*log(freq/25))
dat.loglinCE_A<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglinCE_B<-loglin(dat.table,list(2), fit=T,param=T)
dat.loglinIND<-loglin(dat.table,list(1,2), fit=T,param=T)
ME.lrt-dat.loglinCE_A$lrt
1-pchisq(ME.lrt-dat.loglinCE_A$lrt,1)
dat.loglinCE_A$lrt-dat.loglinIND$lrt
1-pchisq(dat.loglinCE_A$lrt-dat.loglinIND$lrt,1)
dat.loglinIND$lrt-dat.loglinSAT$lrt
1-pchisq(dat.loglinIND$lrt-dat.loglinSAT$lrt,1)
freq<-c(9,5,2,4,16,10,26,28)
gender<-factor(c(rep('female',4),c(rep('male',4))))
affil<-factor(rep(c('L','L','E','E'),2))
circle<-factor(rep(c('tennis','astro'),4))
dat<-data.frame(gener,affil,circle,freq)
dat.table<-table(gender,affil,circle)
dat.table[cbind(gender,affil,circle)]<-freq
dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglin3<-loglin(dat.table,list(1,3), fit=T,param=T)
dat.loglin4<-loglin(dat.table,list(1,2,3), fit=T,param=T)
dat.loglin5<-loglin(dat.table,list(c(1,3),2), fit=T,param=T)
dat.loglin6<-loglin(dat.table,list(c(1,3),c(1,2)), fit=T,param=T)
dat.loglin7<-loglin(dat.table,list(c(1,3),c(1,2),c(2,3)), fit=T,param=T)
dat.loglin8<-loglin(dat.table,list(c(1,2,3)), fit=T,param=T)
データ解析基礎論A week09
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size",
ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
dat.lm<-lm(as.numeric(dat$gender)~dat$shoesize,data=dat)
abline(dat.lm,lwd=4,col='red')
chisq.test(c(72,23,16,49),p=rep(40,4),rescale.p=T)
M=matrix(c(52,48,8,42),nrow=2)
chisq.test(M,correct=F)
plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size",
ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size",
ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
abline(dat.lm,lwd=4,col='red')
p = seq(0.1,0.9,length.out = 20)
plot(p/(1-p),col="red",pch=20,cex=3)
plot(log(p/(1-p)),col="red",pch=20,cex=3)
dat.lr<-glm(gender~shoesize, family=binomial, data=dat)
x=seq((min(dat$shoesize)-2), (max(dat$shoesize)+2), 0.1)
keisu = coef(dat.lr)
y.prob=1/(1+exp(-1*(keisu[1]+keisu[2]*x)))
plot(dat$shoesize, dat$gender, pch=20, cex=3, xlab="Shoe size",
ylab="gender", col='blue', ylim = c(0.5, 2.5), yaxt="n")
axis(2, at = c(1,2),labels=c("female","male"))
lines(x,y.prob+1,lty=2,lwd=4,col='green')
abline(dat.lm,lwd=4,col='red')
anova(dat.lr, test ="Chisq")
dat.lr0<-glm(gender~1,family="binomial",data=dat)
dat.lrS<-glm(gender~shoesize,family=binomial,data=dat)
dat.lrH<-glm(gender~h,family="binomial",data=dat)
dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.glmAllAdd=glm(survival~age+Ncigarettes+NdaysGESTATION,family=binomial,data=dat)
dat.glmAllMult=glm(survival~age*Ncigarettes*NdaysGESTATION,family=binomial,data=dat)
stepAIC(dat.glmAllMult)
データ解析基礎論 week 08
source("http://peach.l.chiba-u.ac.jp/course_folder/jn_plot.R")
protest<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/protest.csv")
lm <- lm(liking~sexism*protest,data=protest)
summary(lm)
jn_plot(lm, "protest", "sexism", alpha=.05)
データ解析基礎論A week07
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.txt")
dat2<-data.frame(result=dat$result,
c1=c(rep(0,8),rep(1,8),rep(0,16)),
c2=c(rep(0,16),rep(1,8),rep(0,8)),
c3=c(rep(0,24),rep(1,8)))
dat2.lm<-lm(result~.,data=dat2)
dat3<-data.frame(result=dat$result,
c1=c(rep(-3,8), rep(1,24)),
c2=c(rep(0,8),rep(-2,8),rep(1,16)),
c3=c(rep(0,16),rep(-1,8),rep(1,8)))
dat3.lm<-lm(result~c1+c2+c3,data=dat3)
dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
plot(dat$result~dat$duration,data=dat[dat$method=="method.x",])
result<-dat$result[dat$method=="method.X"]
CL=c(rep(-3,5),rep(-1,5),rep(1,5),rep(3,5))
CQ=c(rep(-1,5),rep(1,5),rep(1,5),rep(-1,5))
CC=c(rep(-3,5),rep(1,5),rep(-1,5),rep(3,5))
# or
dat2<-dat[dat$method=="method.X",]
dat2.lm<-lm(result~duration, data=dat2,
contrasts=list(duration="contr.poly"))
result2=result;
result2[16:20]=result2[16:20]-3
plot(tapply(result2,dat$duration[dat$method=="method.X"],mean),
pch=20,col="red",lwd=3, type="o",cex=3,ylab="mean",xlab="time")
trend2.lm<-lm(result2~CL+CQ+CC)
Cont=contr.poly(4)
##
subj.gender = c(rep("male",30),rep("female",30))
pic.gender = rep(c(rep("male",15),rep("female",15)),2)
# 15m-m, 15m-f, 15f-m, 15f-f
eye.fix = c(round(rnorm(15,50,5)),
round(rnorm(15,70,5)),
round(rnorm(15,65,5)),
round(rnorm(15,50,5)))
datE = data.frame(eye.fix = eye.fix, subj.gender= subj.gender,pic.gender=pic.gender)
interaction.plot(datE$subj.gender, datE$pic.gender, datE$eye.fix,
trace.label = "Gender of Stimuli", xlab = "Gender of Participants",
ylab = "Number of fixation",lwd = 3,type = "o",pch = c(17,20),cex =3,
col = c('skyblue','coral'),legend =T)
datE.lm <- lm(eye.fix~subj.gender*pic.gender,data=datE)
dat<-read.csv("http://www.matsuka.info/data_folder/ancova01.csv")
dat$pretest=dat$pretest*0.1
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat)))
plot(dat.lm01,which=1)
plot(dat.lm01,which=2)
par(mfrow=c(2,2))
plot(dat.lm01)
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/dktb312.csv")
dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)])
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/forbesdata.txt")
boil.lm<-lm(log(pressure)~temp, data=boil)
par(mfrow=c(2,2))
plot(boil.lm)
boil.lm<-lm(log(pressure)~temp, data=boil[-12,])
plot(boil.lm)
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv")
plot(dat)
dat.lm<-lm(sales~., data=dat)
install.packages("DAAG")
library(DAAG)
vif(dat.lm)
データ解析基礎論A Week06
# 2017 week 06
#
# regression
dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(ani~otouto,data=dat,xlab="Score of Younger Brother",
ylab = "Score of Elder Brother", pch=20,cex=2,
xlim=c(5,27),ylim = c(5,27))
dat.lm <- lm(ani~otouto, data=dat)
summary(dat.lm)
abline(dat.lm, col = 'red',lwd = 2.5)
# two sample t-test
boxplot(dat,col=c('skyblue','coral'),ylab = "score")
t.test(dat$ani, dat$otouto, var.equal=T)
dat2<-data.frame(score = c(dat$ani, dat$otouto),order=c(rep("ani",10),rep("otouto",10)))
plot(dat2$score~as.numeric(dat2$order),pch=20,xlab="order",
ylab="score",xlim=c(0.5,2.5),cex=2,xaxt="n")
axis(1,c(1,2),c("ani","otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)
# one sample t-test
dat.D = dat$ani - dat$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)
dat.D.lm<-lm(dat.D~1)
plot(dat.D~rep(1,10),pch=20,xlab="",ylab="Difference",cex=3)
summary(dat.D.lm)
# plotting errors
plot(ani~otouto,data=dat,xlab="Score of Younger Brother",
ylab = "Score of Elder Brother", pch=20,cex=2,
xlim=c(5,27),ylim = c(5,27))
dat.lm <- lm(ani~otouto, data=dat)
abline(h=mean(dat$ani),lty=2,col="green",lwd=3)
abline(dat.lm,col='red',lwd=3)
pred.lm<-predict(dat.lm)
for (i.ani in 1:10){
lines(rep(dat$otouto[i.ani],2),c(dat$ani[i.ani],pred.lm[i.ani]),
col='blue',lwd=3)
}
# multiple regression
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.regALL<-lm(sales~price+design+material,data=dat)
# ANCOVA
dat<-read.csv("http://www.matsuka.info/data_folder/ancova01.csv")
dat$pretest=dat$pretest*0.1