dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
mean.M <-mean(dat$h[dat$gender=="M"])
sigma = 10
n.M = length(dat$h[dat$gender=="M"])
z.value=(mean.M-171)/(sqrt(sigma/n.M))
(1-pnorm(abs(z.value)))*2
ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)
ssize.mean = mean(ssize)
ssize.var = var(ssize)
N = 10
t.value=(ssize.mean-24)/(sqrt(ssize.var/N))
(1-pt(abs(t.value),df=9))*2
h.mean.M <-mean(dat$h[dat$gender=="M"])
h.var.M <- var(dat$h[dat$gender=="M"])
n.M = length(dat$h[dat$gender=="M"])
t.value=(h.mean.M-171)/(sqrt(h.var.M/n.M))
(1-pt(abs(t.value),df = (n.M-1)))*2
# RCMD
A=c(12,19,10,10,14,18,15,11,16)
B=c(15,20,16,14,17,16,12,12,19)
d=A-B
tValue<-mean(d)/sqrt(var(d)/length(d))
(1-pt(abs(tValue), df=8))*2
t.test(d,mu=0)
X1=c(78,70,66,76,78,76,88,76)
X2=c(76,72,60,72,70,72,84,70)
t.value=(mean(X1)-mean(X2))/sqrt((var(X1)+var(X2))/8)
2*(1-pt(abs(t.value),14))
Category Archives: データ解析
データ解析基礎論a Week04
nSample=10;nRep=10^5;
CLT.unif <- function(nSample, nRep) {
x=matrix(runif(nSample*nRep),nrow=nSample);
x.means<-colMeans(x)
hist(x.means,50,main='Distribution of Means of Uniform Distribution',
xlab='Means', probability=T)
x.means.dens<-density(x.means)
lines(x.means.dens,lwd=3,lty=1,col='red')
x2=seq(0,1,0.001);CLT=dnorm(x2,mean=0.5,sd=(sqrt(1/12))/(sqrt(nSample)))
lines(x2,CLT,lwd=3,lty=3,col='cyan')
legend("topright",c("Density of Actual Means","Normal Distribution"),
lty=c(1,3), col=c('red','cyan'),lwd=3)
}
> CLT.unif(10,100000)
データ解析基礎論A Week03
dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt",header=T)
head(dat)
summary(dat)
boxplot(dat$shoesize, col = "skyblue", main = "Dist. of Shoesize",
ylab = "Size of shoe (in cm)")
boxplot(dat$h, col = "coral", main = "Dist. of Height", ylab = "Height (in cm)")
dat.meter = dat[,1:2]*0.01
dat.h.ext5 = dat$h+5
dat.ss.ext1 = dat$shoesize+1
hANDshoe = dat$h+dat$shoesize
dat.h.meter.ext5 = dat$h*0.01+0.05
データ解析基礎論A グラフの基礎
データ解析基礎論A 第2週 グラフの基礎
# descriptive statistics
dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt",header=T)
summary(dat)
mean(dat$shoesize)
var(dat[,1:2])
cov(dat[,1:2])
cor(dat[,1:2])
# basics - plotting
x=seq(-3,3,0.1);y=x^2;
plot(x,y)
plot(x,y,col='red')
plot(x,y,pch=20)
plot(x,y,type='l')
plot(x,y,type='l',lty=4,lwd=3)
plot(x,y,main="THIS IS THE TITLE", xlab="Label for X-axis",ylab="Label for Y-axis")
plot(x,y,main="TITLE", xlab="X here",ylab="Y here",xlim=c(-3.5,3.5),ylim=c(-0.5, 10))
plot(x,y,col='blue',type='o',lty=2,pch=19,lwd=3,main="Y=X*X", xlab="X",ylab="X*X",
xlim=c(-3.5,3.5),ylim=c(-0.5, 10))
# histogram
dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt",header=T)
hist(dat$h,main="Histogram of Height",xlab="Height",col='blue',xlim=c(140,190))
dens<-density(dat$h);
hist(dat$h,main="Histogram of Height",xlab="Height",xlim=c(140,190),probability=T)
lines(dens,lwd=2,col='red',lty=2)
# boxplot
boxplot(dat$h,main="Boxplot of Height",ylab="Height",col='cyan',ylim=c(140,190))
boxplot(dat$h~dat$gender,main="Distribution of Height by Gender",ylab="Gender",
xlab="Height",col=c('blue','cyan'),ylim=c(140,190),horizontal=T)
boxplot(dat$h~dat$gender+dat$affil, main="Distribution of Height by Gender and Affiliation",
ylab="Gender x Affiliation", xlab="Height", col=c('blue','cyan','red','magenta'),
ylim=c(140,190),horizontal=T)
# barplot
install.packages("gplots")
library(gplots)
means <- tapply(dat$h, dat$gender, mean)
sds<-tapply(dat$h,dat$gender,sd)
ns<-tapply(dat$h,dat$gender,length)
sems = sds/sqrt(ns)
barplot2(means, plot.ci=T,
ci.l = means - sems,
ci.u = means + sems,
ylim = c(140,180),
names.arg = c("Female","Male"),
xpd = F,
ylab = "height",
xlab = "gender")
# another barplot
means <- tapply(dat$h,list(dat$gender,dat$affil),mean)
sds <- tapply(dat$h,list(dat$gender,dat$affil),sd)
ns <- tapply(dat$h,list(dat$gender,dat$affil),length)
sem = sds/sqrt(ns)
barplot2(means[1:4], plot.ci=T, ci.l=means[1:4]-sem[1:4],
ci.u=means[1:4] + sem[1:4], ylim=c(150,175),
names.arg=c("Female,CS","Male,CS","Female,PSY","Male,PSY"),
xpd=F,ylab="height",xlab="gender & affiliation")
# histogram again
par(mfrow=c(1,2))
hist(dat[dat$gender=='F',]$h,main="Dist. of Height for Female Participants",
xlab="Height",xlim=c(140,190),probability=T)
dens.F=density(dat[dat$gender=='F',]$h);lines(dens.F,col='blue',lwd=2)
hist(dat[dat$gender=='M',]$h,main="Dist. of Height for Male Participants",
xlab="Height",xlim=c(140,190),probability=T,ylim=c(0,0.08))
dens.M=density(dat[dat$gender=='M',]$h);lines(dens.M,col='green',lwd=2)
par(mfrow=c(2,1))
par(mfrow=c(1,1))
plot(dens.F,col='blue',lwd=2,main="Dist. of Height by gender",xlab='Height',
ylab='density',xlim=c(140,190))
lines(dens.M,col='green',lwd=2)
legend("topleft", c('Female','Male'),col=c('blue','green'),cex=1.5,lwd=2)
# inserting text
text(157.5,0.04,'Female',col='blue',cex=2)
text(170,0.04,'Male',col='green',cex=2)
# scatterplot
plot(dat$shoesize,dat$h,main="Relationship b/w shoesize and height",xlab='shoe size',
ylab='height',pch=19,col='red')
text(22,175,paste("r =",substr(cor(dat$shoesize,dat$h),1,5)),cex=1.5)
abline(h=mean(dat$h),col='blue');
abline(v=mean(dat$shoesize),col='green');
text(21.5,165,'mean height',col='blue')
text(25.7,145,'mean shoesize',col='green')
abline(lm(dat$h~dat$shoesize),lty=2,lwd=2)
plot(dat[dat$gender=='F',]$shoesize,dat[dat$gender=='F',]$h,
main="Relationship b/w shoesize and height",xlab='shoesize',ylab='height',
cex.lab=1.5,pch=19,col='blue',xlim=c(20,29),ylim=c(140,190))
lines(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h,type='p',pch=15,col='green')
legend("topleft", c('Female','Male'), pch =c(19,15),col=c('blue','green'),cex=1.5)
dat.reg<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv", header=T)
plot(dat.reg,pch=20,col=c('blue'))
dat.pca<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt",header=T)
# intro to central limit theorem
ckCLT=function(n_rep,n_sample){
dat<-matrix(rnorm(n_rep*n_sample),nrow=n_rep,ncol=n_sample);
means<-rowMeans(dat)}
n_rep=10^6
n5=ckCLT(n_rep,5)
hist(n5,main="Dist. of sample meanx",xlab="sample mean",xlim=c(-3,3),probability=T)
den5=density(n5);lines(den5,col='blue',lwd=2)
n10=ckCLT(n_rep,10)
n25=ckCLT(n_rep,25)
n100=ckCLT(n_rep,100)
plot(den5,col='blue',lwd=2,,main="Dist. of sample meanx",xlab="sample mean",
xlim=c(-2,2),ylim=c(0,4))
den10=density(n10);lines(den10,col='red',lwd=2)
den25=density(n25);lines(den25,col='black',lwd=2)
den100=density(n100);lines(den100,col='green',lwd=2)
legend("topleft", c('N=5','N=10','N=25','N=100'),col=c('blue','red','black','green'),
cex=1.5,lwd=2)
データ解析基礎論a WAA01
データ解析基礎論A Weekly Assignment WAA01
提出期限:2017.04.18 4限開始まで
提出物: Rのコマンドとその出力
WAA01.1
下記のデータをdata.frameのコマンドを用いてRに入力してください。
> dat id gender score 1 M 73 2 M 64 3 M 78 4 F 74 5 F 84 6 F 78
WAA01.2
gender == “M” や gender == “F” などを用いて、上記のデータ男性と女性のscoreの平均値を求めてください。
WAA01.3
このファイルをダウンロードして、scoreが100の男性と女性の数を調べてください。
解答例
WAA01.1
例1
dat<-data.frame(id = 1:6, gender = c(rep("M",3),rep("F",3)),
score = c(73,64,78,74,84,78))
例2
id = 1:6
gender = c(rep("M",3),rep("F",3))
score = c(73,64,78,74,84,78)
dat2 <- data.frame(id = id, gender = gender, score = score)
WAA01.2
例1
mean(dat$score[which(dat$gender=="M")])
mean(dat$score[which(dat$gender=="F")])
例2
dat.male = dat[which(dat$gender=="M"),]
mean(dat.male$score)
dat.female = dat[which(dat$gender=="F"),]
mean(dat.female$score)
WAA01.3
例1
dat3 <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/waa01.csv")
head(dat3)
which(dat3[which(dat3$gender == "M"),]$score == 100)
which(dat3[which(dat3$gender == "F"),]$score == 100)
例2
dat3.male = dat3[which(dat3$gender == "M"), ]
dat3.female = dat3[which(dat3$gender == "F"), ]
dat3.male$id[which(dat3.male$score == 100)]
dat3.female$id[which(dat3.female$score == 100)]
データ解析基礎論a 2017 W01
データ解析基礎論a 2017 W01で使用するコードなど
data01<-data.frame(score = c(2,4,3,4),
dose = c(rep(10,2),rep(100,2)),
condition = rep(c('exp','control'),2))
dat01<-read.csv("http://www.matsuka.info/data_folder/temp_data01.txt",
header=T)
dat02<-read.csv("http://www.matsuka.info/data_folder/temp_data02.txt",
header=T, row.name=1)
dat03<-read.table("http://www.matsuka.info/data_folder/temp_data03.txt",
header=T, row.name=4)
dat04<-data.frame(score=c(dat03$x,dat03$y,dat03$z),
names=rep(c('sazae','masuo','tarachan'),3),
condition=sort(rep(c("x","y","z"),3)))
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt",
header=T);
mean(dat$shoesize[dat$gender == "M"])
mean(dat$shoesize[dat$gender == "F"])
mean(dat$shoesize[dat$h > 180])
単純主効果の検定など
SPF.tsmeとRBF.tsmeを更新しました。
使用方法は:
SPF.tsme(分散分析の結果、データ名、従属変数名)
RBF.tsme(分散分析の結果、データ名、従属変数名)
具体例は以下の通りです。
なお、これらを使用する前に
source(“http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt”)
を必ず実行してください。
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")
# SPFの例
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)
tsme <- SPF.tsme(dat.aov, dat, "result")
simple main effect test for BETWEEEN subject factor
ss df ms f p
1hr 2.5 1 2.5 1.0417 0.3150887
2hr 0.4 1 0.4 0.1667 0.6858104
3hr 12.1 1 12.1 5.0417 0.0317831
4hr 32.4 1 32.4 13.5000 0.0008662
residual 76.8 32 2.4
Tukey HSD test - between subject factor @ duration = 3hr
D E
D FALSE TRUE
E TRUE FALSE
Tukey HSD test - between subject factor @ duration = 4hr
D E
D FALSE TRUE
E TRUE FALSE
simple main effect test for WITHIN subject factor
ss df ms f p
D 49.2 3 16.4000 12.990 3.047e-05
E 1.0 3 0.3333 0.264 8.506e-01
residual 30.3 24 1.2625
Tukey HSD test - within subject factor @ method = D
1hr 2hr 3hr 4hr
1hr FALSE FALSE TRUE TRUE
2hr FALSE FALSE FALSE TRUE
3hr TRUE FALSE FALSE FALSE
4hr TRUE TRUE FALSE FALSE
# RBFの例
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration+s:method),dat)
tsme <- RBF.tsme(dat.aov, dat, "result")
simple main effect test for the 1st within-subject factor
ss df ms f p
1hr 2.5 1 2.5 1.563 0.2292743
2hr 0.4 1 0.4 0.250 0.6238816
3hr 12.1 1 12.1 7.562 0.0142341
4hr 32.4 1 32.4 20.250 0.0003635
residual 25.6 16 1.6
Tukey HSD test - 1st within-subject factor @ duration = 3hr
D E
D FALSE TRUE
E TRUE FALSE
Tukey HSD test - 1st within-subject factor @ duration = 4hr
D E
D FALSE TRUE
E TRUE FALSE
simple main effect test for the 2nd within-subject factor
ss df ms f p
D 49.2 3 16.4000 12.990 3.047e-05
E 1.0 3 0.3333 0.264 8.506e-01
residual 30.3 24 1.2625
Tukey HSD test - 2nd within-subject factor @ method = D
1hr 2hr 3hr 4hr
1hr FALSE FALSE TRUE TRUE
2hr FALSE FALSE FALSE TRUE
3hr TRUE FALSE FALSE FALSE
4hr TRUE TRUE FALSE FALSE
データ解析基礎論 分散分析の例
# 可視化
interaction.plot(dat$gender,
dat$affil,
dat$shoesize,
pch=c(20,20),
col=c("skyblue","orange"),
xlab="gender", ylab="shoesize",
lwd=3,type='b',cex=2,
trace.label="Affiliation")
# 可視化2(非効率)
means<-tapply(dat$shoesize,list(dat$gender, dat$affil),mean)
Ns<-tapply(dat$shoesize,list(dat$gender, dat$affil),length)
sds<-tapply(dat$shoesize,list(dat$gender, dat$affil),sd)
sems<-sds/sqrt(Ns)
plot(c(0,1),means[,1],type='o',col='skyblue',
ylim=c(min(means)*0.975,max(means)*1.025),
xlim=c(-0.1,1.1),lwd=2,cex=2,pch=20,xlab="gender",ylab="shoesize")
lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20)
legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)
lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2.5)
lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2.5)
lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2.5)
lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2.5)
# 分散分析
dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)
# 単純主効果の検定
# 各所属講座における性別の効果
means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov) # ANOVA table
MSe=dat.aov.sum[[1]][4,3] # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1] # DF for error or dfE=16
dfG=1 # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE) # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE) # p-value for gender effect given PS
# 各性別における所属講座の効果
SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1 # DF for affil
F_affil_F=SS_affil_F/dfA/MSe # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE) # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE) # p-value for affiliation effect | M
# 例2
interaction.plot(dat$duration,dat$method,dat$result,
pch=c(20,20), col=c("skyblue","orange"), ylab="score",
xlab="Duration",lwd=3,type='b',cex=2,trace.label="Method")
mod1=aov(result~method+duration,data=dat)
mod1.sum=print(summary(mod1))
mod2=aov(result~method*duration,data=dat)
mod2.sum=print(summary(mod2))
means<-tapply(dat$result,list(dat$method,dat$duration),mean)
ssM_1=5*(sum(means[,1]^2)-0.5*(sum(means[,1])^2))
ssM_2=5*(sum(means[,2]^2)-0.5*(sum(means[,2])^2))
ssM_3=5*(sum(means[,3]^2)-0.5*(sum(means[,3])^2))
ssM_4=5*(sum(means[,4]^2)-0.5*(sum(means[,4])^2))
MSe=mod2.sum[[1]][4,3]
DFe=mod2.sum[[1]][4,1]
DFm=1
fM_1=(ssM_1/DFm)/MSe
1-pf(fM_1,DFm,DFe)
fM_2=(ssM_2/DFm)/MSe
1-pf(fM_2,DFm,DFe)
fM_3=(ssM_3/DFm)/MSe
1-pf(fM_3,DFm,DFe)
fM_4=(ssM_4/DFm)/MSe
1-pf(fM_4,DFm,DFe)
ssD_X=5*(sum(means[1,]^2)-1/4*(sum(means[1,])^2))
ssD_Y=5*(sum(means[2,]^2)-1/4*(sum(means[2,])^2))
DFd=3
fD_X=(ssD_X/DFd)/MSe
fD_Y=(ssD_Y/DFd)/MSe
1-pf(fD_X,DFd,DFe)
1-pf(fD_X,DFd,DFe)
qv=qtukey(0.95,DFd+1,DFe)
hsd=qv*(sqrt(MSe/5))
print(diffM<-outer(means[1,],means[1,],"-"))
abs(diffM)>hsd
CLT simulation
中央極限定理の実験
source("http://peach.l.chiba-u.ac.jp/course_folder/ckCLT.txt")
# r command
genR<-function(n_rep,n_sample,prob,distID){
switch(distID,
binom=rbinom(n_sample*n_rep,n_sample,prob),
normal=rnorm(n_rep*n_sample),
uniform=runif(n_rep*n_sample))
}
ckCLT=function(n_rep,n_sample,prob,Distr){
vecR<-genR(n_rep,n_sample,prob,Distr)
dat<-matrix(vecR,nrow=n_rep,ncol=n_sample);
means<-rowMeans(dat)
par(mfrow=c(2,1))
hist(vecR,main="Dist. of the original data set")
hist(means,main="Dist. of sample meanx",xlab="sample mean",probability=T)
if (Distr=="binom"){
denS=density(means,bw=0.125)
} else {denS=density(means)}
lines(denS,col='blue',lwd=2)
return(means);
}