データ分析基礎論B weekly assignment WAB05はここにあります。
データ解析基礎論B W04
# Split Plot Factorial (SFP)
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)
TSME<-SPF.tsme(dat.aov,dat,"result")
dat.aov.sum<-summary(dat.aov)
# within-subjcts factor
DF_error.W = dat.aov.sum[[2]][[1]][3,1]
MS_error.W = dat.aov.sum[[2]][[1]][3,3]
q <- qtukey(0.95,4,DF_error.W)
hsd<-q*sqrt(MS_error.W/(5*2))
dat.means.duration<-tapply(dat$result,dat$duration,mean)
abs(outer(dat.means.duration,dat.means.duration,'-'))>hsd
# between-subjects factor
DF_error.B = dat.aov.sum[[1]][[1]][2,1]
MS_error.B = dat.aov.sum[[1]][[1]][2,3]
q <- qtukey(0.95,2,DF_error.B)
hsd<-q*sqrt(MS_error.B/(5*4))
dat.means.method<-tapply(dat$result,dat$method,mean)
abs(outer(dat.means.method,dat.means.method,'-'))>hsd
# randomized block factorial
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")
dat.aov<-aov(result~method+duration+method:duration +
Error(s+method:s+duration:s+method:duration:s), data=dat)
TSME<-RBF.tsme(dat.aov, dat, "result")
dat.aov.sum<-summary(dat.aov)
# testing for method
mse = dat.aov.sum[[2]][[1]][2,3]
qv=qtukey(0.95,2,4)
hsd=qv*sqrt(mse/(5*4))
dat.means.method=tapply(dat$result,dat$method,mean)
abs(outer(dat.means.method,dat.means.method,'-')) > hsd
# testing for duration
mse = dat.aov.sum[[3]][[1]][2,3]
qv=qtukey(0.95,4,12)
hsd=qv*sqrt(mse/(5*2))
dat.means.duration=tapply(dat$result,dat$duration,mean)
abs(outer(dat.means.duration,dat.means.duration,'-')) > hsd
人文科学入門(色実験 ー 仮)
ケーキの分配ゲーム
# ケーキの総数
n.cake = 10
# 利得行列
pay.mat = matrix(0,(n.cake+1),(n.cake+1))
for (i.cake in 1:n.cake){
pay.mat[(i.cake+1),1:(n.cake-i.cake+1)] =i.cake
}
# 初期化(各戦略の確率や時間など)
p.cake = runif(n.cake+1)
p.cake = p.cake/sum(p.cake)
max.time = 50
dt = 0.01
t = seq(0,max.time,dt)
n.iter = length(t)
p.hist = matrix(0,nrow = n.iter, ncol = (n.cake+1))
p.hist[1,] = p.cake
# メインのスクリプト
for (i.time in 2:n.iter){
W = colSums(p.cake*t(pay.mat))
W.ave = sum(outer(p.cake,p.cake)*pay.mat)
p.cake = p.cake + p.cake*(W - W.ave)/W.ave * dt
p.hist[i.time,] = p.cake
}
# 結果の可視化
plot(p.hist[,1],ylim=c(0,1),type='l',lwd=2,ylab = 'Proportion',xlab="time")
for (i.strat in 2:(n.cake+1)){
lines(p.hist[,i.strat],col=i.strat,lwd=2)
}
legend("topleft",paste("request = ",0:10),col=1:(n.cake+1),lwd =2,cex=0.75)
データ解析基礎論B W03
# RB
dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt")
colnames(dat)<-c("id",'method','subj','words')
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat.aov.sum = summary(dat.aov)
# Tukey HSD
mse <- dat.aov.sum[[1]][[1]][3,3]
q<-qtukey(0.95,4,df=21)
hsd<-q*sqrt(mse/8)
dat.means=tapply(dat$words, dat$method, mean)
ck.hsd<-abs(outer(dat.means,dat.means,'-'))>hsd
# SPF
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration, # x軸
dat$method, # まとめる変数
dat$result, # y軸
pch=c(20,20),
col=c("skyblue","orange"),
ylab="score",
xlab="Duration",
lwd=3,
type='b',
cex=2,
trace.label="Method")
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme.txt")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
dat.aov<-aov(result~method*duration+Error(s+s:duration),dat)
データ解析基礎論B ー 期末テストの解説
# Q2 example
# visualization
interaction.plot(dat.Q2$med,dat.Q2$ingred,dat.Q2$bp,type='o',
col = c("skyblue", "coral"),lwd =3, cex = 2,
pch=c(18,19),xlab = "quantitily", ylab = "Blood Pressure",
trace.label="type of medicine",legend = T)
# descriptive stats
tapply(dat.Q2$bp,list(dat.Q2$med,dat.Q2$ingred),mean)
# ANOVA
dat.Q2.aov = aov(bp~med*ingred,data=dat.Q2)
summary(dat.Q2.aov)
# Linear Regression
dat.Q2.lm = lm(bp~med*ingred, contrasts=list(med=contr.poly), data=dat.Q2)
summary(dat.Q2.lm)
anova(dat.Q2.lm)
# Q3 example 1
# visualization
plot(hours~btype, data= dat.Q3V1)
# ANOVA
dat.Q3V1.aov = aov(hours~btype,data =dat.Q3V1)
summary(dat.Q3V1.aov)
TukeyHSD(dat.Q3V1.aov)
# Linear Regression
dat.Q3V1.lm = lm(hours~btype, data = dat.Q3V1)
summary(dat.Q3V1.aov)
# Q3 example 2
# visualization
plot(dat.Q3V1$hours, dat.Q3V1$A,type = 'p',pch=20,cex =2,
xlab = "hours cleaned", ylab= "Blood Type",yaxt ="n")
axis(2, at = c(0,1), labels = c("not A", "A"))
# GLM
dat.Q3V1.glm=glm(A~hours,family="binomial",data=dat.Q3V1 )
# plotting result
x.temp = seq(100,330,1)
glm.cf = coef(dat.Q3V1.glm)
y.temp = 1/(1+exp(-1*(glm.cf[1]+glm.cf[2]*x.temp)))
lines(x.temp,y.temp,col='red',lwd=2)
# Q4 example
plot(score~group,data=dat.Q4)
dat.Q4.G = lm(score~group,data=dat.Q4)
#
dat.Q4.NG1 = lm(score~income, data=dat.Q4)
dat.Q4.NG2 = lm(score~gender, data=dat.Q4)
dat.Q4.NG3 = lm(score~study, data=dat.Q4)
dat.Q4.NG4 = lm(score~height, data=dat.Q4)
dat.Q4.NG5 = lm(score~study+group,data=dat.Q4)
dat.Q4.NG6 = lm(score~study+income,data=dat.Q4)
UT 特別講義II week01
# SEC: random numbers
N = 10000
# N = 1000
random.data = rnorm(N, mean=0, sd=1)
hist(random.data, nclass = 50, col = "navy", xlab = "Data",
probability = T, main = "Histogram of Random Data")
# density of generated data
dens = density(random.data)
lines(dens, col = "orange", lwd = 4)
# theoretical density
x = seq(-4,4,0.1)
true.norm = dnorm(x, mean = 0, sd = 1)
lines(x,true.norm, col = "green", lty = 3, lwd = 4)
legend("topleft",c("empirical", "theoretical"), lty = c(1,3),
col = c('orange','green'),lwd=4)
# SEC: law of large numbers
# simplest version
six.counter=0; N = 1000
for (i_loop in 1:N) {
die<-sample(1:6,1)
if (die==6) {six.counter=six.counter+1}
}
six.counter/N
# simpler version
N=1000; six.counter=rep(0,N);
for (i_loop in 1:N) {
die<-sample(1:6,1)
if (die==6) {six.counter[i_loop]=1}
}
plot(1:N,cumsum(six.counter)/(1:1000),type='l',ylim=c(0,1),lwd=2)
abline(h=1/6,lwd=2,col='red')
# simple version
N = 1000
die.all <- sample(1:6,N,replace=T)
six.index <- die.all==6
par(mfrow = c(2,1))
par(oma=c(2,2,0,0),mar=c(4,4,1,1),mfrow=c(2,1))
plot(1:N, die.all, pch=20, col = 'red', ylim = c(0,7),
ylab = "Result", xlab = "trial")
plot(1:N,cumsum(six.index)/(1:1000), type='l', ylim=c(0,1), lwd=2,
ylab = "P(die = 6)", xlab = "trial")
abline(h=1/6,lwd=2,col='red')
# CLT
par(mfrow=c(1,1))
N=10
nRep=10000
dat<-matrix(runif(N*nRep),nrow=N)
means<-colMeans(dat)
hist(means,nclass=50,probability=T)
dens<-density(means);lines(dens,col='skyblue',lwd=3)
xs=seq(-0,1,0.01)
theo.dens<-dnorm(xs,mean=0.5,sd=sqrt((1/12)/N))
lines(xs,theo.dens,col='orange',lwd=3,lty=2)
# GCD
# script version
r=-99;v1=1633;v2=355
while (r!=0){
r=v1%%v2
print(paste('v1 =',v1,', v2 = ',v2,',remainder = ',r))
v1=v2
v2=r
}
# function version
GCD<-function(v1,v2){
real.v1=max(c(v1,v2))
real.v2=min(c(v1,v2))
repeat{
r=real.v1%%real.v2;real.v1=real.v2;real.v2=r
if (r==0){
print(paste('GCD is',real.v1));
return(real.v1);
break}
}
}
GCD(1633,355)
# digit conversion
# binary to dec
bin2dec=function(bin) {
ones=which(rev(bin)==1)-1
dec=sum(2^ones)
return(dec)
}
# dec 2 bin - script
num=150;bin=c()
while(num!=0) {
rem=num%%2;
num=num%/%2
bin=print(c(rem,bin))
}
# dec 2 bin - function
dec2bin<-function(num, digits=8) {
bin=c()
if (num==0){
bin=0
} else {
while(num!=0){
rem=num%%2
num=num%/%2
bin=c(rem,bin)
}
}
if (length(bin)
データ解析基礎論A weekly assignment 12
課題 WAA12はここにあります。
解答例はここにあります。
過去のテストの例はデータ解析基礎論のページにあります。
データ解析基礎論A week12
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/datW03R.txt")
means<-tapply(dat$shoesize,list(dat$gender, dat$affil),mean)
Ns<-tapply(dat$shoesize,list(dat$gender, dat$affil),length)
sds<-tapply(dat$shoesize,list(dat$gender, dat$affil),sd)
sems<-sds/sqrt(Ns)
plot(c(0,1),means[,1],type='o',col='skyblue', xlim=c(-0.1,1.1), lwd=2,
cex=2, pch=20, ylim=c(min(means)*0.975, max(means)*1.025), xlab="gender", ylab="shoesize",
xaxt="n")
axis(1,c(0,1),c("female","male"))
lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20)
lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2)
lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2)
lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2)
lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2)
legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)
interaction.plot(dat$gender,
dat$affil,
dat$shoesize,
pch=c(20,20),
col=c("skyblue","orange"),
xlab="gender", ylab="shoesize",
lwd=3,type='b',cex=2,
trace.label="Affiliation")
dat.aov=aov(shoesize~gender*affil, data=dat)
dat.aov.sum=summary(dat.aov)
# testing simple main effect of gender
means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean)
SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS
SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS
dat.aov.sum=summary(dat.aov) # ANOVA table
MSe=dat.aov.sum[[1]][4,3] # MSE from ANOVA table or MSe=0.62
dfE=dat.aov.sum[[1]][4,1] # DF for error or dfE=16
dfG=1 # DF for gender
F_gen_CS=(SS_gen_CS/dfG)/MSe # F-value for gender effect given CS
F_gen_PS=(SS_gen_PS/dfG)/MSe # F-value for gender effect given PS
P_gen_CS=1-pf(F_gen_CS,1,dfE) # p-value for gender effect given CS
P_gen_PS=1-pf(F_gen_PS,1,dfE) # p-value for gender effect given PS
# testing simple main effect of affiliation
SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F
SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M
dfA=1 # DF for affil
F_affil_F=SS_affil_F/dfA/MSe # F-value for affiliation effect | F
F_affil_M=SS_affil_M/dfA/MSe # F-value for affiliation effect | M
P_affil_F=1-pf(F_affil_F,1,dfE) # p-value for affiliation effect | F
P_affil_M=1-pf(F_affil_M,1,dfE) # p-value for affiliation effect | M
# ANOVA 2x4
dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
interaction.plot(dat$duration,dat$method,dat$result, pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration",lwd=3,type='b',cex=2,trace.label="Method")
dat.aov=aov(result~method*duration,data=dat)
means<-tapply(dat$result,list(dat$method,dat$duration),mean)
ssM_1=5*(sum(means[,1]^2)-0.5*(sum(means[,1])^2))
ssM_2=5*(sum(means[,2]^2)-0.5*(sum(means[,2])^2))
ssM_3=5*(sum(means[,3]^2)-0.5*(sum(means[,3])^2))
ssM_4=5*(sum(means[,4]^2)-0.5*(sum(means[,4])^2))
MSe=mod2.sum[[1]][4,3]
DFe=mod2.sum[[1]][4,1]
DFm=1
fM_1=(ssM_1/DFm)/MSe
1-pf(fM_1,DFm,DFe)
ssD_X=5*(sum(means[1,]^2)-1/4*(sum(means[1,])^2))
ssD_Y=5*(sum(means[2,]^2)-1/4*(sum(means[2,])^2))
DFd=3
fD_X=(ssD_X/DFd)/MSe
fD_Y=(ssD_Y/DFd)/MSe
1-pf(fD_X,DFd,DFe)
1-pf(fD_X,DFd,DFe)
qv=qtukey(0.95,DFd+1,DFe)
hsd=qv*(sqrt(MSe/5))
print(diffM<-outer(means[1,],means[1,],"-"))
abs(diffM)>hsd
# useful r functions
source("http://peach.l.chiba-u.ac.jp/course_folder/tsme2017.R")
CRF.tsme(dat.aov, dat)
データ解析基礎論A week11
f=c(24.1,23.9,24.4,24.4,23.5)
m=c(25.6,26.1,25.8,25.9,26)
G.mean=mean(c(f,m))
ss.total=sum((c(f,m)-G.mean)^2)
ss.tr=sum((mean(f)-G.mean)^2)*5+sum((mean(m)-G.mean)^2)*5
ss.error=sum((f-mean(f))^2)+sum((m-mean(m))^2)
dat<-data.frame(ssize=c(f,m),gender=c(rep("f",5),rep("m",5)))
dat.aov<-aov(ssize ~ gender,data=dat)
dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt")
dat.aov<-aov(shoesize~club, dat)
qT<-qtukey(0.95, 3, 67)
HSD<-qT*sqrt((2.243*(1/23+1/24))/2)
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.txt",
col.names=c("dump","method","result"))
levels(dat$method)<-c('free','repeat','sentence','image')
dat.aov<-aov(result~method, data=dat)
summary(dat.aov)
dat.means<-tapply(dat$result,dat$method,mean)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
t.value=bunshi/bunbo
2*(1-pt(t.value,28))
dat.means<-tapply(dat$result,dat$method,mean)
cont=c(-3,1,1,1)
bunshi=sum(cont*dat.means)
bunbo=sqrt(5.29*(sum((cont^2)/8)))
F.value=(bunshi/bunbo)^2
new.F=3*qf(0.95,3,28)

