x<-matrix(1:8, nrow=2)
x<-matrix(1:8, nrow=2,byrow=T)
data01<-data.frame(score = c(2,4,3,4),
dose = c(rep(10,2),rep(100,2)),
condition = rep(c('exp','control'),2))
dat01<-read.csv("http://www.matsuka.info/data_folder/temp_data01.txt",
header=T)
dat02<-read.csv("http://www.matsuka.info/data_folder/temp_data02.txt",
header=T, row.name=1)
dat03<-read.table("http://www.matsuka.info/data_folder/temp_data03.txt",
header=T, row.name=4)
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt",
header=T);
mean(dat$shoesize[dat$gender == "M"])
mean(dat$shoesize[dat$gender == "F"])
mean(dat$shoesize[dat$h > 180])
v1 = seq(-3,3,0.1)
v2 = v1^2
plot(x = v1, y = v2)
plot(v1, v2, col = 'red')
plot(v1, v2, main = "THIS IS THE TITLE", cex.lab = 1.5,
xlab = "Label for X-axis",ylab = "Label for Y-axis")
plot(v1, v2, col = "blue", type = "o", lty = 2, pch = 19,
cex.lab = 1.5, lwd = 3, main = "Y=X*X", xlab = "X",
ylab="X*X", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))
dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
hist(dat$h)
hist(dat$h, breaks = 20, main = “Histogram of Height”,
xlab = "Height", col = 'blue', xlim = c(140, 190))
dens<-density(dat$h);
hist(dat$h, main = "Histogram of Height", xlab = "Height",
xlim = c(140,190), probability = T)
lines(dens, lwd = 2, col = ‘red’, lty=2)
plot(v1, v2, col = "blue", type = "l",
pch = 19, cex.lab = 1.5, lwd = 3,
xlab = "X", ylab="f(X)",
xlim=c(-3.5,3.5), ylim=c(-0.5, 10))
lines(v1, v1^3, col='red',lwd = 3)
legend("bottomright", c("x^2","x^3"), col=c('blue','red'), lwd=2)
boxplot(dat$h ~ dat$gender,
main="Distribution of Height by Gender",
ylab="Gender", xlab="Height", col=c('blue','cyan'),
ylim=c(140,190), horizontal=T)
interaction.plot(dat$gender,
dat$affil,
dat$h,
pch=c(20,20),
col=c("skyblue","orange"),
xlab="gender", ylab="height",
lwd=3,type='b',cex=2,
trace.label="Affiliation")
hist(dat[dat$gender=='F',]$h,
main="Dist. of Height for Female Participants",
xlab="Height", xlim=c(140,190), probability=T)
dens.F = density(dat[dat$gender=='F',]$h)
lines(dens.F, col='blue',lwd=2)
hist(dat[dat$gender==‘M’,]$h, main=“Dist. of Height for Male
Participants”, xlab=“Height”, xlim=c(140,190),
probability=T,ylim=c(0,0.08))
dens.M = density(dat[dat$gender=='M',]$h)
lines(dens.M, col='green', lwd=2)
plot(dat$shoesize, dat$h,
main="Relationship b/w shoesize and height",
xlab = 'shoesize’, ylab='height’,
pch=19, col="red")
txt = paste("r =",round(cor(dat$shoesize,dat$h), 4))
text(22, 175, txt, cex = 1.5)
abline(h = mean(dat$h), col='blue');
abline(v = mean(dat$shoesize), col='green')
plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h,
main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height',
cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
lines(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h,
type = 'p', pch = 15, col = 'green')
legend("topleft", c('Female','Male'), pch =c(19,15),
col = c('blue','green'), cex = 1.5)
Category Archives: データ解析
2019 データ解析基礎論A DAA01
dat<-data.frame(score=c(78,70,66,76,78,76,88, 76, 76,72,60,72,70,72,84,70),
cond=c(rep('low',8), rep('high',8)))
boxplot(score~cond, col = c("skyblue",'skyblue4'),data=dat)
summary(aov(score ~ cond, data = dat))
dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(ani~otouto, data=dat,pch=20,cex=3,xlab ="score of Otouto", ylab = "score of Ani")
dat.lm <- lm(ani~otouto, data=dat)
abline(dat.lm, col = 'red',lwd = 2.5)
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dat.glm <- glm(gender~shoesize,family="binomial",data=dat)
plot(as.numeric(gender)-1~shoesize,data=dat,pch=20,cex=3,ylab="P(Male)")
cf = coef(dat.glm)
temp.x = seq(20,30,0.1)
y = 1/(1+exp(-1*(cf[1]+temp.x*cf[2])))
lines(temp.x,y,col='cyan',lwd=2)
dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca <- princomp(dat)
biplot(dat.pca)
dat<-read.csv("http://matsuka.info/data_folder/tdkClust.csv", header=TRUE, row.names=1)
dat.cluster=hclust(dist(dat),method="average")
plot(dat.cluster,cex=1.5)
data01<-data.frame(score = c(2,4,3,4),
dose = c(rep(10,2),rep(100,2)),
condition = rep(c('exp','control'),2))
dat01<-read.csv("http://www.matsuka.info/data_folder/temp_data01.txt",
header=T)
dat02<-read.csv("http://www.matsuka.info/data_folder/temp_data02.txt",
header=T, row.name=1)
dat03<-read.table("http://www.matsuka.info/data_folder/temp_data03.txt",
header=T, row.name=4)
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt",
header=T);
データ解析基礎論B Loglineak Analysis
freq<-c(33,37,7,23)
pref<-factor(c('soba','udon','soba','udon'))
region<-factor(c('east','east','west','west'))
dat<-data.frame(pref,region,freq)
dat.table=table(pref,region)
dat.table[cbind(pref,region)]<-freq
dat.loglinCE_A<-loglin(dat.table, list(1), fit=T,param=T)
dat.loglinCE_B<-loglin(dat.table,list(2), fit=T,param=T)
dat.loglinIND<-loglin(dat.table,list(1,2), fit=T,param=T)
dat.loglinSAT<-loglin(dat.table,list(c(1,2)), fit=T,param=T)
freq<-c(9,5,2,4,16,10,26,28)
gener<-factor(c(rep('female',4),c(rep('male',4))))
affil<-factor(rep(c('L','L','E','E'),2))
circle<-factor(rep(c('tennis','astro'),4))
dat<-data.frame(gener,affil,circle,freq)
dat.table<-table(gender,affil,circle)
dat.table[cbind(gender,affil,circle)]<-freq
dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglin3<-loglin(dat.table,list(1,3), fit=T,param=T)
dat.loglin4<-loglin(dat.table,list(1,2,3), fit=T,param=T)
dat.loglin5<-loglin(dat.table,list(c(1,3),2), fit=T,param=T)
dat.loglin6<-loglin(dat.table,list(c(1,3),c(1,2)), fit=T,param=T)
dat.loglin7<-loglin(dat.table,list(c(1,3),c(1,2),c(2,3)), fit=T,param=T)
dat.loglin8<-loglin(dat.table,list(c(1,2,3)), fit=T,param=T)
source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')
データ解析基礎論B テスト理論
dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt")
install.packages("psych")
library("psych")
ca<-alpha(dat)
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/survey2.csv")
image(cor(dat)[10:1,1:10])
ca1_5 = alpha(dat[,1:5])
ca1_5
ca6_10 = alpha(dat[,6:10])
ca6_10
F1<-factanal(dat[,1:5],1)
F2<-factanal(dat[,6:10],1)
library(sem)
fa.model=cfa(reference.indicator=FALSE)
F1: q1,q2,q3,q4,q5
F2: q6,q7,q8,q9,q10
fa.model<-update(fa.model)
delete, F1<->F2
fa.result<-sem(fa.model, cov(dat), 300)
summary(fa.result)
install.packages("ltm")
library(ltm)
dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt")
dat = dat-1
descript(dat)
irt1P<-rasch(dat)
plot.rasch(irt1P)
GoF.rasch(irt1P)
person.fit(irt1P)
item.fit(irt1P)
theta = factor.scores(irt1P)
cor(rowSums(theta[[1]][,1:10]),theta[[1]]$z1)
irt2P<-ltm(dat~z1)
plot.ltm(irt2P)
person.fit(irt2P)
item.fit(irt2P)
theta2P = factor.scores(irt2P)
cor(rowSums(theta2P[[1]][,1:10]),theta2P[[1]]$z1)
データ解析基礎論B GLM
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/logisticReg01.txt ")
plot(dat$study, dat$pass, pch=20,ylab="Passed or not",xlab="Hours studied",cex=2,cex.lab=1.5,yaxt='n')
dat.lr <- glm(pass~study,family=binomial, data=dat)
summary(dat.lr)
coef = coefficients(dat.glm)
pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*0:30)))
##
pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*c(10:15))))
odds=pred.pass.p/(1-pred.pass.p)
exp(coef[2])
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dat.lr<-glm(gender~shoesize,family=binomial,data=dat)
anova(dat.lr, test ="Chisq")
dat.lr0<-glm(gender~1,family="binomial",data=dat)
dat.lrS<-glm(gender~shoesize,family=binomial,data=dat)
dat.lrh<-glm(gender~h,family="binomial",data=dat)
M=matrix(c(52,48,8,42),nrow=2)
rownames(M)<-c("present", "absent")
colnames(M)<-c("smoker",'non-smoker')
dat<-as.data.frame((as.table(M)))
colnames(dat)<-c("cancer","smoking","freq")
dat=dat[rep(1:nrow(dat),dat$freq),1:2]
rownames(dat)<-c()
dat.glm<-glm(cancer~smoking,family=binomial,data=dat)
dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.glm<-glm(survival~age, family=binomial,data=dat)
dat.glm2<-glm(survival~Ncigarettes,family=binomial,data=dat)
dat.glm3<-glm(survival~NdaysGESTATION,family=binomial,data=dat)
dat.glmAllAdd=glm(survival~age+Ncigarettes+NdaysGESTATION,family=binomial,data=dat)
dat.glmAllMult=glm(survival~age*Ncigarettes*NdaysGESTATION,family=binomial,data=dat)
library(MASS)
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/poissonReg01.txt ")
dat.pr<-glm(eye.count~attr,family = "poisson",data=dat)
データ解析基礎論B 因子分析+クラスター分析
source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')
dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.model1<-factanal(dat,1)
dat.model2<-factanal(dat,2)
dat.model3<-factanal(dat,3)
dat.model4<-factanal(dat,4)
library(sem)
model01=cfa(reference.indicator=FALSE)
F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity
model02=cfa(reference.indicator=FALSE)
F1: extrovert, leadership, motivated, hesitance
F2: cheerful, antisocial, talkative, popularity
mod2<-sem(model02, cov(dat), 100)
summary(mod2)
opt <- options(fit.indices = c("RMSEA"))
cldata<-data.frame(var1=c(4,1,5,1,5), var2=c(1,5,4,3,1))
cldata.cluster=hclust(dist(cldata),method="average")
plot(cldata.cluster,cex=2)
dat<-read.csv("http://matsuka.info/data_folder/tdkClust.csv", header=TRUE, row.names=1)
dat.cluster=hclust(dist(dat),method="average")
plot(dat.cluster,cex=1.5)
dat.kmeans=kmeans(dat, centers=3, nstart=10)
plot(dat,col=dat.kmeans$cluster+2,pch=20,cex=2)
plot(dat[,1:2],col=dat.kmeans$cluster+1,pch=20,cex=5)
text(dat[,1:2],row.names(dat),cex=2)
res<-cu.KMC.rep(dat,10,1000)
dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
res<-cu.KMC.rep(dat,10,1000)
# MDS
dat<-data.frame(p1=c(4,1,5,1,5),p2=c(1,5,4,3,1))
rownames(dat)<-c('a','b','c','d','e')
dat.mds<-cmdscale(dist(dat),2)
plot(dat.mds[,1],dat.mds[,2], type='n')
text(dat.mds[,1],dat.mds[,2],labels=row.names(dat))
dat.cluster=hclust(dist(dat))
plot(dat.cluster,cex=1.5)
dat<-read.csv("http://matsuka.info/data_folder/tdkMDS02.csv", row.name=1)
dat.mds<-cmdscale(dat,2,eig=T)
plot(dat.mds$points[,1],dat.mds$points[,2], type='n')
text(dat.mds$points[,1],dat.mds$points[,2],labels=row.names(dat), cex=2)
データ解析基礎論B 因子分析
## chisq test
chisq.test(c(72,23,16,49),p=rep(40,4),rescale.p=T)
M=matrix(c(52,48,8,42),nrow=2)
chisq.test(M,correct=F)
## PCA
Mdat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/AMSA_track_mens.csv",row.names=1)
plot(dat,pch=20)
dat.pca<-princomp(dat)
dist = c(100,200,400,800,1500,5000,10000,42195)
log.dist=log(dist)
plot(log.dist, dat.pca$loadings[,1],pch=20,col='red',
cex=2,ylab="PC loadings (1st)",xlab="Log(distance)")
plot(log.dist, dat.pca$loadings[,2],pch=20,col='red',
cex=5,ylab="PC loadings (2nd)",xlab="Log(distance)")
### EFA
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/FA01.csv")
dat.fa<-factanal(dat,1)
dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
dat.fa<-factanal(dat,1)
dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.fa<-factanal(dat,2,rotation="varimax",score="regression")
plot(dat.fa$loadings[,1],dat.fa$loadings[,2],pch=20,cex=2,col="red",
xlab="Factor 1", ylab="Factor 2")
abline(h=0,v=0)
text(dat.fa$loadings[,1],dat.fa$loadings[,2],colnames(dat),cex=1.4)
dat.model1<-factanal(dat,1)
dat.model2<-factanal(dat,2)
dat.model3<-factanal(dat,3)
dat.model4<-factanal(dat,4)
print(m1res<-c(dat.model1$dof,dat.model1$PVAL))
print(m2res<-c(dat.model2$dof,dat.model2$PVAL))
print(m3res<-c(dat.model3$dof,dat.model3$PVAL))
print(m4res<-c(dat.model4$dof,dat.model4$PVAL))
1-pchisq(dat.model3$STATISTIC-dat.model4$STATISTIC,dat.model3$dof-dat.model4$dof)
AIC1=2*(36-dat.model1$dof)+dat.model1$STATISTIC
AIC2=2*(36-dat.model2$dof)+dat.model2$STATISTIC
AIC3=2*(36-dat.model3$dof)+dat.model3$STATISTIC
AIC4=2*(36-dat.model4$dof)+dat.model4$STATISTIC
### CFA with sem ###
install.packages('sem')
library(sem)
model01=cfa(reference.indicator=FALSE)
F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity
mod1<-sem(model01, cov(dat), nrow(dat))
summary(mod1)
model02=cfa(reference.indicator=FALSE)
F1: extrovert, leadership, motivated, hesitance
F2: cheerful, antisocial, talkative, popularity
mod2<-sem(model02, cov(dat), 100)
opt <- options(fit.indices = c("RMSEA"))
データ解析基礎論B W03
dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
biplot(dat.pca)
dat.pca$score
dat.pca$loadings
scoreP1S1 = dat.pca$loadings[1,1]*(dat[1,1]-mean(dat$writing))+
dat.pca$loadings[2,1]*(dat[1,2]-mean(dat$thesis))+
dat.pca$loadings[3,1]*(dat[1,3]-mean(dat$interview))
scoreP2S2 = dat.pca$loadings[1,2]*(dat[2,1]-mean(dat$writing))+
dat.pca$loadings[2,2]*(dat[2,2]-mean(dat$thesis))+
dat.pca$loadings[3,2]*(dat[2,3]-mean(dat$interview))
dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.pca<-princomp(dat)
screeplot(dat.pca,type="lines")
biplot(dat.pca)
biplot(dat.pca,choices=c(2,3))
dat2<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat2[,1]=dat2[,1]*0.1
dat2.pca<-princomp(dat2)
データ解析基礎論B Rの復習
dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
dat2<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
source("http://www.matsuka.info/univ/course_folder/cutil.R")
score.X = dat2$result[dat2$method=="method.X"]
score.Y = dat2$result[dat2$method=="method.Y"]
dat2X<-dat2[dat2$method=="method.X",]
データ解析基礎論A W12
source("http://www.matsuka.info/univ/course_folder/cutil.R")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt",
col.names = c("dump","method","subj","words"))
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration,
dat$method,
dat$result,
pch=c(20,20),
col=c("skyblue","orange"),
ylab="score",
xlab="Duration",
lwd=3,
type='b',
cex=2,
trace.label="Method")
dat.aov<-aov(result~method*duration+Error(s+s:method),dat)
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")
dat.aov<-aov(result~method+duration+method:duration +
Error(s+method:s+duration:s+method:duration:s), data=dat)