2019 データ解析基礎論a DAA02

x<-matrix(1:8, nrow=2)
x<-matrix(1:8, nrow=2,byrow=T)
data01<-data.frame(score = c(2,4,3,4),     
                   dose = c(rep(10,2),rep(100,2)),  
                   condition = rep(c('exp','control'),2))

dat01<-read.csv("http://www.matsuka.info/data_folder/temp_data01.txt",  
                header=T)
dat02<-read.csv("http://www.matsuka.info/data_folder/temp_data02.txt",    
                header=T, row.name=1)
dat03<-read.table("http://www.matsuka.info/data_folder/temp_data03.txt",
                  header=T, row.name=4)
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt",   
              header=T);
mean(dat$shoesize[dat$gender == "M"])
mean(dat$shoesize[dat$gender == "F"])
mean(dat$shoesize[dat$h > 180])

v1 = seq(-3,3,0.1)
v2 = v1^2
plot(x = v1, y = v2)
plot(v1, v2, col = 'red')

plot(v1, v2, main = "THIS IS THE TITLE", cex.lab = 1.5,
     xlab = "Label for X-axis",ylab = "Label for Y-axis")

plot(v1, v2, col = "blue", type = "o", lty = 2, pch = 19, 
     cex.lab = 1.5, lwd = 3, main = "Y=X*X", xlab = "X", 
     ylab="X*X", xlim=c(-3.5,3.5), ylim=c(-0.5, 10))

dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
hist(dat$h)
hist(dat$h, breaks = 20, main = “Histogram of Height”, 
     xlab = "Height", col = 'blue', xlim = c(140, 190))

dens<-density(dat$h); 
hist(dat$h, main = "Histogram of Height", xlab = "Height",  
     xlim = c(140,190), probability = T)
lines(dens, lwd = 2, col = ‘red’, lty=2)

plot(v1, v2, col = "blue", type = "l", 
     pch = 19, cex.lab = 1.5, lwd = 3, 
     xlab = "X", ylab="f(X)", 
     xlim=c(-3.5,3.5), ylim=c(-0.5, 10))
lines(v1, v1^3, col='red',lwd = 3)
legend("bottomright", c("x^2","x^3"), col=c('blue','red'), lwd=2)

boxplot(dat$h ~ dat$gender,
        main="Distribution of Height by Gender", 
        ylab="Gender", xlab="Height", col=c('blue','cyan'),
        ylim=c(140,190), horizontal=T)

interaction.plot(dat$gender,
                 dat$affil,
                 dat$h, 
                 pch=c(20,20), 
                 col=c("skyblue","orange"), 
                 xlab="gender", ylab="height", 
                 lwd=3,type='b',cex=2,
                 trace.label="Affiliation")

hist(dat[dat$gender=='F',]$h, 
    main="Dist. of Height for Female Participants", 
    xlab="Height", xlim=c(140,190), probability=T)
dens.F = density(dat[dat$gender=='F',]$h)
lines(dens.F, col='blue',lwd=2) 

hist(dat[dat$gender==‘M’,]$h, main=“Dist. of Height for Male 
     Participants”, xlab=“Height”, xlim=c(140,190), 
     probability=T,ylim=c(0,0.08))
dens.M = density(dat[dat$gender=='M',]$h)
lines(dens.M, col='green', lwd=2)

plot(dat$shoesize, dat$h, 
     main="Relationship b/w shoesize and height",
     xlab = 'shoesize’, ylab='height’, 
     pch=19, col="red")
txt = paste("r =",round(cor(dat$shoesize,dat$h), 4))
text(22, 175, txt, cex = 1.5)

abline(h = mean(dat$h), col='blue');
abline(v = mean(dat$shoesize), col='green')

plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h,  
     main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height', 
     cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190))
lines(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h, 
      type = 'p', pch = 15, col = 'green')
legend("topleft", c('Female','Male'), pch =c(19,15), 
       col = c('blue','green'), cex = 1.5)

2019 データ解析基礎論A DAA01

dat<-data.frame(score=c(78,70,66,76,78,76,88, 76, 76,72,60,72,70,72,84,70),
                cond=c(rep('low',8), rep('high',8)))
boxplot(score~cond, col = c("skyblue",'skyblue4'),data=dat)
summary(aov(score ~ cond, data = dat))


dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(ani~otouto, data=dat,pch=20,cex=3,xlab ="score of Otouto", ylab = "score of Ani")
dat.lm <- lm(ani~otouto, data=dat)
abline(dat.lm, col = 'red',lwd = 2.5)

dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dat.glm <- glm(gender~shoesize,family="binomial",data=dat)
plot(as.numeric(gender)-1~shoesize,data=dat,pch=20,cex=3,ylab="P(Male)")
cf = coef(dat.glm)
temp.x = seq(20,30,0.1)
y = 1/(1+exp(-1*(cf[1]+temp.x*cf[2])))
lines(temp.x,y,col='cyan',lwd=2)

dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca <- princomp(dat)
biplot(dat.pca)

dat<-read.csv("http://matsuka.info/data_folder/tdkClust.csv", header=TRUE, row.names=1)
dat.cluster=hclust(dist(dat),method="average")
plot(dat.cluster,cex=1.5)

data01<-data.frame(score = c(2,4,3,4),
                   dose = c(rep(10,2),rep(100,2)),
                   condition = rep(c('exp','control'),2))

dat01<-read.csv("http://www.matsuka.info/data_folder/temp_data01.txt",
                header=T)
dat02<-read.csv("http://www.matsuka.info/data_folder/temp_data02.txt",
                header=T, row.name=1)
dat03<-read.table("http://www.matsuka.info/data_folder/temp_data03.txt",
                  header=T, row.name=4)
dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt",
              header=T);

データ解析基礎論B Loglineak Analysis

freq<-c(33,37,7,23)
pref<-factor(c('soba','udon','soba','udon'))
region<-factor(c('east','east','west','west'))
dat<-data.frame(pref,region,freq)
dat.table=table(pref,region)
dat.table[cbind(pref,region)]<-freq

dat.loglinCE_A<-loglin(dat.table, list(1), fit=T,param=T)
dat.loglinCE_B<-loglin(dat.table,list(2), fit=T,param=T)
dat.loglinIND<-loglin(dat.table,list(1,2), fit=T,param=T)
dat.loglinSAT<-loglin(dat.table,list(c(1,2)), fit=T,param=T)


freq<-c(9,5,2,4,16,10,26,28)
gener<-factor(c(rep('female',4),c(rep('male',4))))
affil<-factor(rep(c('L','L','E','E'),2))
circle<-factor(rep(c('tennis','astro'),4))
dat<-data.frame(gener,affil,circle,freq)

dat.table<-table(gender,affil,circle)
dat.table[cbind(gender,affil,circle)]<-freq

dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T)
dat.loglin3<-loglin(dat.table,list(1,3), fit=T,param=T)
dat.loglin4<-loglin(dat.table,list(1,2,3), fit=T,param=T)
dat.loglin5<-loglin(dat.table,list(c(1,3),2), fit=T,param=T)
dat.loglin6<-loglin(dat.table,list(c(1,3),c(1,2)), fit=T,param=T)
dat.loglin7<-loglin(dat.table,list(c(1,3),c(1,2),c(2,3)), fit=T,param=T)
dat.loglin8<-loglin(dat.table,list(c(1,2,3)), fit=T,param=T)

source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')

データ解析基礎論B テスト理論

dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt")

install.packages("psych")
library("psych")
ca<-alpha(dat)

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/survey2.csv")
image(cor(dat)[10:1,1:10])

ca1_5 = alpha(dat[,1:5])
ca1_5

ca6_10 = alpha(dat[,6:10])
ca6_10

F1<-factanal(dat[,1:5],1)
F2<-factanal(dat[,6:10],1)

library(sem)
fa.model=cfa(reference.indicator=FALSE)
F1: q1,q2,q3,q4,q5 
F2: q6,q7,q8,q9,q10

fa.model<-update(fa.model)
delete, F1<->F2

fa.result<-sem(fa.model, cov(dat), 300)
summary(fa.result)

install.packages("ltm") 
library(ltm)
dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt")
dat = dat-1
descript(dat)
irt1P<-rasch(dat)
plot.rasch(irt1P)
GoF.rasch(irt1P)
person.fit(irt1P)
item.fit(irt1P)
theta = factor.scores(irt1P)
cor(rowSums(theta[[1]][,1:10]),theta[[1]]$z1)

irt2P<-ltm(dat~z1) 
plot.ltm(irt2P)
person.fit(irt2P)
item.fit(irt2P)
theta2P = factor.scores(irt2P)
cor(rowSums(theta2P[[1]][,1:10]),theta2P[[1]]$z1)

データ解析基礎論B GLM

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/logisticReg01.txt ")
plot(dat$study, dat$pass, pch=20,ylab="Passed or not",xlab="Hours studied",cex=2,cex.lab=1.5,yaxt='n')
dat.lr <- glm(pass~study,family=binomial, data=dat)
summary(dat.lr)
coef = coefficients(dat.glm)
pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*0:30)))

##
pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*c(10:15))))
odds=pred.pass.p/(1-pred.pass.p)
exp(coef[2])

dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dat.lr<-glm(gender~shoesize,family=binomial,data=dat)
anova(dat.lr, test ="Chisq")
dat.lr0<-glm(gender~1,family="binomial",data=dat)
dat.lrS<-glm(gender~shoesize,family=binomial,data=dat)
dat.lrh<-glm(gender~h,family="binomial",data=dat)


M=matrix(c(52,48,8,42),nrow=2)
rownames(M)<-c("present", "absent")
colnames(M)<-c("smoker",'non-smoker')
dat<-as.data.frame((as.table(M)))
colnames(dat)<-c("cancer","smoking","freq")
dat=dat[rep(1:nrow(dat),dat$freq),1:2]
rownames(dat)<-c()

dat.glm<-glm(cancer~smoking,family=binomial,data=dat)
dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.glm<-glm(survival~age, family=binomial,data=dat)
dat.glm2<-glm(survival~Ncigarettes,family=binomial,data=dat)
dat.glm3<-glm(survival~NdaysGESTATION,family=binomial,data=dat)
dat.glmAllAdd=glm(survival~age+Ncigarettes+NdaysGESTATION,family=binomial,data=dat)
dat.glmAllMult=glm(survival~age*Ncigarettes*NdaysGESTATION,family=binomial,data=dat)
library(MASS)


dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/poissonReg01.txt ")
dat.pr<-glm(eye.count~attr,family = "poisson",data=dat)

データ解析基礎論B 因子分析+クラスター分析

source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')
dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.model1<-factanal(dat,1)
dat.model2<-factanal(dat,2)
dat.model3<-factanal(dat,3)
dat.model4<-factanal(dat,4)


library(sem)

model01=cfa(reference.indicator=FALSE)
F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity

model02=cfa(reference.indicator=FALSE)
F1: extrovert, leadership, motivated, hesitance 
F2: cheerful, antisocial, talkative, popularity

mod2<-sem(model02, cov(dat), 100)
summary(mod2)
opt <- options(fit.indices = c("RMSEA"))


cldata<-data.frame(var1=c(4,1,5,1,5), var2=c(1,5,4,3,1))
cldata.cluster=hclust(dist(cldata),method="average")
plot(cldata.cluster,cex=2)

dat<-read.csv("http://matsuka.info/data_folder/tdkClust.csv", header=TRUE, row.names=1)
dat.cluster=hclust(dist(dat),method="average")
plot(dat.cluster,cex=1.5)
                   
dat.kmeans=kmeans(dat, centers=3, nstart=10)
plot(dat,col=dat.kmeans$cluster+2,pch=20,cex=2)

plot(dat[,1:2],col=dat.kmeans$cluster+1,pch=20,cex=5)
text(dat[,1:2],row.names(dat),cex=2)

res<-cu.KMC.rep(dat,10,1000)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
res<-cu.KMC.rep(dat,10,1000)


# MDS
dat<-data.frame(p1=c(4,1,5,1,5),p2=c(1,5,4,3,1))
rownames(dat)<-c('a','b','c','d','e')
dat.mds<-cmdscale(dist(dat),2)
plot(dat.mds[,1],dat.mds[,2], type='n')
text(dat.mds[,1],dat.mds[,2],labels=row.names(dat))
dat.cluster=hclust(dist(dat))
plot(dat.cluster,cex=1.5)

dat<-read.csv("http://matsuka.info/data_folder/tdkMDS02.csv", row.name=1)
dat.mds<-cmdscale(dat,2,eig=T)
plot(dat.mds$points[,1],dat.mds$points[,2], type='n')
text(dat.mds$points[,1],dat.mds$points[,2],labels=row.names(dat), cex=2)

データ解析基礎論B 因子分析

## chisq test
chisq.test(c(72,23,16,49),p=rep(40,4),rescale.p=T)
M=matrix(c(52,48,8,42),nrow=2)
chisq.test(M,correct=F)

## PCA
Mdat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/AMSA_track_mens.csv",row.names=1)
plot(dat,pch=20)
dat.pca<-princomp(dat)

dist = c(100,200,400,800,1500,5000,10000,42195)
log.dist=log(dist)

plot(log.dist, dat.pca$loadings[,1],pch=20,col='red',
  cex=2,ylab="PC loadings (1st)",xlab="Log(distance)")
plot(log.dist, dat.pca$loadings[,2],pch=20,col='red',
  cex=5,ylab="PC loadings (2nd)",xlab="Log(distance)")

### EFA
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/FA01.csv")
dat.fa<-factanal(dat,1)

dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
dat.fa<-factanal(dat,1)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.fa<-factanal(dat,2,rotation="varimax",score="regression")
plot(dat.fa$loadings[,1],dat.fa$loadings[,2],pch=20,cex=2,col="red",
     xlab="Factor 1", ylab="Factor 2")
abline(h=0,v=0)
text(dat.fa$loadings[,1],dat.fa$loadings[,2],colnames(dat),cex=1.4)

dat.model1<-factanal(dat,1)
dat.model2<-factanal(dat,2)
dat.model3<-factanal(dat,3)
dat.model4<-factanal(dat,4)

print(m1res<-c(dat.model1$dof,dat.model1$PVAL))
print(m2res<-c(dat.model2$dof,dat.model2$PVAL))
print(m3res<-c(dat.model3$dof,dat.model3$PVAL))
print(m4res<-c(dat.model4$dof,dat.model4$PVAL))
1-pchisq(dat.model3$STATISTIC-dat.model4$STATISTIC,dat.model3$dof-dat.model4$dof)

AIC1=2*(36-dat.model1$dof)+dat.model1$STATISTIC
AIC2=2*(36-dat.model2$dof)+dat.model2$STATISTIC
AIC3=2*(36-dat.model3$dof)+dat.model3$STATISTIC
AIC4=2*(36-dat.model4$dof)+dat.model4$STATISTIC

### CFA with sem ###
install.packages('sem')
library(sem)

model01=cfa(reference.indicator=FALSE)
F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity

mod1<-sem(model01, cov(dat), nrow(dat))
summary(mod1)

model02=cfa(reference.indicator=FALSE)
F1: extrovert, leadership, motivated, hesitance 
F2: cheerful, antisocial, talkative, popularity

mod2<-sem(model02, cov(dat), 100)
opt <- options(fit.indices = c("RMSEA"))

データ解析基礎論B W03

dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
biplot(dat.pca)
dat.pca$score
dat.pca$loadings

scoreP1S1 = dat.pca$loadings[1,1]*(dat[1,1]-mean(dat$writing))+
  dat.pca$loadings[2,1]*(dat[1,2]-mean(dat$thesis))+
  dat.pca$loadings[3,1]*(dat[1,3]-mean(dat$interview))

scoreP2S2 = dat.pca$loadings[1,2]*(dat[2,1]-mean(dat$writing))+
  dat.pca$loadings[2,2]*(dat[2,2]-mean(dat$thesis))+
  dat.pca$loadings[3,2]*(dat[2,3]-mean(dat$interview))

dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.pca<-princomp(dat)
screeplot(dat.pca,type="lines")
biplot(dat.pca)
biplot(dat.pca,choices=c(2,3))


dat2<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat2[,1]=dat2[,1]*0.1
dat2.pca<-princomp(dat2)

データ解析基礎論B Rの復習

dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
dat2<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
source("http://www.matsuka.info/univ/course_folder/cutil.R")
score.X = dat2$result[dat2$method=="method.X"]
score.Y = dat2$result[dat2$method=="method.Y"]
dat2X<-dat2[dat2$method=="method.X",]

データ解析基礎論A W12

source("http://www.matsuka.info/univ/course_folder/cutil.R")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt",
   col.names = c("dump","method","subj","words"))
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)

dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration,
                 dat$method,        
                 dat$result,    
                 pch=c(20,20), 
                 col=c("skyblue","orange"),
                 ylab="score",
                 xlab="Duration",
                 lwd=3, 
                 type='b',
                 cex=2, 
                 trace.label="Method")
dat.aov<-aov(result~method*duration+Error(s+s:method),dat)

dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")

dat.aov<-aov(result~method+duration+method:duration + 
  Error(s+method:s+duration:s+method:duration:s), data=dat)