x<-matrix(1:8, nrow=2) x<-matrix(1:8, nrow=2,byrow=T) data01<-data.frame(score = c(2,4,3,4), dose = c(rep(10,2),rep(100,2)), condition = rep(c('exp','control'),2)) dat01<-read.csv("http://www.matsuka.info/data_folder/temp_data01.txt", header=T) dat02<-read.csv("http://www.matsuka.info/data_folder/temp_data02.txt", header=T, row.name=1) dat03<-read.table("http://www.matsuka.info/data_folder/temp_data03.txt", header=T, row.name=4) dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt", header=T); mean(dat$shoesize[dat$gender == "M"]) mean(dat$shoesize[dat$gender == "F"]) mean(dat$shoesize[dat$h > 180]) v1 = seq(-3,3,0.1) v2 = v1^2 plot(x = v1, y = v2) plot(v1, v2, col = 'red') plot(v1, v2, main = "THIS IS THE TITLE", cex.lab = 1.5, xlab = "Label for X-axis",ylab = "Label for Y-axis") plot(v1, v2, col = "blue", type = "o", lty = 2, pch = 19, cex.lab = 1.5, lwd = 3, main = "Y=X*X", xlab = "X", ylab="X*X", xlim=c(-3.5,3.5), ylim=c(-0.5, 10)) dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt") hist(dat$h) hist(dat$h, breaks = 20, main = “Histogram of Height”, xlab = "Height", col = 'blue', xlim = c(140, 190)) dens<-density(dat$h); hist(dat$h, main = "Histogram of Height", xlab = "Height", xlim = c(140,190), probability = T) lines(dens, lwd = 2, col = ‘red’, lty=2) plot(v1, v2, col = "blue", type = "l", pch = 19, cex.lab = 1.5, lwd = 3, xlab = "X", ylab="f(X)", xlim=c(-3.5,3.5), ylim=c(-0.5, 10)) lines(v1, v1^3, col='red',lwd = 3) legend("bottomright", c("x^2","x^3"), col=c('blue','red'), lwd=2) boxplot(dat$h ~ dat$gender, main="Distribution of Height by Gender", ylab="Gender", xlab="Height", col=c('blue','cyan'), ylim=c(140,190), horizontal=T) interaction.plot(dat$gender, dat$affil, dat$h, pch=c(20,20), col=c("skyblue","orange"), xlab="gender", ylab="height", lwd=3,type='b',cex=2, trace.label="Affiliation") hist(dat[dat$gender=='F',]$h, main="Dist. of Height for Female Participants", xlab="Height", xlim=c(140,190), probability=T) dens.F = density(dat[dat$gender=='F',]$h) lines(dens.F, col='blue',lwd=2) hist(dat[dat$gender==‘M’,]$h, main=“Dist. of Height for Male Participants”, xlab=“Height”, xlim=c(140,190), probability=T,ylim=c(0,0.08)) dens.M = density(dat[dat$gender=='M',]$h) lines(dens.M, col='green', lwd=2) plot(dat$shoesize, dat$h, main="Relationship b/w shoesize and height", xlab = 'shoesize’, ylab='height’, pch=19, col="red") txt = paste("r =",round(cor(dat$shoesize,dat$h), 4)) text(22, 175, txt, cex = 1.5) abline(h = mean(dat$h), col='blue'); abline(v = mean(dat$shoesize), col='green') plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h, main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height', cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190)) lines(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h, type = 'p', pch = 15, col = 'green') legend("topleft", c('Female','Male'), pch =c(19,15), col = c('blue','green'), cex = 1.5)
Category Archives: データ解析
2019 データ解析基礎論A DAA01
dat<-data.frame(score=c(78,70,66,76,78,76,88, 76, 76,72,60,72,70,72,84,70), cond=c(rep('low',8), rep('high',8))) boxplot(score~cond, col = c("skyblue",'skyblue4'),data=dat) summary(aov(score ~ cond, data = dat)) dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv") plot(ani~otouto, data=dat,pch=20,cex=3,xlab ="score of Otouto", ylab = "score of Ani") dat.lm <- lm(ani~otouto, data=dat) abline(dat.lm, col = 'red',lwd = 2.5) dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt") dat.glm <- glm(gender~shoesize,family="binomial",data=dat) plot(as.numeric(gender)-1~shoesize,data=dat,pch=20,cex=3,ylab="P(Male)") cf = coef(dat.glm) temp.x = seq(20,30,0.1) y = 1/(1+exp(-1*(cf[1]+temp.x*cf[2]))) lines(temp.x,y,col='cyan',lwd=2) dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat.pca <- princomp(dat) biplot(dat.pca) dat<-read.csv("http://matsuka.info/data_folder/tdkClust.csv", header=TRUE, row.names=1) dat.cluster=hclust(dist(dat),method="average") plot(dat.cluster,cex=1.5) data01<-data.frame(score = c(2,4,3,4), dose = c(rep(10,2),rep(100,2)), condition = rep(c('exp','control'),2)) dat01<-read.csv("http://www.matsuka.info/data_folder/temp_data01.txt", header=T) dat02<-read.csv("http://www.matsuka.info/data_folder/temp_data02.txt", header=T, row.name=1) dat03<-read.table("http://www.matsuka.info/data_folder/temp_data03.txt", header=T, row.name=4) dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt", header=T);
データ解析基礎論B Loglineak Analysis
freq<-c(33,37,7,23) pref<-factor(c('soba','udon','soba','udon')) region<-factor(c('east','east','west','west')) dat<-data.frame(pref,region,freq) dat.table=table(pref,region) dat.table[cbind(pref,region)]<-freq dat.loglinCE_A<-loglin(dat.table, list(1), fit=T,param=T) dat.loglinCE_B<-loglin(dat.table,list(2), fit=T,param=T) dat.loglinIND<-loglin(dat.table,list(1,2), fit=T,param=T) dat.loglinSAT<-loglin(dat.table,list(c(1,2)), fit=T,param=T) freq<-c(9,5,2,4,16,10,26,28) gener<-factor(c(rep('female',4),c(rep('male',4)))) affil<-factor(rep(c('L','L','E','E'),2)) circle<-factor(rep(c('tennis','astro'),4)) dat<-data.frame(gener,affil,circle,freq) dat.table<-table(gender,affil,circle) dat.table[cbind(gender,affil,circle)]<-freq dat.loglin2<-loglin(dat.table,list(1), fit=T,param=T) dat.loglin3<-loglin(dat.table,list(1,3), fit=T,param=T) dat.loglin4<-loglin(dat.table,list(1,2,3), fit=T,param=T) dat.loglin5<-loglin(dat.table,list(c(1,3),2), fit=T,param=T) dat.loglin6<-loglin(dat.table,list(c(1,3),c(1,2)), fit=T,param=T) dat.loglin7<-loglin(dat.table,list(c(1,3),c(1,2),c(2,3)), fit=T,param=T) dat.loglin8<-loglin(dat.table,list(c(1,2,3)), fit=T,param=T) source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')
データ解析基礎論B テスト理論
dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt") install.packages("psych") library("psych") ca<-alpha(dat) dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/survey2.csv") image(cor(dat)[10:1,1:10]) ca1_5 = alpha(dat[,1:5]) ca1_5 ca6_10 = alpha(dat[,6:10]) ca6_10 F1<-factanal(dat[,1:5],1) F2<-factanal(dat[,6:10],1) library(sem) fa.model=cfa(reference.indicator=FALSE) F1: q1,q2,q3,q4,q5 F2: q6,q7,q8,q9,q10 fa.model<-update(fa.model) delete, F1<->F2 fa.result<-sem(fa.model, cov(dat), 300) summary(fa.result) install.packages("ltm") library(ltm) dat<-read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt") dat = dat-1 descript(dat) irt1P<-rasch(dat) plot.rasch(irt1P) GoF.rasch(irt1P) person.fit(irt1P) item.fit(irt1P) theta = factor.scores(irt1P) cor(rowSums(theta[[1]][,1:10]),theta[[1]]$z1) irt2P<-ltm(dat~z1) plot.ltm(irt2P) person.fit(irt2P) item.fit(irt2P) theta2P = factor.scores(irt2P) cor(rowSums(theta2P[[1]][,1:10]),theta2P[[1]]$z1)
データ解析基礎論B GLM
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/logisticReg01.txt ") plot(dat$study, dat$pass, pch=20,ylab="Passed or not",xlab="Hours studied",cex=2,cex.lab=1.5,yaxt='n') dat.lr <- glm(pass~study,family=binomial, data=dat) summary(dat.lr) coef = coefficients(dat.glm) pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*0:30))) ## pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*c(10:15)))) odds=pred.pass.p/(1-pred.pass.p) exp(coef[2]) dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt") dat.lr<-glm(gender~shoesize,family=binomial,data=dat) anova(dat.lr, test ="Chisq") dat.lr0<-glm(gender~1,family="binomial",data=dat) dat.lrS<-glm(gender~shoesize,family=binomial,data=dat) dat.lrh<-glm(gender~h,family="binomial",data=dat) M=matrix(c(52,48,8,42),nrow=2) rownames(M)<-c("present", "absent") colnames(M)<-c("smoker",'non-smoker') dat<-as.data.frame((as.table(M))) colnames(dat)<-c("cancer","smoking","freq") dat=dat[rep(1:nrow(dat),dat$freq),1:2] rownames(dat)<-c() dat.glm<-glm(cancer~smoking,family=binomial,data=dat) dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv") dat.glm<-glm(survival~age, family=binomial,data=dat) dat.glm2<-glm(survival~Ncigarettes,family=binomial,data=dat) dat.glm3<-glm(survival~NdaysGESTATION,family=binomial,data=dat) dat.glmAllAdd=glm(survival~age+Ncigarettes+NdaysGESTATION,family=binomial,data=dat) dat.glmAllMult=glm(survival~age*Ncigarettes*NdaysGESTATION,family=binomial,data=dat) library(MASS) dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/poissonReg01.txt ") dat.pr<-glm(eye.count~attr,family = "poisson",data=dat)
データ解析基礎論B 因子分析+クラスター分析
source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R') dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv") dat.model1<-factanal(dat,1) dat.model2<-factanal(dat,2) dat.model3<-factanal(dat,3) dat.model4<-factanal(dat,4) library(sem) model01=cfa(reference.indicator=FALSE) F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity model02=cfa(reference.indicator=FALSE) F1: extrovert, leadership, motivated, hesitance F2: cheerful, antisocial, talkative, popularity mod2<-sem(model02, cov(dat), 100) summary(mod2) opt <- options(fit.indices = c("RMSEA")) cldata<-data.frame(var1=c(4,1,5,1,5), var2=c(1,5,4,3,1)) cldata.cluster=hclust(dist(cldata),method="average") plot(cldata.cluster,cex=2) dat<-read.csv("http://matsuka.info/data_folder/tdkClust.csv", header=TRUE, row.names=1) dat.cluster=hclust(dist(dat),method="average") plot(dat.cluster,cex=1.5) dat.kmeans=kmeans(dat, centers=3, nstart=10) plot(dat,col=dat.kmeans$cluster+2,pch=20,cex=2) plot(dat[,1:2],col=dat.kmeans$cluster+1,pch=20,cex=5) text(dat[,1:2],row.names(dat),cex=2) res<-cu.KMC.rep(dat,10,1000) dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv") res<-cu.KMC.rep(dat,10,1000) # MDS dat<-data.frame(p1=c(4,1,5,1,5),p2=c(1,5,4,3,1)) rownames(dat)<-c('a','b','c','d','e') dat.mds<-cmdscale(dist(dat),2) plot(dat.mds[,1],dat.mds[,2], type='n') text(dat.mds[,1],dat.mds[,2],labels=row.names(dat)) dat.cluster=hclust(dist(dat)) plot(dat.cluster,cex=1.5) dat<-read.csv("http://matsuka.info/data_folder/tdkMDS02.csv", row.name=1) dat.mds<-cmdscale(dat,2,eig=T) plot(dat.mds$points[,1],dat.mds$points[,2], type='n') text(dat.mds$points[,1],dat.mds$points[,2],labels=row.names(dat), cex=2)
データ解析基礎論B 因子分析
## chisq test chisq.test(c(72,23,16,49),p=rep(40,4),rescale.p=T) M=matrix(c(52,48,8,42),nrow=2) chisq.test(M,correct=F) ## PCA Mdat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/AMSA_track_mens.csv",row.names=1) plot(dat,pch=20) dat.pca<-princomp(dat) dist = c(100,200,400,800,1500,5000,10000,42195) log.dist=log(dist) plot(log.dist, dat.pca$loadings[,1],pch=20,col='red', cex=2,ylab="PC loadings (1st)",xlab="Log(distance)") plot(log.dist, dat.pca$loadings[,2],pch=20,col='red', cex=5,ylab="PC loadings (2nd)",xlab="Log(distance)") ### EFA dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/FA01.csv") dat.fa<-factanal(dat,1) dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat.pca<-princomp(dat) dat.fa<-factanal(dat,1) dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv") dat.fa<-factanal(dat,2,rotation="varimax",score="regression") plot(dat.fa$loadings[,1],dat.fa$loadings[,2],pch=20,cex=2,col="red", xlab="Factor 1", ylab="Factor 2") abline(h=0,v=0) text(dat.fa$loadings[,1],dat.fa$loadings[,2],colnames(dat),cex=1.4) dat.model1<-factanal(dat,1) dat.model2<-factanal(dat,2) dat.model3<-factanal(dat,3) dat.model4<-factanal(dat,4) print(m1res<-c(dat.model1$dof,dat.model1$PVAL)) print(m2res<-c(dat.model2$dof,dat.model2$PVAL)) print(m3res<-c(dat.model3$dof,dat.model3$PVAL)) print(m4res<-c(dat.model4$dof,dat.model4$PVAL)) 1-pchisq(dat.model3$STATISTIC-dat.model4$STATISTIC,dat.model3$dof-dat.model4$dof) AIC1=2*(36-dat.model1$dof)+dat.model1$STATISTIC AIC2=2*(36-dat.model2$dof)+dat.model2$STATISTIC AIC3=2*(36-dat.model3$dof)+dat.model3$STATISTIC AIC4=2*(36-dat.model4$dof)+dat.model4$STATISTIC ### CFA with sem ### install.packages('sem') library(sem) model01=cfa(reference.indicator=FALSE) F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity mod1<-sem(model01, cov(dat), nrow(dat)) summary(mod1) model02=cfa(reference.indicator=FALSE) F1: extrovert, leadership, motivated, hesitance F2: cheerful, antisocial, talkative, popularity mod2<-sem(model02, cov(dat), 100) opt <- options(fit.indices = c("RMSEA"))
データ解析基礎論B W03
dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat.pca<-princomp(dat) biplot(dat.pca) dat.pca$score dat.pca$loadings scoreP1S1 = dat.pca$loadings[1,1]*(dat[1,1]-mean(dat$writing))+ dat.pca$loadings[2,1]*(dat[1,2]-mean(dat$thesis))+ dat.pca$loadings[3,1]*(dat[1,3]-mean(dat$interview)) scoreP2S2 = dat.pca$loadings[1,2]*(dat[2,1]-mean(dat$writing))+ dat.pca$loadings[2,2]*(dat[2,2]-mean(dat$thesis))+ dat.pca$loadings[3,2]*(dat[2,3]-mean(dat$interview)) dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv") dat.pca<-princomp(dat) screeplot(dat.pca,type="lines") biplot(dat.pca) biplot(dat.pca,choices=c(2,3)) dat2<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat2[,1]=dat2[,1]*0.1 dat2.pca<-princomp(dat2)
データ解析基礎論B Rの復習
dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv") dat2<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt") source("http://www.matsuka.info/univ/course_folder/cutil.R") score.X = dat2$result[dat2$method=="method.X"] score.Y = dat2$result[dat2$method=="method.Y"] dat2X<-dat2[dat2$method=="method.X",]
データ解析基礎論A W12
source("http://www.matsuka.info/univ/course_folder/cutil.R") dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt", col.names = c("dump","method","subj","words")) dat.aov<-aov(words~method+subj+Error(subj:method),data=dat) dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt") interaction.plot(dat$duration, dat$method, dat$result, pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration", lwd=3, type='b', cex=2, trace.label="Method") dat.aov<-aov(result~method*duration+Error(s+s:method),dat) dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt") dat.aov<-aov(result~method+duration+method:duration + Error(s+method:s+duration:s+method:duration:s), data=dat)