データ解析基礎論B W05 Factor Analysis

chisq.test(c(72,23,16,49),p=rep(40,4),rescale.p=F)
chisq.test(c(72,23,16,49),p=rep(0.25,4),rescale.p=F)

M=matrix(c(52,48,8,42),nrow=2)

chisq.test(M,correct=T)

#(abs(52-40)-0.5)^2/40+(abs(48-60)-0.5)^2/60
# +(abs(8-20)-0.5)^2/20+(abs(42-30)-0.5)^2/30

dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/FA01.csv")
dat.fa<-factanal(dat,1)


dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
dat.fa<-factanal(dat,1)

dat.fa<-factanal(dat,1,score="regression")
plot(dat.fa$score~dat.pca$score[,1],pch=20,cex=2,xlab="Component Score", ylab="Factor Score")

fa_pca.scores = tibble(fa = dat.fa$scores, pca = dat.pca$scores[,1], total.score = rowSums(dat))
ggplot(fa_pca.scores) +
  geom_point(aes(x = fa, y  = pca), size = 3) +
  xlab("Factor Score") + ylab("Component Score")

cor(dat.fa$score,dat.pca$score)


ggplot(fa_pca.scores) +
  geom_point(aes(x = fa, y  = total.score), size = 3) +
  xlab("Factor Score") + ylab("Total Score")


dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.faWOR<-factanal(dat,2, rotation="none", score="regression")
dat.faWR<-factanal(dat,2, rotation="varimax", score="regression")

loadingsWOR <- dat.faWOR$loadings[] %>%
  as.tibble() %>% add_column(variable = row.names(dat.faWOR$loadings)) %>%
  gather("Factor1","Factor2", key = "factor", value = "loadings")

loadingsWR <- dat.faWR$loadings[] %>%
  as.tibble() %>% add_column(variable = row.names(dat.faWR$loadings)) %>%
  gather("Factor1","Factor2", key = "factor", value = "loadings")


ggplot(loadingsWOR, aes(variable, abs(loadings), fill=loadings)) +
  facet_wrap(~ factor, nrow=1) +
  geom_bar(stat="identity") +
  coord_flip() +
  scale_fill_gradient2(name = "loadings",
                       high = "blue", mid = "white", low = "red",
                       midpoint=0, guide=F) +
  ylab("Loading Strength")

ggplot(loadingsWR, aes(variable, abs(loadings), fill=loadings)) +
  facet_wrap(~ factor, nrow=1) +
  geom_bar(stat="identity") +
  coord_flip() +
  scale_fill_gradient2(name = "loadings",
                       high = "blue", mid = "white", low = "red",
                       midpoint=0, guide=F) +
  ylab("Loading Strength")

loadingsWR2 <- as.data.frame(dat.faWR$loadings[])
ggplot(loadingsWR2, aes(x = Factor1, y = Factor2)) +
  geom_point(size = 3, color = "red") +
  geom_vline(xintercept=0) +
  geom_hline(yintercept=0) +
  geom_text(aes(label = rownames(loadingsWR2))) +
  ylim(-1.1, 1.1) + xlim(-1.1, 1.1)


dat.model1<-factanal(dat,1)
dat.model2<-factanal(dat,2)
dat.model3<-factanal(dat,3)
dat.model4<-factanal(dat,4)

source("http://www.matsuka.info/univ/course_folder/cuUtil02.R")
cu.lrtest.csq(dat.model3,dat.model4)
cu.AIC.csq(dat.model1)

library(sem)

model01=cfa(reference.indicator=FALSE)
F1:extrovert,cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity


cv.mat = cov(dat)
mod1<-sem(model01,cv.mat,100)


model02=cfa(reference.indicator=FALSE)
F1: extrovert, leadership, motivated, hesitance
F2: cheerful, antisocial, talkative, popularity


mod2<-sem(model02, cov(dat), nrow(dat))

opt <- options(fit.indices = c("RMSEA"))
summary(mod2)