# データ解析基礎論B 多変量解析

# Package setup ----
# Install the plotting helpers only when they are not already available, so
# re-running the script does not re-download them every time, then attach them.
if (!requireNamespace("ggfortify", quietly = TRUE)) {
  install.packages("ggfortify")
}
if (!requireNamespace("ggdendro", quietly = TRUE)) {
  install.packages("ggdendro")
}
library(ggfortify)
library(ggdendro)

# PCA ----
# Principal component analysis of `dat`, plotted twice with ggfortify.
# NOTE(review): `dat` is not defined in the visible part of this script before
# this point; presumably it is loaded earlier -- confirm.
dat.pca <- princomp(dat)

# NOTE(review): both autoplot() calls previously ended in a dangling comma with
# no closing parenthesis, which made the rest of the script unparseable. They
# are closed here with the arguments that were visible; any further arguments
# (e.g. loadings options) appear to have been lost -- restore if needed.
autoplot(dat.pca, label = TRUE, label.size = 6)
# shape = FALSE: per the ggfortify documentation this suppresses the point
# markers so that only the row labels are drawn.
autoplot(dat.pca, shape = FALSE, label.size = 6)

# Toy example: five individuals (A-E) measured on two variables.
cldata <- data.frame(
  var1 = c(4, 1, 5, 1, 5),
  var2 = c(1, 5, 4, 3, 1),
  row.names = c("A", "B", "C", "D", "E")
)

# Visualise the pairwise distance matrix of the five individuals.
autoplot(dist(cldata))

# Hierarchical clustering with average linkage, drawn as a rotated dendrogram
# with regular axes (theme_dendro = FALSE).
cldata.cluster <- hclust(dist(cldata), method = "average")
ggdendrogram(cldata.cluster, rotate = TRUE, theme_dendro = FALSE) +
  labs(x = "Individual")

# Distance matrix, hierarchical clustering and PCA for `dat` ----
# NOTE(review): `dat` is not defined in the visible part of this script before
# this point; presumably it is loaded earlier -- confirm.
autoplot(dist(dat))

# hclust() is called without `method`, so its default linkage is used.
dat.cluster <- hclust(dist(dat))
ggdendrogram(dat.cluster, rotate = TRUE, theme_dendro = FALSE) +
  xlab("Occupation")

# NOTE(review): this autoplot() call previously ended in a dangling comma with
# no closing parenthesis (a syntax error). It is closed here with the visible
# arguments; any lost trailing arguments should be restored if needed.
dat.pca <- princomp(dat)
autoplot(dat.pca, label = TRUE, shape = FALSE, label.size = 4)

# Compare four linkage methods on the same distance matrix of `dat`:
# single, complete, average and Ward ("ward.D").
dat.HC.S <- hclust(dist(dat), method = "single")
dat.HC.C <- hclust(dist(dat), method = "complete")
dat.HC.A <- hclust(dist(dat), method = "average")
dat.HC.W <- hclust(dist(dat), method = "ward.D")

# One rotated dendrogram per linkage method, titled accordingly.
ggdendrogram(dat.HC.S, rotate = TRUE, theme_dendro = FALSE) +
  labs(x = "Occupation", title = "Method = Single")
ggdendrogram(dat.HC.C, rotate = TRUE, theme_dendro = FALSE) +
  labs(x = "Occupation", title = "Method = Complete")
ggdendrogram(dat.HC.A, rotate = TRUE, theme_dendro = FALSE) +
  labs(x = "Occupation", title = "Method = Average")
ggdendrogram(dat.HC.W, rotate = TRUE, theme_dendro = FALSE) +
  labs(x = "Occupation", title = "Method = Ward's MV")

# k-means clustering ----
# Partition `dat` into 3 clusters, keeping the best of 10 random starts.
# Note: no seed is set here, so the cluster numbering (and therefore the
# colours below) may differ between runs.
dat.kmeans <- kmeans(dat, centers = 3, nstart = 10)

# Scatter-plot matrix of all variable pairs; each point is filled with the
# colour of the cluster it was assigned to (cluster 1/2/3 -> red/blue/green).
# The `$` accessor was previously written as `\$`, which is not valid R.
pairs(dat,
      main = "Clustering Occupations",
      pch = 21,
      bg = c("red", "blue", "green")[unclass(dat.kmeans$cluster)])

# ggfortify view of the same clustering, with row labels.
autoplot(dat.kmeans, dat, size = 3, label = TRUE, label.size = 5)

# Load helper code from a remote script and run it on `dat`.
# NOTE(review): source() executes whatever the URL serves, and this URL is
# plain HTTP, so the code is fetched unencrypted at run time. Consider
# vendoring a reviewed copy of the script or switching to HTTPS if the server
# supports it.
source("http://www.matsuka.info/univ/course_folder/cuUtil02.R")
# cu.KMC.rep is not defined in the visible part of this file; presumably it
# comes from the script sourced above. The meaning of the arguments 10 and 100
# cannot be determined from here -- TODO confirm against cuUtil02.R.
res<-cu.KMC.rep(dat,10,100)

# Cluster plots with a frame drawn around each cluster, on fixed axis limits
# so the two figures are directly comparable.
# First with frame.type = "norm", then with the default frame type.
autoplot(dat.kmeans, dat, frame = TRUE, frame.type = "norm") +
  lims(y = c(-0.7, 0.7), x = c(-1.2, 0.7))
autoplot(dat.kmeans, dat, frame = TRUE) +
  lims(y = c(-0.7, 0.7), x = c(-1.2, 0.7))

# Linear discriminant analysis on a toy two-group data set ----
# Ten cases with a writing score, an interview score and a group label `cl`
# ("A" for the first five rows, "N" for the last five).
dat <- data.frame(
  writing = c(68, 85, 50, 54, 66, 35, 56, 25, 43, 70),
  interview = c(65, 80, 95, 70, 75, 55, 65, 75, 50, 40),
  cl = c(rep("A", 5), rep("N", 5))
)
library(MASS)
dat.lda <- lda(cl ~ ., data = dat)

# Cut-off on the discriminant score: the average of the two group means after
# projecting them onto LD1 (means is groups x variables, scaling is
# variables x LD1, so the product is one projected mean per group).
# The `$` accessors below were previously written as `\$`, which is not valid R.
intcpt <- mean(dat.lda$means %*% dat.lda$scaling)

# NOTE(review): this is w1 / w2 (writing coefficient over interview
# coefficient). The direction of the LD1 axis in the (writing, interview) plane
# would have slope w2 / w1, and `intcpt` is in discriminant-score units rather
# than interview units, so the line built from these two values may not be the
# intended "new dimension" -- confirm before relying on it. Kept unchanged.
new.dim.slope <- dat.lda$scaling[1] / dat.lda$scaling[2]

# Decision boundary w1 * writing + w2 * interview = intcpt, solved for
# interview as a function of writing.
disc.intcpt <- intcpt / dat.lda$scaling[2]
disc.slope <- -dat.lda$scaling[1] / dat.lda$scaling[2]

# Scatter plot of the two groups with the two lines derived from the LDA fit:
# black = line built from intcpt / new.dim.slope, red = decision boundary.
# The intercepts and slopes are constants, so they are passed as plain
# arguments rather than inside aes(); wrapping them in aes() made ggplot2
# evaluate them once per row of `dat` and draw the same line repeatedly.
ggplot(dat, aes(x = writing, y = interview, color = cl)) +
  geom_point(size = 4) +
  geom_abline(intercept = intcpt, slope = new.dim.slope) +
  geom_abline(intercept = disc.intcpt, slope = disc.slope, color = "red") +
  xlim(30, 100) +
  ylim(30, 100)

# LDA with `class` as the grouping variable and every other column as a
# predictor, followed by a confusion table and a cross-validated fit.
# NOTE(review): the `dat` built earlier in the visible part of this script has
# a `cl` column, not `class`; presumably a different data set is loaded into
# `dat` before this point -- confirm, otherwise these calls fail.
# The `$` accessors below were previously written as `\$`, which is not valid R.
dat.lda <- lda(class ~ ., dat)

# Predict on the same data the model was fitted to and cross-tabulate
# predicted (rows) against observed (columns) classes.
lda.pred <- predict(dat.lda, dat)
table(lda.pred$class, dat$class)

# CV = TRUE asks MASS::lda for leave-one-out cross-validated classifications.
dat.ldaCV <- lda(class ~ ., dat, CV = TRUE)