source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')
dat <- read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")

# exploratory factor analysis with 1-4 factors
dat.model1 <- factanal(dat, 1)
dat.model2 <- factanal(dat, 2)
dat.model3 <- factanal(dat, 3)
dat.model4 <- factanal(dat, 4)

# CFA with the sem package; cfa() reads the model specification
# from the console, terminated by a blank line
library(sem)
model01 = cfa(reference.indicator = FALSE)
F1: extrovert, cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity

model02 = cfa(reference.indicator = FALSE)
F1: extrovert, leadership, motivated, hesitance
F2: cheerful, antisocial, talkative, popularity

mod2 <- sem(model02, cov(dat), 100)
summary(mod2)
opt <- options(fit.indices = c("RMSEA"))

# hierarchical clustering on a toy data set
cldata <- data.frame(var1 = c(4, 1, 5, 1, 5), var2 = c(1, 5, 4, 3, 1))
cldata.cluster = hclust(dist(cldata), method = "average")
plot(cldata.cluster, cex = 2)

dat <- read.csv("http://matsuka.info/data_folder/tdkClust.csv", header = TRUE, row.names = 1)
dat.cluster = hclust(dist(dat), method = "average")
plot(dat.cluster, cex = 1.5)

# k-means clustering
dat.kmeans = kmeans(dat, centers = 3, nstart = 10)
plot(dat, col = dat.kmeans$cluster + 2, pch = 20, cex = 2)
plot(dat[, 1:2], col = dat.kmeans$cluster + 1, pch = 20, cex = 5)
text(dat[, 1:2], row.names(dat), cex = 2)
res <- cu.KMC.rep(dat, 10, 1000)

dat <- read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
res <- cu.KMC.rep(dat, 10, 1000)

# MDS
dat <- data.frame(p1 = c(4, 1, 5, 1, 5), p2 = c(1, 5, 4, 3, 1))
rownames(dat) <- c('a', 'b', 'c', 'd', 'e')
dat.mds <- cmdscale(dist(dat), 2)
plot(dat.mds[, 1], dat.mds[, 2], type = 'n')
text(dat.mds[, 1], dat.mds[, 2], labels = row.names(dat))
dat.cluster = hclust(dist(dat))
plot(dat.cluster, cex = 1.5)

dat <- read.csv("http://matsuka.info/data_folder/tdkMDS02.csv", row.names = 1)
dat.mds <- cmdscale(dat, 2, eig = TRUE)
plot(dat.mds$points[, 1], dat.mds$points[, 2], type = 'n')
text(dat.mds$points[, 1], dat.mds$points[, 2], labels = row.names(dat), cex = 2)
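The dendrogram from hclust() only draws the tree; to get discrete memberships that can be compared with the k-means solution, the tree can be cut at a chosen number of clusters with stats::cutree. A minimal sketch (k = 3 mirrors the kmeans call above and is otherwise an arbitrary choice):

# cut the hierarchical tree into 3 clusters and compare with k-means
dat <- read.csv("http://matsuka.info/data_folder/tdkClust.csv", header = TRUE, row.names = 1)
dat.cluster = hclust(dist(dat), method = "average")
hc.member = cutree(dat.cluster, k = 3)        # cluster labels from the tree
dat.kmeans = kmeans(dat, centers = 3, nstart = 10)
table(hclust = hc.member, kmeans = dat.kmeans$cluster)  # cross-tabulate the two solutions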
Monthly Archives: October 2018
Cognitive Information Analysis Seminar b: DL2 – ch.2
txt = "You say goodbye and I say hello."
txt = tolower(txt)
txt = gsub('[.]', ' .', txt)
words = unlist(strsplit(txt, " "))
uniq.words = unique(words)
uniq.words[1]
which(uniq.words == "say")
n.uniq = length(uniq.words)
n.words = length(words)

# creating corpus (word IDs)
corpus = match(words, uniq.words)

## co-occurrence matrix (window size = 1)
cc = matrix(0, nrow = n.uniq, ncol = n.uniq)
for (i.c in 1:n.uniq){
  loc = which(corpus == i.c)
  # unique words appearing immediately to the left / right of word i.c
  L = which(!is.na(match(uniq.words, words[pmax(loc - 1, 1)])))
  R = which(!is.na(match(uniq.words, words[pmin(loc + 1, n.words)])))
  cc[i.c, c(L, R)] = cc[i.c, c(L, R)] + 1
}
diag(cc) = 0
rownames(cc) = colnames(cc) = uniq.words

## PPMI matrix
ppmi <- function(cc, eps = 1e-8){
  N = sum(cc)
  r.sum = rowSums(cc)
  pmi = log2(cc * N / outer(r.sum, r.sum) + eps)
  PPMI = pmax(0, pmi)
  return(PPMI)
}
PPMI = ppmi(cc)   # compute PPMI before the SVD below

# performing SVD
s = svd(PPMI)
plot(s$u[, 1], s$u[, 2], pch = 20, col = 'red')
text(s$u[, 1], s$u[, 2], row.names(cc))

# calculating cosine similarity between two words
words.sim <- function(word1, word2, eps = 1e-8){
  nw1 = word1 / (sqrt(sum(word1^2)) + eps)
  nw2 = word2 / (sqrt(sum(word2^2)) + eps)
  return(nw1 %*% nw2)
}
w1 = cc[which(uniq.words == "you"), ]
w2 = cc[which(uniq.words == "i"), ]
words.sim(w1, w2)

# reporting the N most similar words
most.sim <- function(word, uniq.words, cc, N = 5){
  n.uniq = length(uniq.words)
  word2 = cc[which(uniq.words == word), ]
  res = data.frame(words = uniq.words, similarity = rep(0, n.uniq))
  top = data.frame(words = rep("", N + 1), similarity = rep(0, N + 1))
  res$similarity = apply(cc, 1, function(x) words.sim(x, word2))
  sort.res = sort(res$similarity, decreasing = T, index.return = T)
  top$words = res$words[sort.res$ix[1:(N + 1)]]
  top$similarity = res$similarity[sort.res$ix[1:(N + 1)]]
  self.idx = which(top$words == word)   # drop the word itself from the list
  return(top[-self.idx, ])
}

## Penn Treebank
f = file("~/courses/CogMod/CMA2018/ptb.tex")
ptb <- readLines(con = f)
ptb = paste(ptb, collapse = " ")   # join all lines into one string
words = unlist(strsplit(ptb, " "))
uniq.words = unique(words)
n.uniq = length(uniq.words)
n.words = length(words)
corpus = match(words, uniq.words)
cc <- contxt(corpus, uniq.words, words)   # see the sketch of contxt() below
PPMI <- ppmi(cc)
library(rsvd)
## this takes a while
s = rsvd(PPMI)
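The Penn Treebank section calls contxt(), which is never defined in this post (it was presumably provided in class). A minimal sketch, assuming it simply wraps the co-occurrence loop shown above in a function; the name and argument order follow the call site, but the body is a reconstruction:

contxt <- function(corpus, uniq.words, words){
  n.uniq = length(uniq.words)
  n.words = length(words)
  cc = matrix(0, nrow = n.uniq, ncol = n.uniq)
  for (i.c in 1:n.uniq){
    loc = which(corpus == i.c)
    # unique words immediately to the left / right of word i.c
    L = which(!is.na(match(uniq.words, words[pmax(loc - 1, 1)])))
    R = which(!is.na(match(uniq.words, words[pmin(loc + 1, n.words)])))
    cc[i.c, c(L, R)] = cc[i.c, c(L, R)] + 1
  }
  diag(cc) = 0
  rownames(cc) = colnames(cc) = uniq.words
  return(cc)
}

With cc in place, most.sim("you", uniq.words, cc) reports the five most similar words.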
Data Analysis Fundamentals B: Factor Analysis
## chisq test
# goodness-of-fit test; rescale.p normalizes p to sum to 1
chisq.test(c(72, 23, 16, 49), p = rep(40, 4), rescale.p = T)
# test of independence without continuity correction
M = matrix(c(52, 48, 8, 42), nrow = 2)
chisq.test(M, correct = F)

## PCA
dat <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/AMSA_track_mens.csv", row.names = 1)
plot(dat, pch = 20)
dat.pca <- princomp(dat)
dist = c(100, 200, 400, 800, 1500, 5000, 10000, 42195)
log.dist = log(dist)
plot(log.dist, dat.pca$loadings[, 1], pch = 20, col = 'red',
     cex = 2, ylab = "PC loadings (1st)", xlab = "Log(distance)")
plot(log.dist, dat.pca$loadings[, 2], pch = 20, col = 'red',
     cex = 5, ylab = "PC loadings (2nd)", xlab = "Log(distance)")

### EFA
dat <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/FA01.csv")
dat.fa <- factanal(dat, 1)

dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca <- princomp(dat)
dat.fa <- factanal(dat, 1)

dat <- read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.fa <- factanal(dat, 2, rotation = "varimax", scores = "regression")
plot(dat.fa$loadings[, 1], dat.fa$loadings[, 2], pch = 20, cex = 2, col = "red",
     xlab = "Factor 1", ylab = "Factor 2")
abline(h = 0, v = 0)
text(dat.fa$loadings[, 1], dat.fa$loadings[, 2], colnames(dat), cex = 1.4)

# comparing models with 1-4 factors
dat.model1 <- factanal(dat, 1)
dat.model2 <- factanal(dat, 2)
dat.model3 <- factanal(dat, 3)
dat.model4 <- factanal(dat, 4)
print(m1res <- c(dat.model1$dof, dat.model1$PVAL))
print(m2res <- c(dat.model2$dof, dat.model2$PVAL))
print(m3res <- c(dat.model3$dof, dat.model3$PVAL))
print(m4res <- c(dat.model4$dof, dat.model4$PVAL))

# chi-square difference test between the 3- and 4-factor models
1 - pchisq(dat.model3$STATISTIC - dat.model4$STATISTIC, dat.model3$dof - dat.model4$dof)

# AIC; 36 is the number of unique covariance entries for 8 variables
AIC1 = 2*(36 - dat.model1$dof) + dat.model1$STATISTIC
AIC2 = 2*(36 - dat.model2$dof) + dat.model2$STATISTIC
AIC3 = 2*(36 - dat.model3$dof) + dat.model3$STATISTIC
AIC4 = 2*(36 - dat.model4$dof) + dat.model4$STATISTIC

### CFA with sem
### install.packages('sem')
library(sem)
model01 = cfa(reference.indicator = FALSE)
F1: extrovert, cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity

mod1 <- sem(model01, cov(dat), nrow(dat))
summary(mod1)

model02 = cfa(reference.indicator = FALSE)
F1: extrovert, leadership, motivated, hesitance
F2: cheerful, antisocial, talkative, popularity

mod2 <- sem(model02, cov(dat), 100)
opt <- options(fit.indices = c("RMSEA"))
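The AIC lines above hard-code 36, which is the number of unique variance-covariance entries for the 8 observed variables (8 x 9 / 2). A small helper makes the same formula reusable for any factanal fit; fa.AIC is a hypothetical name for this sketch, not part of any package:

# AIC = 2 * (n.params - dof) + chi-square, with n.params = p(p+1)/2
fa.AIC <- function(fit, p){
  n.cov = p * (p + 1) / 2   # free parameters in the saturated model
  return(2 * (n.cov - fit$dof) + fit$STATISTIC)
}
fa.AIC(dat.model1, ncol(dat))   # should reproduce AIC1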
Cognitive Information Analysis Seminar
# data preparation
dat <- read.csv("~/Downloads/DBDA2Eprograms/z15N50.csv")
y = dat$y
Ntotal = length(dat$y)
datalist = list(y = y, Ntotal = Ntotal)

# model
model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta )
  }
  theta ~ dbeta( 1, 1 )
}
"
writeLines(model.txt, "model.txt")

# jags
library(rjags)
jagsModel = jags.model(file = "model.txt", data = datalist, n.chains = 3, n.adapt = 500)
update(jagsModel, n.iter = 1000)
codaSamples = coda.samples(jagsModel, variable.names = c("theta"), n.iter = 5000)
mcmcMat <- as.matrix(codaSamples)

# trace plots of the three chains
par(mfrow = c(2, 2))
cols = c("orange", "skyblue", "pink")
mcmc1 <- as.mcmc(codaSamples[[1]])
mcmc2 <- as.mcmc(codaSamples[[2]])
mcmc3 <- as.mcmc(codaSamples[[3]])
plot(as.matrix(mcmc1), type = 'l', col = cols[1])
lines(as.matrix(mcmc2), col = cols[2])
lines(as.matrix(mcmc3), col = cols[3])

# autocorrelation
ac1 = autocorr(mcmc1, lags = 0:50)
ac2 = autocorr(mcmc2, lags = 0:50)
ac3 = autocorr(mcmc3, lags = 0:50)
plot(ac1, type = 'o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type = 'o', pch = 20, col = cols[2])
lines(ac3, type = 'o', pch = 20, col = cols[3])

# shrink factor
resALL = mcmc.list(mcmc1, mcmc2, mcmc3)
gd1 = gelman.plot(resALL, auto.layout = F)

# density
den1 = density(mcmc1)
den2 = density(mcmc2)
den3 = density(mcmc3)
plot(den1, type = 'l', col = cols[1], main = " ", xlab = "param. value", lwd = 2.5)
lines(den2, col = cols[2], lwd = 2.5)
lines(den3, col = cols[3], lwd = 2.5)
par(mfrow = c(1, 1))
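Besides the diagnostics, the pooled samples in mcmcMat give posterior summaries directly. A minimal sketch (this is the equal-tailed credible interval, not the HDI):

# posterior mean and 95% equal-tailed credible interval for theta
mean(mcmcMat[, "theta"])
quantile(mcmcMat[, "theta"], probs = c(0.025, 0.975))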
Cognitive Information Analysis: Review of DL1
# forward / backward functions
sigmoid.forwd <- function(x){
  return(1/(1 + exp(-x)))
}

sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x)
  return(dout * (1 - y) * y)
}

affine.forwd <- function(x, W, b){
  return(x %*% W + matrix(1, nrow = nrow(x), ncol = 1) %*% b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout %*% t(W)
  dW = t(x) %*% dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

softmax.forwd <- function(x, target){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)  # subtract row max for numerical stability
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7
  R = nrow(as.matrix(y))
  return(-sum(target * log(y + delta))/R)          # cross-entropy loss
}

softmax.bckwd <- function(x, target, dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  return((y - target)/R)
}

softmax.pred <- function(x){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}

# spiral data
N = 100; dim = 2; n.class = 3
x = matrix(0, nrow = N*n.class, ncol = dim)
t = matrix(0, nrow = N*n.class, ncol = n.class)
for (i.cl in 1:n.class){
  for (i.N in 1:N){
    radius = i.N/N
    theta = i.cl*4 + 4*radius + rnorm(1, 0, 0.2)
    idx = N*(i.cl - 1) + i.N
    x[idx, ] = c(radius*sin(theta), radius*cos(theta))
    t[idx, i.cl] = 1
  }
}
plot(x[1:100, 1], x[1:100, 2], pch = 20, col = 'black', cex = 2, xlim = c(-1, 1), ylim = c(-1, 1))
points(x[101:200, 1], x[101:200, 2], pch = 20, col = 'blue', cex = 2)
points(x[201:300, 1], x[201:300, 2], pch = 20, col = 'green', cex = 2)

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer - 1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer + 1)], sd = 0.01),
                          nrow = n.neurons[i.layer])
    b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer + 1)], sd = 0.01), nrow = 1)
  }
  return(list(W = W, b = b))
}

# training with minibatch gradient descent
train.x = x
train.y = t
params = init.network(c(2, 30, 30, 3))
batch_size = 50; n.iter = 50000; lambda = 0.3
n.train = nrow(train.x)
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask, ]
  t.batch = train.y[batch_mask, ]
  # forward pass
  a1 = affine.forwd(x.batch, params$W[[1]], params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
  z2 = sigmoid.forwd(a2)
  a3 = affine.forwd(z2, params$W[[3]], params$b[[3]])
  loss[i.iter] = softmax.forwd(a3, t.batch)
  # backward pass
  dwSM = softmax.bckwd(a3, t.batch, 1)
  dwA3 = affine.bckwd(a2, params$W[[3]], params$b[[3]], dwSM)
  dwSG2 = sigmoid.bckwd(a2, dwA3$dx)
  dwA2 = affine.bckwd(a1, params$W[[2]], params$b[[2]], dwSG2)
  dwSG = sigmoid.bckwd(a1, dwA2$dx)
  dwA1 = affine.bckwd(x.batch, params$W[[1]], params$b[[1]], dwSG)
  # parameter updates
  params$W[[3]] = params$W[[3]] - lambda*dwA3$dW
  params$b[[3]] = params$b[[3]] - lambda*dwA3$db
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
}

# plotting results
plot(loss, type = 'l', xlab = "trial")
library(plot3D)
M <- mesh(seq(-1, 1, length.out = 300), seq(-1, 1, length.out = 300))
temp.x = cbind(as.vector(M$x), as.vector(M$y))
a1 = affine.forwd(temp.x, params$W[[1]], params$b[[1]])
z1 = sigmoid.forwd(a1)
a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
z2 = sigmoid.forwd(a2)
a3 = affine.forwd(z2, params$W[[3]], params$b[[3]])
cl = softmax.pred(a3)
cl.pred = apply(cl, 1, which.max)
image(matrix(cl.pred, 300, 300))
x2 = x*0.5 + 0.5   # rescale data to [0,1] to overlay on the image
points(x2[1:100, 1], x2[1:100, 2], pch = 20, col = 'black', cex = 2)
points(x2[101:200, 1], x2[101:200, 2], pch = 20, col = 'blue', cex = 2)
points(x2[201:300, 1], x2[201:300, 2], pch = 20, col = 'green', cex = 2)
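As a sanity check, the trained network can be run on the full training set and its argmax predictions compared with the labels; a short sketch using only the functions defined above:

# training accuracy: forward pass on all data, compare argmax with labels
a1 = affine.forwd(train.x, params$W[[1]], params$b[[1]])
z1 = sigmoid.forwd(a1)
a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
z2 = sigmoid.forwd(a2)
a3 = affine.forwd(z2, params$W[[3]], params$b[[3]])
pred.class = apply(softmax.pred(a3), 1, which.max)
true.class = apply(train.y, 1, which.max)
mean(pred.class == true.class)   # proportion classified correctly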
Data Analysis Fundamentals B: W03
dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca <- princomp(dat)
biplot(dat.pca)
dat.pca$scores
dat.pca$loadings

# score of person 1 on the 1st PC, computed by hand
scoreP1S1 = dat.pca$loadings[1, 1]*(dat[1, 1] - mean(dat$writing)) +
  dat.pca$loadings[2, 1]*(dat[1, 2] - mean(dat$thesis)) +
  dat.pca$loadings[3, 1]*(dat[1, 3] - mean(dat$interview))

# score of person 2 on the 2nd PC
scoreP2S2 = dat.pca$loadings[1, 2]*(dat[2, 1] - mean(dat$writing)) +
  dat.pca$loadings[2, 2]*(dat[2, 2] - mean(dat$thesis)) +
  dat.pca$loadings[3, 2]*(dat[2, 3] - mean(dat$interview))

dat <- read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.pca <- princomp(dat)
screeplot(dat.pca, type = "lines")
biplot(dat.pca)
biplot(dat.pca, choices = c(2, 3))

# effect of scale: shrinking one variable changes the PCA solution
dat2 <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat2[, 1] = dat2[, 1]*0.1
dat2.pca <- princomp(dat2)
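The two hand-computed scores above are instances of a general fact: princomp() scores are the mean-centered data multiplied by the loadings. A quick sketch verifying this for all observations at once (re-reading tdkPCA01.txt so the snippet is self-contained):

dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca <- princomp(dat)
centered <- scale(dat, center = TRUE, scale = FALSE)   # subtract column means
head(centered %*% dat.pca$loadings)                    # compare with head(dat.pca$scores)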
Data Analysis Fundamentals B: Review of R
dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
dat2 <- read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
source("http://www.matsuka.info/univ/course_folder/cutil.R")

# extracting the result scores for each method
score.X = dat2$result[dat2$method == "method.X"]
score.Y = dat2$result[dat2$method == "method.Y"]
dat2X <- dat2[dat2$method == "method.X", ]
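The last line keeps rows by logical indexing; subset() is an equivalent, arguably more readable form. A minimal sketch of the equivalence (the two differ only when the condition column contains NAs):

# equivalent row subsetting with subset()
dat2X.alt <- subset(dat2, method == "method.X")
identical(dat2X, dat2X.alt)   # should be TRUE when method has no NAs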