library(rjags)
source("http://www.matsuka.info/univ/course_folder/HDI_revised.txt")

island.hopping2 <- function(n.rep=1e5, init.st=4) {
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # intro to MCMC, island hopping
  # initialization
  state = 1:7
  curr.st = init.st
  state.counter = rep(0, 7)
  state.counter[curr.st] = 1
  state.history = rep(0, n.rep)
  state.history[1] = curr.st
  prob.propose = matrix(1/6, nrow=7, ncol=7)
  diag(prob.propose) <- 0
  # main loop
  for (i.rep in 2:n.rep) {
    destination = sample(state, 1, prob=prob.propose[curr.st,])
    prob.move = min(destination/curr.st, 1)
    if (runif(1) < prob.move) {
      curr.st = destination
    }
    state.history[i.rep] = curr.st
    state.counter[curr.st] = state.counter[curr.st] + 1
  }
  par(mfrow=c(2, 1))
  par(mai=c(0, 1, 1, 1))
  par(mar=c(4, 3, 1, 3))
  barplot(state.counter, xlab="theta", ylab="Frequency")
  plot(state.history, 1:n.rep, type='o', log='y', xlab="theta",
       ylab="Time Step (log)", col="orange")
}
island.hopping2(10000, 4)

metropolis.ex01 <- function(n.iter=1e6, theta.init=0.1, sigma=0.2, plot.yn = T){
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # metropolis algo. with 1 parameter
  # "posterior of theta" function
  posterior.theta <- function(theta, N, z, a, b) {
    posterior = theta^z * (1-theta)^(N-z) * theta^(a-1) * (1-theta)^(b-1) / beta(a,b)
    return(posterior)
  }
  # initialization
  theta.history = rep(0, n.iter)
  theta.current = theta.init
  theta.history[1] = theta.current
  # values given in text
  mu = 0
  N = 20
  z = 14
  a = 1
  b = 1
  # main loop
  for (i_iter in 2:n.iter) {
    theta.proposed = theta.current + rnorm(1, mean=mu, sd=sigma)
    if (theta.proposed < 0 | theta.proposed > 1) {
      p.move = 0
    } else {
      p.current = posterior.theta(theta.current, N, z, a, b)
      p.proposed = posterior.theta(theta.proposed, N, z, a, b)
      p.move = min(p.proposed/p.current, 1)
    }
    if (runif(1) < p.move) {
      theta.current = theta.proposed
    }
    theta.history[i_iter] = theta.current
  }
  # plotting results
  if (plot.yn == T) {
    par(mfrow = c(3, 1))
    hist(theta.history, nclass = 100, col = "orange", probability = T, xlab = "theta")
    den = density(theta.history)
    lines(den)
    plot(theta.history[(n.iter-100):n.iter], ((n.iter-100):n.iter), type = 'o',
         xlim = c(0,1), xlab = "theta", ylab = "step in chain")
    plot(theta.history[1:100], 1:100, type = 'o', xlim = c(0,1),
         xlab = "theta", ylab = "step in chain")
  }
  return(theta.history)
}
res = metropolis.ex01(10000, 0.1)

# componentwise Metropolis (Gibbs-style) example with 2 parameters
# initialization
n.iter = 10000; sigma = 0.1; counter = 0
z = c(6, 2); N = c(8, 7)
a = c(2, 2); b = c(2, 2); n.par = 2
th.hist = matrix(0, nrow = n.iter*n.par, ncol = n.par)
theta = runif(2)
# function to calc. prob. move
prob.gibbs <- function(theta, proposed, N, z, a, b, idx){
  p.th = dbeta(theta[idx], z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  p.pro = dbeta(proposed, z[idx]+a[idx], N[idx]-z[idx]+b[idx])
  return(p.pro/p.th)
}
# main loop
for (i.iter in 1:n.iter){
  for (i.par in 1:n.par){
    proposed = theta[i.par] + rnorm(1, mean=0, sd=sigma)
    if (proposed > 1) {proposed = 1}
    if (proposed < 0) {proposed = 0}
    p.move = min(1, prob.gibbs(theta, proposed, N, z, a, b, i.par))
    if (runif(1) < p.move){
      theta[i.par] = proposed
    }
    counter = counter + 1
    th.hist[counter, ] = theta
  }
}
par(mfrow=c(3,1))
HDI.plot(th.hist[,1])
HDI.plot(th.hist[,2])
plot(th.hist[,1], th.hist[,2], type='o', cex=0.1, xlim=c(0,1), ylim=c(0,1))
par(mfrow=c(1,1))

# JAGS: single theta, Bernoulli likelihood
model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta )
  }
  theta ~ dbeta( 1, 1 )
}"
writeLines(model.txt, "model.txt")
dat <- read.csv("http://www.matsuka.info/univ/course_folder/z15N50.csv")
y = dat$y
Ntotal = length(dat$y)
datalist = list(y=y, Ntotal=Ntotal)
# jags
jagsModel = jags.model(file="model.txt", data=datalist, n.chains=3, n.adapt=500)
update(jagsModel, n.iter=1000)
codaSamples = coda.samples(jagsModel, variable.names=c("theta"), n.iter=5000)
mcmcMat <- as.matrix(codaSamples)
par(mfrow=c(2,2))
cols = c("orange", "skyblue", "pink")
# chain
mcmc1 <- as.mcmc(codaSamples[[1]])
mcmc2 <- as.mcmc(codaSamples[[2]])
mcmc3 <- as.mcmc(codaSamples[[3]])
plot(mcmc1, type='l')
lines(mcmc2, col='red')
lines(mcmc3, col='blue')
# autocorrelation
ac1 = autocorr(mcmc1, lags=0:50)
ac2 = autocorr(mcmc2, lags=0:50)
ac3 = autocorr(mcmc3, lags=0:50)
plot(ac1, type='o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type='o', pch = 20, col = cols[2])
lines(ac3, type='o', pch = 20, col = cols[3])
# shrink factor
resALL = mcmc.list(mcmc1, mcmc2, mcmc3)
gd1 = gelman.plot(resALL, auto.layout = F)
# density
den1 = density(mcmc1)
den2 = density(mcmc2)
den3 = density(mcmc3)
plot(den1, type='l', col = cols[1], main = " ", xlab = "param. value", lwd=2.5)
lines(den2, col = cols[2], lwd=2.5)
lines(den3, col = cols[3], lwd=2.5)
par(mfrow=c(1,1))

# JAGS: one theta per subject
model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta[s[i_data]] )
  }
  for ( i_s in 1:Nsubj) {
    theta[i_s] ~ dbeta( 2, 2 )
  }
}"
writeLines(model.txt, "model.txt")
dat <- read.csv("http://www.matsuka.info/univ/course_folder/z6N8z2N7.csv")
y = dat$y
s = as.numeric(dat$s)
Ntotal = length(dat$y)
Nsubj = length(unique(s))
datalist = list(y=y, s=s, Ntotal=Ntotal, Nsubj=Nsubj)
jagsModel = jags.model(file="model.txt", data=datalist, n.chains=3, n.adapt=500)
update(jagsModel, n.iter=1000)
codaSamples = coda.samples(jagsModel, variable.names=c("theta"), n.iter=5000)
mcmcMat <- as.matrix(codaSamples)
par(mfrow=c(2,2))
cols = c("orange", "skyblue", "pink")
# chain
mcmc1 <- as.mcmc(codaSamples[[1]])
mcmc2 <- as.mcmc(codaSamples[[2]])
mcmc3 <- as.mcmc(codaSamples[[3]])
plot(mcmc1, type='l')
lines(mcmc2, col='red')
lines(mcmc3, col='blue')
# autocorrelation
ac1 = autocorr(mcmc1, lags=0:50)
ac2 = autocorr(mcmc2, lags=0:50)
ac3 = autocorr(mcmc3, lags=0:50)
plot(ac1, type='o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type='o', pch = 20, col = cols[2])
lines(ac3, type='o', pch = 20, col = cols[3])
# shrink factor
resALL = mcmc.list(mcmc1, mcmc2, mcmc3)
gd1 = gelman.plot(resALL, auto.layout = F)
# density
den1 = density(mcmc1)
den2 = density(mcmc2)
den3 = density(mcmc3)
plot(den1, type='l', col = cols[1], main = " ", xlab = "param. value", lwd=2.5)
lines(den2, col = cols[2], lwd=2.5)
lines(den3, col = cols[3], lwd=2.5)
par(mfrow=c(1,1))

# JAGS: normal model for the Smart Drug group
model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dnorm( mu , 1/sigma^2 )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
}
"
writeLines(model.txt, "model.txt")
dat <- read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=1000)
update(jagsModel, n.iter=1000)
codaSamples = coda.samples(jagsModel, variable.names=c("mu","sigma"), n.iter=5000, thin=5)
mcmcMat <- as.matrix(codaSamples)
# calculating & plotting normality
par(mfrow=c(2,2))
HDI.plot(mcmcMat[,1])
hist(y, nclass=15, probability = T)
x.temp = seq(40, 200, 0.1)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot){
  norm.temp = dnorm(x.temp, mean=mcmcMat[randperm[i.plot],1], sd=mcmcMat[randperm[i.plot],2])
  lines(x.temp, norm.temp, col='orange')
}
HDI.plot(mcmcMat[,2])
# calculating & plotting effect size
effect.size = (mcmcMat[,"mu"]-100)/mcmcMat[,"sigma"]
HDI.plot(effect.size)

# JAGS: robust (t-distribution) model for the Smart Drug group
dat <- read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)
model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu , 1/sigma^2 , nu )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/30.0)
}"
writeLines(model.txt, "model.txt")
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=1000)
update(jagsModel, n.iter=1000)
codaSamples = coda.samples(jagsModel, variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat <- as.matrix(codaSamples)   # columns are in alphabetical order: mu, nu, sigma
# calculating & plotting normality and effect size
par(mfrow=c(3,2))
HDI.plot(mcmcMat[,1])
HDI.plot(mcmcMat[,3])
normality = log10(mcmcMat[,"nu"])
HDI.plot(normality)
effect.size = (mcmcMat[,"mu"]-100)/mcmcMat[,"sigma"]
HDI.plot(effect.size)
hist(y, nclass=20, probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40, 200, 0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],3]
  t.temp = dt(x.temp2, mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],3]
  lines(x.temp1, t.temp, col='orange')
}
par(mfrow=c(2,2))
plot(mcmcMat[,1], mcmcMat[,3], col='orange')
plot(mcmcMat[,1], log10(mcmcMat[,2]), col='orange')
plot(0, 0, type='n')
plot(log10(mcmcMat[,2]), mcmcMat[,3], col='orange')

# JAGS: robust two-group comparison
dat <- read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score
group = as.numeric(dat$Group)
Ntotal = length(y)
Ngroup = length(unique(group))
model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu[group[i]] , 1/sigma[group[i]]^2 , nu )
  }
  for (j in 1:Ngroup){
    mu[j] ~ dnorm( meanY , 1/(100*sdY)^2 )
    sigma[j] ~ dunif( sdY/1000 , sdY*1000 )
  }
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/29)
}"
writeLines(model.txt, "model.txt")
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y), Ngroup=Ngroup, group=group)
jagsModel = jags.model("model.txt", data=dataList, n.chains=3, n.adapt=5000)
update(jagsModel, n.iter=1000)
codaSamples = coda.samples(jagsModel, variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat <- as.matrix(codaSamples)   # columns: mu[1], mu[2], nu, sigma[1], sigma[2]
# plotting result
par(mfrow=c(5,2))
HDI.plot(mcmcMat[,1], xlabel="placebo Mean")
hist(y[dat$Group=="Placebo"], nclass=20, probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40, 200, 0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],4]
  t.temp = dt(x.temp2, mcmcMat[randperm[i.plot],3])/mcmcMat[randperm[i.plot],4]
  lines(x.temp1, t.temp, col='orange')
}
HDI.plot(mcmcMat[,2], xlabel="smart drug Mean")
hist(y[dat$Group=="Smart Drug"], nclass=20, probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40, 200, 0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],5]
  t.temp = dt(x.temp2, mcmcMat[randperm[i.plot],3])/mcmcMat[randperm[i.plot],5]
  lines(x.temp1, t.temp, col='orange')
}
HDI.plot(mcmcMat[,4], xlabel="placebo scale")
HDI.plot(mcmcMat[,2]-mcmcMat[,1], xlabel="Difference of Means")
HDI.plot(mcmcMat[,5], xlabel="smart drug scale")
HDI.plot(mcmcMat[,5]-mcmcMat[,4], xlabel="Difference of Scales")
HDI.plot(log10(mcmcMat[,3]), xlabel="Normality")
effect.size = (mcmcMat[,2]-mcmcMat[,1])/sqrt((mcmcMat[,5]^2+mcmcMat[,4]^2)/2)
HDI.plot(effect.size, xlabel="effect size")
Fundamentals of Data Analysis B: Discriminant Analysis+
library(MASS) dat<-data.frame(writing=c(68,85,50,54,66,35,56,25,43,70), interview=c(65,80,95,70,75,55,65,75,50,40), cl=c(rep("A",5),rep("N",5))) dat.lda<-lda(cl~.,data=dat) intcpt = (dat.lda$scaling[1]*dat.lda$means[1,1] + dat.lda$scaling[2]*dat.lda$means[1,2] + dat.lda$scaling[1]*dat.lda$means[2,1] + dat.lda$scaling[2]*dat.lda$means[2,2])/2 z = as.matrix(dat[,1:2])%*%dat.lda$scaling-intcpt new.dim.slope = dat.lda$scaling[1]/dat.lda$scaling[2] disc.intcpt = intcpt / dat.lda$scaling[2] disc.slope = -dat.lda$scaling[1] / dat.lda$scaling[2] ggplot(dat, aes(x = writing, y= interview, color = cl)) + geom_point(size = 4) + geom_abline(aes(intercept = intcpt, slope = new.dim.slope )) + geom_abline(aes(intercept = disc.intcpt, slope = disc.slope ),color = "red") + xlim(30,100)+ylim(30,100) dat<-read.csv("http://matsuka.info/data_folder/tdkDA01.csv", header=T) dat.lda<-lda(class~.,dat) lda.pred<-predict(dat.lda,dat) table(lda.pred$class, dat$class) dat.lda<-lda(class~.,dat, CV=T) dat.cl = dat.lda$posterior[,1]>dat.lda$posterior[,2] table(dat.cl, dat$class) dat<-read.csv("http://matsuka.info/data_folder/tdkDA02.csv",header=T) dat.lda=lda(class~.,data=dat) lda.pred <- predict(dat.lda, dat) plot(dat.lda, dimen=3, col=as.numeric(lda.pred$class),cex=2) dat.km<-kmeans(dat[,1:6],5) table(lda.pred$class,dat.km$cluster) plot(dat,col=dat.km$cluster,pch=20) plot(dat.lda, dimen=2, col=as.numeric(lda.pred$class),cex=2) dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv") dat1<-subset(dat,dat$popularity<5) dat2<-subset(dat,dat$popularity>4 & dat$popularity<6) dat3<-subset(dat,dat$popularity>6) dat1$popularity="LP";dat2$popularity="MP";dat3$popularity="VP" datT=rbind(dat1,dat2,dat3) datT.lda<-lda(popularity~.,datT) datT.pred<-predict(datT.lda,datT) table(datT.pred$class,datT$popularity) par(mfrow=c(1,2)) plot(datT.lda,col=c(rep('red',20),rep('blue',28),rep('green',29)),cex=1.5) plot(datT.lda,col=as.numeric(datT.pred$class),cex=1.5) par(mfrow=c(1,1)) library(nnet) dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") set.seed(5) x = dat[,1:3] y = dat[,4] dat.nnet = nnet(x,y, size = 150, linout= TRUE, maxit = 1000) nnet.pred <-predict(dat.nnet,dat) cor(dat.nnet$fitted.values,dat$sales)^2 dat.lm<-lm(sales~.,data=dat) plot(dat.nnet$fitted.values, dat$sales,pch=20,col='black') points(predict(dat.lm), dat$sales,pch=20,col='red') n.data = nrow(dat);n.sample = n.data*0.6; n.rep = 100 trainNN.cor = rep(0,n.rep); trainLM.cor = rep(0,n.rep) testNN.cor = rep(0,n.rep); testLM.cor = rep(0,n.rep) for (i.rep in 1:n.rep){ randperm = sample(1:n.data) train.idx = randperm[1:n.sample] test.idx = randperm[(n.sample+1):n.data] dat.nnet <- nnet(sales~.,size = 100, linout=T, maxit=1000, data = dat[train.idx,]) dat.lm <-lm(sales~.,data=dat[train.idx, ]) trainNN.cor[i.rep] <- cor(predict(dat.nnet,dat[train.idx, ]), dat[train.idx,]$sales) trainLM.cor[i.rep] <- cor(predict(dat.lm,dat[train.idx, ]), dat[train.idx,]$sales) testNN.cor[i.rep] <- cor(predict(dat.nnet,dat[test.idx, ]), dat[test.idx,]$sales) testLM.cor[i.rep] <- cor(predict(dat.lm,dat[test.idx, ]), dat[test.idx,]$sales) } print(c(mean(trainNN.cor,na.rm=T),mean(testNN.cor,na.rm=T))) print(c(mean(trainLM.cor,na.rm=T),mean(testLM.cor,na.rm=T))) n.data = nrow(dat);n.sample = n.data*0.6; n.rep = 100 trainNN.cor = rep(0,n.rep); trainLM.cor = rep(0,n.rep) testNN.cor = rep(0,n.rep); testLM.cor = rep(0,n.rep) for (i.rep in 1:n.rep){ randperm = sample(1:n.data) train.idx = randperm[1:n.sample] test.idx = randperm[(n.sample+1):n.data] dat.nnet <- nnet(sales~.,size = 
30, linout=T,decay= 0.1, maxit=1000, data = dat[train.idx,]) dat.lm <-lm(sales~.,data=dat[train.idx, ]) trainNN.cor[i.rep] <- cor(predict(dat.nnet,dat[train.idx, ]), dat[train.idx,]$sales) trainLM.cor[i.rep] <- cor(predict(dat.lm,dat[train.idx, ]), dat[train.idx,]$sales) testNN.cor[i.rep] <- cor(predict(dat.nnet,dat[test.idx, ]), dat[test.idx,]$sales) testLM.cor[i.rep] <- cor(predict(dat.lm,dat[test.idx, ]), dat[test.idx,]$sales) } print(c(mean(trainNN.cor,na.rm=T),mean(testNN.cor,na.rm=T))) print(c(mean(trainLM.cor,na.rm=T),mean(testLM.cor,na.rm=T))) dat<-read.csv("http://matsuka.info/data_folder/tdkDA01.csv", header =T) dat.nnet<-nnet(class~.,dat,size=30,maxit=1000,decay=0.05) dat.pred<-predict(dat.nnet,dat,type="class") table(dat.pred,dat$class) dat.glm<-glm(class~., family="binomial",dat) glm.pred<-predict(dat.glm, dat, type="response")>0.5 table(glm.pred,dat$class) dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv") dat.nnet<-nnet(survival~., dat, size=30, maxit=1000, decay=0.01) dat.pred<-predict(dat.nnet,dat,type="class") table(dat.pred,dat$survival) Ns = summary(dat$survival) (Ns[1]/Ns[2])^-1 wts = rep(1,nrow(dat)) wts[which(dat$survival=="no")]=45 dat.nnet<-nnet(survival~., weights=wts, dat, size=30, maxit=1000, decay=0.01) dat.pred<-predict(dat.nnet,dat,type="class") table(dat.pred,dat$survival) dat<-read.csv("http://matsuka.info/data_folder/tdkDA02.csv",header=T) class.id<-class.ind(dat$class) x = dat[,1:6] dat.nnet<-nnet(x,class.id,size=30, maxit=1000, decay=0.01, softmax=TRUE) max.id = apply(dat.nnet$fitted.values,1,which.max) table(max.id,dat$class) dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat.nnet<-nnet(dat,dat,size=2, maxit=1000, decay=0.01, linout=TRUE) cor(dat.nnet$fitted.values,dat)
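The confusion matrices above are printed but never summarized as a single figure. A small helper for turning any of those tables into an overall accuracy, not part of the original post, could look like this (the example call uses the survival model's dat and dat.pred, so it would be run right after that table() call):

# helper: overall accuracy from a confusion matrix
accuracy <- function(tab) sum(diag(tab)) / sum(tab)
accuracy(table(dat.pred, dat$survival))   # proportion of correctly classified cases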
Special Lectures on Wide-Area Systems II: Deep Learning with Keras
library(keras) ### iris data dat = iris x_train = dat[,1:3] %>% as.matrix() y_train <- dat[,4] %>% to_categorical() model_iris <- keras_model_sequential() %>% layer_dense(units=20, activation = "relu", input_shape = ncol(x_train)) %>% layer_dense(units = 10, activation = "relu") %>% layer_dense(units = 3, activation = "softmax") summary(model_iris) model_iris %>% compile( optimizer = "rmsprop", loss = "categorical_crossentropy", metrics = c("accuracy") ) model_iris %>% fit( x_train, y_train, epochs = 100, batch_size = 5 ) model_iris %>% evaluate(x_train,y_train) ### with vaidation history <- model_iris %>% fit(x_train, y_train, validation_split = 0.20, epochs=300, batch_size = 5, shuffle = T) plot(history) normalizer <- function(x) { min_x = apply(x,2,min) max_x = apply(x,2,max) num <- t(x) - min_x denom <- max_x - min_x normalized = t(num/denom) return (normalized) } x_trainN = normalizer(x_train) model_iris <- keras_model_sequential() %>% layer_dense(units=20, activation = "relu", input_shape = ncol(x_train)) %>% layer_dense(units = 10, activation = "relu") %>% layer_dense(units = 3, activation = "softmax") model_iris %>% compile( optimizer = "rmsprop", loss = "categorical_crossentropy", metrics = c("accuracy") ) history_N <- model_iris %>% fit(x_trainN, y_train, validation_split = 0.20, epochs=150, batch_size = 5, shuffle = T) plot(history_N) ### reuter imdb <- dataset_imdb(num_words = 10000) c(c(train_data, train_labels), c(test_data, test_labels)) %<-% imdb vectorize_sequences <- function(sequences, dimension = 10000) { results <- matrix(0, nrow = length(sequences), ncol = dimension) for (i in 1:length(sequences)) results[i, sequences[[i]]] <- 1 results } x_trainTEMP <- vectorize_sequences(train_data) x_test <- vectorize_sequences(test_data) y_trainTEMP <- as.numeric(train_labels) y_test <- as.numeric(test_labels) val_idx = 1:1000 x_val = x_trainTEMP[val_idx, ] y_val = y_trainTEMP[val_idx] x_train = x_trainTEMP[-val_idx, ] y_train = y_trainTEMP[-val_idx] original_model <- keras_model_sequential() %>% layer_dense(units = 16, activation = "relu", input_shape = c(10000)) %>% layer_dense(units = 16, activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") original_model %>% compile( optimizer = "rmsprop", loss = "binary_crossentropy", metrics = c("accuracy") ) history_orig <- original_model %>% fit(x_train, y_train, epochs=20, batch_size = 512, validation_data = list(x_val, y_val)) plot(history_orig) original_model %>% evaluate(x_test, y_test) smaller_model <- keras_model_sequential() %>% layer_dense(units = 4, activation = "relu", input_shape = c(10000)) %>% layer_dense(units = 4, activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") smaller_model %>% compile( optimizer = "rmsprop", loss = "binary_crossentropy", metrics = c("accuracy") ) history_small <- smaller_model %>% fit(x_train, y_train, epochs=20, batch_size = 512, validation_data = list(x_val, y_val)) plot(history_small) smaller_model %>% evaluate(x_test, y_test) bigger_model <- keras_model_sequential() %>% layer_dense(units = 512, activation = "relu", input_shape = c(10000)) %>% layer_dense(units = 512, activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") bigger_model %>% compile( optimizer = "rmsprop", loss = "binary_crossentropy", metrics = c("accuracy") ) history_big <- bigger_model %>% fit(x_train, y_train, epochs=20, batch_size = 512, validation_data = list(x_val, y_val)) plot(history_big) model_L2 <- keras_model_sequential() %>% layer_dense(units = 16, kernel_regularizer 
= regularizer_l2(0.0001), activation = "relu", input_shape = c(10000)) %>% layer_dense(units = 16, kernel_regularizer = regularizer_l2(0.0001), activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") model_L2 %>% compile( optimizer = "rmsprop", loss = "binary_crossentropy", metrics = c("accuracy") ) history_L2 <- model_L2 %>% fit(x_train, y_train, epochs=20, batch_size = 512, validation_data = list(x_val, y_val)) plot(history_L2) model_L2 %>% evaluate(x_test, y_test) model_DO <- keras_model_sequential() %>% layer_dense(units = 16,activation = "relu", input_shape = c(10000)) %>% layer_dropout(rate = 0.5) %>% layer_dense(units = 16,activation = "relu") %>% layer_dropout(rate = 0.5) %>% layer_dense(units = 1, activation = "sigmoid") model_DO %>% compile( optimizer = "rmsprop", loss = "binary_crossentropy", metrics = c("accuracy") ) history_DO <- model_DO %>% fit(x_train, y_train, epochs=20, batch_size = 512, validation_data = list(x_val, y_val)) plot(history_DO) model_DO %>% evaluate(x_test, y_test) ### MNIST mnist <- dataset_mnist() c(c(tr_img, tr_lab),c(te_img, te_lab)) %<-% mnist tr_img <- array_reshape(tr_img, c(60000,28,28,1)) tr_img = tr_img / 255 te_img <- array_reshape(te_img, c(10000,28,28,1)) te_img = te_img / 255 tr_lab = to_categorical(tr_lab) te_lab = to_categorical(te_lab) model <- keras_model_sequential() %>% layer_conv_2d(filter = 32, kernel_size = c(3,3), activation = "relu",input_shape = c(28,28,1)) %>% layer_max_pooling_2d(pool_size = c(2,2)) %>% layer_conv_2d(filter = 64, kernel_size = c(3,3),activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2,2)) %>% layer_flatten() %>% layer_dense(units =64, activation = "relu") %>% layer_dense(units =10, activation = "softmax") model %>% compile( optimizer = "rmsprop", loss = "categorical_crossentropy", metrics = c("accuracy") ) history <- model %>% fit( tr_img, tr_lab, epochs = 5, validation_split = 0.20, batch_size = 64 ) model %>% evaluate(te_img, te_lab) original_dataset_dir <- "~/Downloads/dogs-vs-cats/train" base_dir <- "~/Downloads/cats_and_dogs_small/" dir.create(base_dir) train_dir <- file.path(base_dir, "train") dir.create(train_dir) validation_dir <- file.path(base_dir, "validation") dir.create(validation_dir) test_dir <- file.path(base_dir, "test") dir.create(test_dir) train_cats_dir <- file.path(train_dir, "cats") dir.create(train_cats_dir) train_dogs_dir <- file.path(train_dir, "dogs") dir.create(train_dogs_dir) validation_cats_dir <- file.path(validation_dir, "cats") dir.create(validation_cats_dir) validation_dogs_dir <- file.path(validation_dir, "dogs") dir.create(validation_dogs_dir) test_cats_dir <- file.path(test_dir, "cats") dir.create(test_cats_dir) test_dogs_dir <- file.path(test_dir, "dogs") dir.create(test_dogs_dir) fnames <- paste0("cat.", 1:1000, ".jpg") file.copy(file.path(original_dataset_dir, fnames), file.path(train_cats_dir)) fnames <- paste0("cat.", 1001:1500, ".jpg") file.copy(file.path(original_dataset_dir, fnames), file.path(validation_cats_dir)) fnames <- paste0("cat.", 1501:2000, ".jpg") file.copy(file.path(original_dataset_dir, fnames), file.path(test_cats_dir)) fnames <- paste0("dog.", 1:1000, ".jpg") file.copy(file.path(original_dataset_dir, fnames), file.path(train_dogs_dir)) fnames <- paste0("dog.", 1001:1500, ".jpg") file.copy(file.path(original_dataset_dir, fnames), file.path(validation_dogs_dir)) fnames <- paste0("dog.", 1501:2000, ".jpg") file.copy(file.path(original_dataset_dir, fnames), file.path(test_dogs_dir)) model <- keras_model_sequential() %>% 
layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu", input_shape = c(150, 150, 3)) %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_flatten() %>% layer_dense(units = 512, activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") summary(model) model %>% compile( loss = "binary_crossentropy", optimizer = optimizer_rmsprop(lr = 1e-4), metrics = c("acc") ) train_datagen <- image_data_generator(rescale = 1/255) validation_datagen <- image_data_generator(rescale = 1/255) train_generator <- flow_images_from_directory( train_dir, train_datagen, target_size = c(150, 150), batch_size = 20, class_mode = "binary" ) validation_generator <- flow_images_from_directory( validation_dir, validation_datagen, target_size = c(150, 150), batch_size = 20, class_mode = "binary" ) history <- model %>% fit_generator( train_generator, steps_per_epoch = 100, epochs = 10, validation_data = validation_generator, validation_steps = 50 ) model <- keras_model_sequential() %>% layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu", input_shape = c(150, 150, 3)) %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_flatten() %>% layer_dropout(rate = 0.5) %>% layer_dense(units = 512, activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") model %>% compile( loss = "binary_crossentropy", optimizer = optimizer_rmsprop(lr = 1e-4), metrics = c("acc") ) datagen <- image_data_generator( rescale = 1/255, rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = TRUE ) test_datagen <- image_data_generator(rescale = 1/255) train_generator <- flow_images_from_directory( train_dir, datagen, target_size = c(150, 150), batch_size = 32, class_mode = "binary" ) validation_generator <- flow_images_from_directory( validation_dir, test_datagen, target_size = c(150, 150), batch_size = 32, class_mode = "binary" ) history <- model %>% fit_generator( train_generator, steps_per_epoch = 100, epochs = 10, validation_data = validation_generator, validation_steps = 50 ) datagen <- image_data_generator( rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = FALSE, fill_mode = "nearest" ) conv_base <- application_vgg16( weights = "imagenet", include_top = FALSE, input_shape = c(150, 150, 3) ) summary(conv_base) model <- keras_model_sequential() %>% conv_base %>% layer_flatten() %>% layer_dense(units = 256, activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") train_datagen = image_data_generator( rescale = 1/255, rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = TRUE, fill_mode = "nearest" ) test_datagen <- 
image_data_generator(rescale = 1/255) train_generator <- flow_images_from_directory( train_dir, train_datagen, target_size = c(150, 150), batch_size = 20, class_mode = "binary" ) validation_generator <- flow_images_from_directory( validation_dir, test_datagen, target_size = c(150, 150), batch_size = 20, class_mode = "binary" ) model %>% compile( loss = "binary_crossentropy", optimizer = optimizer_rmsprop(lr = 2e-5), metrics = c("accuracy") ) history <- model %>% fit_generator( train_generator, steps_per_epoch = 100, epochs = 3, validation_data = validation_generator, validation_steps = 50 ) unfreeze_weights(conv_base, from = "block5_conv1") model %>% compile( loss = "binary_crossentropy", optimizer = optimizer_rmsprop(lr = 1e-5), metrics = c("accuracy") ) history <- model %>% fit_generator( train_generator, steps_per_epoch = 100, epochs = 10, validation_data = validation_generator, validation_steps = 50 ) test_generator <- flow_images_from_directory( test_dir, test_datagen, target_size = c(150, 150), batch_size = 20, class_mode = "binary" ) model %>% evaluate_generator(test_generator, steps = 50)
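The post ends by calling evaluate_generator() on the held-out images, which returns only the loss and accuracy. If per-image class probabilities are also wanted, the generator-based predict call can be used; a minimal sketch (not in the original post):

# per-image probabilities for the 1,000 test images (50 batches of 20)
test_probs <- model %>% predict_generator(test_generator, steps = 50)
head(test_probs)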
Cognitive Information Analysis Seminar: Visualizing a Convnet
base_dir <- "/home/shiga/R_data/cat_and_dog/small_set" train_dir <- file.path(base_dir, "train") validation_dir <- file.path(base_dir, "validation") test_dir <- file.path(base_dir, "test") library(keras) model <- keras_model_sequential() %>% layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu", input_shape = c(150, 150, 3)) %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>% layer_max_pooling_2d(pool_size = c(2, 2)) %>% layer_flatten() %>% layer_dropout(rate = 0.5) %>% layer_dense(units = 512, activation = "relu") %>% layer_dense(units = 1, activation = "sigmoid") model %>% compile( loss = "binary_crossentropy", optimizer = optimizer_rmsprop(lr = 1e-4), metrics = c("acc") ) datagen <- image_data_generator( rescale = 1/255, rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = TRUE ) test_datagen <- image_data_generator(rescale = 1/255) train_generator <- flow_images_from_directory( train_dir, datagen, target_size = c(150, 150), batch_size = 32, class_mode = "binary" ) validation_generator <- flow_images_from_directory( validation_dir, test_datagen, target_size = c(150, 150), batch_size = 32, class_mode = "binary" ) history <- model %>% fit_generator( train_generator, steps_per_epoch = 100, epochs = 100, validation_data = validation_generator, validation_steps = 50 ) ### img_path <- "/home/shiga/R_data/cat_and_dog/small_set/test/cat/cat.1700.jpg" img <- image_load(img_path, target_size = c(150, 150)) img_tensor <- image_to_array(img) img_tensor <- array_reshape(img_tensor, c(1, 150, 150, 3)) img_tensor <- img_tensor / 255 plot(as.raster(img_tensor[1,,,])) layer_outputs <- lapply(model$layers[1:8], function(layer) layer$output) # Creates a model that will return these outputs, given the model input: activation_model <- keras_model(inputs = model$input, outputs = layer_outputs) activations <- activation_model %>% predict(img_tensor) first_layer_activation <- activations[[1]] dim(first_layer_activation) plot_channel <- function(channel) { rotate <- function(x) t(apply(x, 2, rev)) image(rotate(channel), axes = FALSE, asp = 1, col = terrain.colors(12)) } plot_channel(first_layer_activation[1,,,5]) fifth_layer_activation <- activations[[5]] plot_channel(fifth_layer_activation[1,,,1]) plot_channel(fifth_layer_activation[1,,,3])
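plot_channel() is applied above to a few hand-picked channels. To scan a whole activation map at once, one might loop over channels in a grid; this is only a sketch, and it assumes the first layer has at least 16 channels (it has 32 filters here):

# first 16 channels of the first convolutional layer, shown in a 4 x 4 grid
par(mfrow = c(4, 4), mar = c(0.5, 0.5, 0.5, 0.5))
for (i.ch in 1:16) {
  plot_channel(first_layer_activation[1, , , i.ch])
}
par(mfrow = c(1, 1))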
Fundamentals of Data Analysis B: Test Theory
install.packages("psych")
library("psych")
dat <- read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt")
ca <- alpha(dat)
dat2 <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/survey2.csv")
image(cor(dat2)[10:1, 1:10])
ca2 <- alpha(dat2)
ca2
ca1_5 = alpha(dat[,1:5])
ca6_10 = alpha(dat[,6:10])

### not recommended ###
# FA
F1 <- factanal(dat[,1:5], 1)
F2 <- factanal(dat[,6:10], 1)
library(sem)
# cfa() reads the model specification interactively: the two "F1:"/"F2:" lines
# below are typed at the console, followed by a blank line
fa.model = cfa(reference.indicator=FALSE)
F1: q1,q2,q3,q4,q5
F2: q6,q7,q8,q9,q10

# update() is also interactive; the "delete" line removes the factor covariance
fa.model <- update(fa.model)
delete, F1<->F2

fa.result <- sem(fa.model, cov(dat2), 300)
summary(fa.result)

### recommended approach
install.packages("ltm")
library(ltm)
dat <- read.table("http://peach.l.chiba-u.ac.jp/course_folder/survey_data01.txt") - 1
descript(dat)
irt1P <- rasch(dat)
irt1P
summary(irt1P)
theta = factor.scores(irt1P)
GoF.rasch(irt1P)
person.fit(irt1P)
item.fit(irt1P)
cor(rowSums(theta[[1]][,1:10]), theta[[1]]$z1)

## 2p IRT
irt2P <- ltm(dat ~ z1)
irt2P
plot(irt2P)
person.fit(irt2P)
item.fit(irt2P)
anova(irt1P, irt2P)

## exercise (jisshu)
dat <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/test_data01.csv")
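The final line loads the exercise dataset but the post stops there. One possible starting point is to repeat the IRT workflow above on it; this is only a sketch and assumes test_data01.csv contains 0/1 item responses like survey_data01.txt:

# a possible starting point for the exercise data (assumes 0/1 item responses)
dat.ex <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/test_data01.csv")
descript(dat.ex)
ex.1P <- rasch(dat.ex)        # 1-parameter (Rasch) model
ex.2P <- ltm(dat.ex ~ z1)     # 2-parameter model
anova(ex.1P, ex.2P)           # compare the two fits
plot(ex.2P)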
DBDA with pymc3
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
from os import makedirs
from urllib.request import urlretrieve
import pymc3 as pm
import theano.tensor as tt
from pymc3 import model_to_graphviz
from pymc3 import forestplot

## analysis 1 P208
makedirs("data", exist_ok=True)   # make sure the target directory exists
urlretrieve("http://peach.l.chiba-u.ac.jp/course_folder/z15N50.csv", "data/z15N50.csv")
data01 = np.loadtxt("data/z15N50.csv", skiprows=1)
print(data01)

with pm.Model() as model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    obs = pm.Bernoulli('obs', theta, observed=data01)
model_to_graphviz(model)

with model:
    trace = pm.sample(10000)

# checking results
figsize(12.5, 4)
pm.plots.traceplot(trace, var_names=["theta"])
pm.plots.plot_posterior(trace, var_names=["theta"])
pm.plots.autocorrplot(trace, var_names=["theta"])

# Analysis 2 p208
resp = [1,0,1,1,1,1,1,0,0,0,1,0,0,1,0]
subj = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1]
from pymc3 import model_to_graphviz

N_subj = 2
with pm.Model() as model2:
    theta = pm.Beta('theta', alpha=2, beta=2, shape=N_subj)
    y = pm.Bernoulli('y', theta[subj], observed=resp)   # index theta by subject
    diff_theta = pm.Deterministic('delta_theta', theta[0] - theta[1])
model_to_graphviz(model2)

# mcmc sampling
with model2:
    trace = pm.sample(10000)

# viz & checking results
pm.plots.traceplot(trace, var_names=["theta"])
pm.plots.plot_posterior(trace, var_names=["theta"])
pm.plots.plot_posterior(trace, var_names=["delta_theta"])
pm.plots.autocorrplot(trace, var_names=["theta"])

### analysis 3 p240
# assumes `data` is a pandas DataFrame with columns y (0/1) and s (subject id),
# loaded beforehand (presumably Kruschke's therapeutic-touch data from DBDA 2nd Ed.)
N_subj = len(data.s.unique())
resp = data.y
subj = data.s - 1
with pm.Model() as model3:
    omega = pm.Beta('omega', 1, 1)
    kappaM2 = pm.Gamma('kappaM2', 0.01, 0.01)
    kappa = pm.Deterministic('kappa', kappaM2 + 2)
    theta = pm.Beta('theta', alpha=omega*(kappa-2)+1,
                    beta=(1-omega)*(kappa-2)+1, shape=N_subj)
    y = pm.Bernoulli('y', theta[subj], observed=resp)
model_to_graphviz(model3)

### analysis 4 p253
# assumes `data` is a pandas DataFrame of the batting-average data (presumably
# Kruschke's BattingAverage.csv) with columns Player, AtBats, Hits,
# PlayerNumber, PriPosNumber
N_player = len(data.Player.unique())
N_pos = 9
Ns = data.AtBats
Zs = data.Hits
subj = data.PlayerNumber - 1
pos = data.PriPosNumber - 1
with pm.Model() as model4:
    omegaO = pm.Beta('omegaO', 1, 1)
    kappaM2O = pm.Gamma('kappaM2', 0.01, 0.01)
    kappaO = pm.Deterministic('kappaO', kappaM2O + 2)
    omegaPOS = pm.Beta('omegaPOS', alpha=omegaO*(kappaO-2)+1,
                       beta=(1-omegaO)*(kappaO-2)+1, shape=N_pos)
    kappaM2POS = pm.Gamma('kappaM2POS', 0.01, 0.01, shape=N_pos)
    kappaPOS = pm.Deterministic('kappaPOS', kappaM2POS + 2)
    theta = pm.Beta('theta', alpha=omegaPOS[pos]*(kappaPOS[pos]-2)+1,
                    beta=(1-omegaPOS[pos])*(kappaPOS[pos]-2)+1, shape=N_player)
    y = pm.Binomial('y', n=Ns, p=theta, observed=Zs)
model_to_graphviz(model4)
Basic Practicum B02
bin2dec <- function(bin){
  # convert binary to decimal
  b.rev = rev(bin)
  ones = which(b.rev == 1)
  ones.adj = ones - 1
  b.sq = 2^ones.adj
  dec = sum(b.sq)
  return(dec)
}

dec2bin <- function(dec, digit) {
  # convert decimal to binary
  bin = rep(0, digit)
  r.counter = 1
  while(dec != 0){
    r = dec %% 2
    dec = dec %/% 2
    bin[r.counter] = r
    r.counter = r.counter + 1
  }
  return(rev(bin))
}

# cellular automata
# RULE 150
time = 100
n.cells = 101
cells = matrix(0, nrow = time, ncol = n.cells)
state = rep(0, n.cells)
state[51] = 1
cells[1,] = state
for (i.time in 2:time) {
  left = c(state[n.cells], state[1:(n.cells - 1)])
  right = c(state[2:n.cells], state[1])
  s = left + state + right
  state = s %% 2
  cells[i.time, ] = state
}

# the same rule-150 update, written directly on the cells matrix
time = 100
n.cells = 101
cells = matrix(0, nrow = time, ncol = n.cells)
cells[1,51] = 1
for (i.time in 2:time) {
  left = c(cells[(i.time-1), n.cells], cells[(i.time-1), 1:(n.cells - 1)])
  right = c(cells[(i.time-1), 2:n.cells], cells[(i.time-1), 1])
  s = left + cells[(i.time-1),] + right
  cells[i.time, ] = s %% 2
}

# general elementary cellular automaton (any rule ID)
transFUN <- function(st, ruleID){
  output = dec2bin(ruleID, 8)
  a = matrix(c(1,1,1, 1,1,0, 1,0,1, 1,0,0,
               0,1,1, 0,1,0, 0,0,1, 0,0,0), nrow=8, byrow=T)
  newSt = output[which(apply(a, 1, function(x) {all(x==st)}))]
  return(newSt)
}

ECA <- function(nCell, nGen, ruleID){
  res = matrix(0, nrow=nGen, ncol=nCell)
  res[1, ceiling(nCell/2)] = 1
  for (i_gen in 2:nGen) {
    for (i_cell in 2:(nCell-1)) {
      res[i_gen, i_cell] = transFUN(res[i_gen-1, (i_cell-1):(i_cell+1)], ruleID)
    }
    res[i_gen, 1] = transFUN(c(res[i_gen-1, nCell], res[i_gen-1, 1], res[i_gen-1, 2]), ruleID)
    res[i_gen, nCell] = transFUN(c(res[i_gen-1, (nCell-1)], res[i_gen-1, nCell], res[i_gen-1, 1]), ruleID)
  }
  return(res)
}
res <- ECA(200, 100, 99)
image(res)
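The two rule-150 loops fill the cells matrix but the post only plots the ECA() result. A minimal way to view the rule-150 grid as well, mirroring the image() call used for res (orientation chosen so generation 1 is at the top):

# visualize the rule-150 grid; rows are generations, running top to bottom
image(t(cells[time:1, ]), col = c("white", "black"), axes = FALSE)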
Special Lectures on Wide-Area Systems II: Unsupervised Learning 1A
dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat.pca <- princomp(dat) dat.pca$score dat.pca$loadings library(nnet) dat <- read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") set.seed(5) x = dat[, 1:3] y = dat[, 4] dat.nnet = nnet(x, y, size = 150, linout = TRUE,maxit = 1000) nnet.pred <- predict(dat.nnet, dat) cor(dat.nnet$fitted.values,dat$sales)^2 dat.lm<-lm(sales~.,data=dat) summary(dat.lm) # pseudo-pca dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat.nnet<-nnet(dat,dat,size=2, maxit=1000, decay=0.01, linout=TRUE) cor(dat.nnet$fitted.values,dat) ### text processing txt = "You say goodbye and I say hello.";txt = tolower(txt) txt = gsub('[.]', ' .',txt) words = unlist(strsplit(txt, " ")) uniq.words = unique(words) uniq.words[1] which(uniq.words=="say") n.uniq = length(uniq.words) n.words = length(words) corpus = rep(0,n.words) corpus = match(words,uniq.words) cc = matrix(0,nrow=n.uniq, ncol=n.uniq) for (i.c in 1:n.uniq){ loc = which(corpus==i.c) L = which(match(uniq.words,words[pmax(loc-1,1)])!='NA') R = which(match(uniq.words,words[pmin(loc+1,n.words)])!='NA') cc[i.c, c(L,R)]=cc[i.c, c(L,R)]+1 } diag(cc)=0 contxt <- function(corpus, uniq.words, words){ cc = matrix(0, nrow=n.uniq, ncol=n.uniq) for (i.c in 1:n.uniq){ loc = which(corpus==i.c) L = which(match(uniq.words, words[pmax(loc-1,1)])!='NA') R = which(match(uniq.words, words[pmin(loc+1,n.words)])!='NA') cc[i.c, c(L,R)]=cc[i.c, c(L,R)]+1 } diag(cc)=0 return(cc) } words.sim <- function(word1, word2, eps=1e-8){ nw1 = word1/(sqrt(sum(word1^2)) + eps) nw2 = word2/(sqrt(sum(word2^2)) + eps) return(nw1%*%nw2) } w1 = cc[which(uniq.words=="you"),] w2 = cc[which(uniq.words=="i"),] words.sim(w1,w2) most.sim <- function(word, uniq.words, cc, N=5){ n.uniq = length(uniq.words) word2 = cc[which(uniq.words==word),] res = data.frame(words = uniq.words, similarity = rep(0,n.uniq)) top = data.frame(words = rep("",N+1), similarity=rep(0,N+1)) res$similarity = apply(cc,1, function(x) words.sim(x,word2)) sort.res = sort(res$similarity, decreasing = T, index.return=T) top$words = res$words[sort.res$ix[1:(N+1)]] top$similarity = res$similarity[sort.res$ix[1:(N+1)]] self.idx = which(top$words == word) return(top[-self.idx,]) } most.sim('i',uniq.words,cc,5) ppmi <- function(cc, eps = 1e-8){ n.uniq = ncol(cc) PPMI = matrix(0, n.uniq, n.uniq) N = sum(cc) r.sum = rowSums(cc) pmi = log2(cc*N/outer(r.sum,r.sum)) PPMI=matrix(pmax(0,pmi),nrow=n.uniq) return(PPMI) } ### LSA PPMI <- ppmi(cc) s = svd(PPMI) plot(s$u[,2],s$u[,1],pch=20,col='red',cex=5) text(s$u[,2],s$u[,1],uniq.words,cex=2) ### word2vec inefficient corp2contxt1S = function(corpus){ len.corp = length(corpus) # creating target matrix idxT = cbind(1:(len.corp-2), corpus[2:(len.corp-1)]) targ1S = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus))) targ1S[idxT]=1 # creating context matrices idxC1 = cbind(1:(len.corp-2),corpus[1:(len.corp-2)]) idxC2 = cbind(1:(len.corp-2),corpus[3:len.corp]) contxt1 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus))) contxt2 = matrix(0,nrow=len.corp-2,ncol=length(unique(corpus))) contxt1[idxC1]=1 contxt2[idxC2]=1 return(list(target=targ1S,contxt1=contxt1,contxt2=contxt2)) } txt = "You say goodbye and I say hello."; corp = txt2corpus(txt) dat <- corp2contxt1S(corp) init.W2V <- function(n.uniq,size.hidden){ W.in = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01 W.out = matrix(rnorm(n.uniq*size.hidden),nrow=size.hidden)*0.01 return(list(W.in = W.in, W.out= W.out)) } softmax.forwd <- function(x, target){ max.x = 
apply(x,1,max) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) delta = 1e-7; R = nrow(as.matrix(y)) return(-sum(target*log(y + delta))/R) } softmax.bckwd <- function(x, target, dout = 1){ max.x = apply(x, 1, max) R = nrow(x) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) return((y-target)/R) } MatMult.bckwd <- function(x, W, dout){ dx = dout%*%t(W) dW = t(x)%*%dout return(list(dx = dx, dW = dW)) } softmax.pred <- function(x, target){ max.x = apply(x,1,max) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) return(y) } network<-init.W2V(7,5) n.batch = 3; n.data = nrow(dat$target) n.iter = 2000; lambda=0.1; loss = rep(0, n.iter) for (i.iter in 1:n.iter){ samp.idx = sample(1:n.data,n.batch) batch.c1 = dat$contxt1[samp.idx,] batch.c2 = dat$contxt2[samp.idx,] batch.y = dat$target[samp.idx,] h1 <- MatMult.forwd(batch.c1, network$W.in) h2 <- MatMult.forwd(batch.c2, network$W.in) h = 0.5 * (h1 + h2) s = MatMult.forwd(h,network$W.out) z = softmax.forwd(s,batch.y) loss[i.iter] = z ds = softmax.bckwd(s, batch.y, 1) da = MatMult.bckwd(h,network$W.out,ds) da$dW = da$dW*0.5 dIn1 = MatMult.bckwd(batch.c1,network$W.in,da$dx) dIn2 = MatMult.bckwd(batch.c2,network$W.in,da$dx) network$W.out = network$W.out - lambda*da$dW network$W.in = network$W.in - lambda*dIn1$dW network$W.in = network$W.in - lambda*dIn2$dW } plot(loss, type='l') ### word2vec txt2corpus <- function(txt){ txt = tolower(txt) txt = gsub('[.]', ' .',txt) words = unlist(strsplit(txt, " ")) uniq.words = unique(words) n.uniq = length(uniq.words) n.words = length(words) corpus = rep(0,n.words) corpus = match(words,uniq.words) return(corpus) } corp2contxt <- function(corpus){ len.corp = length(corpus) target = corpus[2:(len.corp-1)] col1 = corpus[1:(len.corp-2)] col2 = corpus[3:len.corp] context = cbind(col1,col2) return(list(context=context,target = target)) } embed.forwd <- function(W, idx){ return(W[idx,]) } h1 <- embed.forwd(W,idx1) h2 <- embed.forwd(W,idx2) h = (h1 + h2)/2 embed.dot.forwd <- function(W, h, idx){ return(O=rowSums(W[idx,]*h)) } s = embed.dot.forwd(network$W.out,h,batch.y) sigmoid.forwd <- function(O){ return(1/(1+exp(-O))) } sigmoidWL.forwd <- function(O,target){ #delta = 1e-5 #y = max(0, (1/(1+exp(-O)) - delta)) y = 1/(1+exp(-O)) loss=-sum(target*log(y)+(1-target)*log(1 - y)) return(loss) } sigmoidWL.backwd <- function(O,target,dout=1){ #delta = 1e-5 #y = 1/(1+exp(-O)) - delta y = 1/(1+exp(-O)) dW = (y - target)*dout return(dW) } embed.dot.backwd <- function(W,h,idx,dout){ dh = dout * W[idx,] dW = dout * h return(list(dh = dh, dW = dW)) } embed.backwd <- function(W, idx, dout){ dW = matrix(0,nrow(W),ncol(W)) for (i.idx in 1:length(idx)){ dW[idx[i.idx], ] = dW[idx[i.idx], ] + dout[i.idx, ] } return(dW) } init.W2V <- function(n.uniq,size.hidden){ W.in = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01 W.out = matrix(rnorm(n.uniq*size.hidden),nrow=n.uniq)*0.01 return(list(W.in = W.in, W.out= W.out)) } ### naive W2V network<-init.W2V(8,5) n.batch = 3; txt = "You say goodbye and I say hello.." 
corp = txt2corpus(txt) corp = c(8,8,corp) dat<-corp2contxt(corp) n.data = length(dat$target) n.iter = 3000; lambda=0.1; loss = rep(0, n.iter) for (i.iter in 1:n.iter){ samp.idx = sample(1:n.data,n.batch) batch.c1 = dat$context[samp.idx,1] batch.c2 = dat$context[samp.idx,2] batch.y = dat$target[samp.idx] h1 <- embed.forwd(network$W.in,batch.c1) h2 <- embed.forwd(network$W.in,batch.c2) h = 0.5 * (h1 + h2) # positive only s = embed.dot.forwd(network$W.out,h,batch.y) z = sigmoidWL.forwd(s,1) loss[i.iter] = z ds = sigmoidWL.backwd(s, 1, 1) dE = embed.dot.backwd(network$W.out,h, batch.y, ds) dh = dE$dh*0.5 dIn1 = embed.backwd(network$W.in,dat$context[samp.idx,1], dh) dIn2 = embed.backwd(network$W.in,dat$context[samp.idx,2], dh) network$W.out[batch.y,] = network$W.out[batch.y,] - lambda*dE$dW network$W.in = network$W.in - lambda*dIn1 network$W.in = network$W.in - lambda*dIn2 } par(mfrow=c(1,1)) plot(loss, type='l') samp.idx = c(2:6,8,9,1) batch.c1 = dat$context[samp.idx,1] batch.c2 = dat$context[samp.idx,2] batch.y = dat$target[samp.idx] h1 <- embed.forwd(network$W.in,batch.c1) h2 <- embed.forwd(network$W.in,batch.c2) h = 0.5 * (h1 + h2) s = MatMult.forwd(h,t(network$W.out)) z = sigmoid.forwd(s) res=cbind(z,batch.y) #par(mfrow=c(8,1)) #for (i in 1:8){ # col.spec = rep("black",8) # col.spec[i]="orange" # barplot(res[i, 1:8],col=col.spec) #} par(mfrow=c(4,1)) for (i in 1:4){ col.spec = rep("black",8) col.spec[i]="orange" barplot(res[i, 1:8],col=col.spec) } par(mfrow=c(4,1)) for (i in 5:8){ col.spec = rep("black",8) col.spec[i]="orange" barplot(res[i, 1:8],col=col.spec) } ### with negative sampling unigram.sampler <- function(corpus, target, power, sample.size){ neg.corpus <- corpus[-which(corpus == target)] uniq.word = unique(neg.corpus) n.word = length(neg.corpus) prob = (as.vector(table(neg.corpus))/n.word)^power prob = prob/sum(prob) sample.idx = sample(uniq.word, sample.size, prob = prob) return(sort(sample.idx)) } get.neg.sample <- function(corpus, target, power, sample.size){ sample.id = matrix(0, nrow = length(target), ncol = sample.size) for (i.targ in 1:length(target)){ sample.id[i.targ,] = unigram.sampler(corpus, target[i.targ], power, sample.size) } return(sample.id) } network<-init.W2V(8,5) n.batch = 3; txt = "You say goodbye and I say hello.." 
corp = txt2corpus(txt) corp = c(8,8,corp) dat<-corp2contxt(corp) n.data = length(dat$target) n.iter = 10000; lambda=0.1; loss = rep(0, n.iter) sample.size = 2 for (i.iter in 1:n.iter){ samp.idx = sample(1:n.data,n.batch) batch.c1 = dat$context[samp.idx,1] batch.c2 = dat$context[samp.idx,2] batch.y = dat$target[samp.idx] h1 <- embed.forwd(network$W.in,batch.c1) h2 <- embed.forwd(network$W.in,batch.c2) h = 0.5 * (h1 + h2) # positive s = embed.dot.forwd(network$W.out,h,batch.y) z = sigmoidWL.forwd(s,1) loss[i.iter] = z ds = sigmoidWL.backwd(s, 1, 1) dE = embed.dot.backwd(network$W.out,h, batch.y, ds) dh = dE$dh*0.5 dIn1 = embed.backwd(network$W.in,dat$context[samp.idx,1], dh) dIn2 = embed.backwd(network$W.in,dat$context[samp.idx,2], dh) network$W.out[batch.y,] = network$W.out[batch.y,] - lambda*dE$dW network$W.in = network$W.in - lambda*dIn1 network$W.in = network$W.in - lambda*dIn2 # negative neg.set <- get.neg.sample(corp,batch.y, 0.75, sample.size) for (i.neg in 1:sample.size){ s = embed.dot.forwd(network$W.out,h,neg.set[,i.neg]) z = sigmoidWL.forwd(s,0) loss[i.iter] = loss[i.iter] + z ds = sigmoidWL.backwd(s, 0, dout=1) dE = embed.dot.backwd(network$W.out,h,neg.set[,i.neg], ds) dh = dE$dh*0.5 dIn1 = embed.backwd(network$W.in, dat$context[samp.idx,1],dh) dIn2 = embed.backwd(network$W.in, dat$context[samp.idx,2],dh) network$W.out[neg.set[,i.neg],] = network$W.out[neg.set[,i.neg],] - lambda*dE$dW network$W.in = network$W.in - lambda*dIn1 network$W.in = network$W.in - lambda*dIn2 } } par(mfrow=c(1,1)) loss.dat <- data.frame(epoch=1:length(loss), loss = loss) ggplot(loss.dat, aes(x = epoch, y = loss)) + geom_smooth(se=F) samp.idx = c(2:6,8,9,1) batch.c1 = dat$context[samp.idx,1] batch.c2 = dat$context[samp.idx,2] batch.y = dat$target[samp.idx] h1 <- embed.forwd(network$W.in,batch.c1) h2 <- embed.forwd(network$W.in,batch.c2) h = 0.5 * (h1 + h2) s = MatMult.forwd(h,t(network$W.out)) z = sigmoid.forwd(s) res=cbind(z,batch.y) #par(mfrow=c(8,1)) #for (i in 1:8){ # col.spec = rep("black",8) # col.spec[i]="orange" # barplot(res[i, 1:8],col=col.spec) #} par(mfrow=c(4,1)) for (i in 1:4){ col.spec = rep("black",8) col.spec[i]="orange" barplot(res[i, 1:8],col=col.spec) } par(mfrow=c(4,1)) for (i in 5:8){ col.spec = rep("black",8) col.spec[i]="orange" barplot(res[i, 1:8],col=col.spec) }
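Both CBOW training loops above call MatMult.forwd(), but only MatMult.bckwd() is defined in this post. A minimal forward pass consistent with that backward pass (a plain matrix product) would be:

MatMult.forwd <- function(x, W){
  # forward pass of a fully connected (matrix multiplication) layer
  return(x %*% W)
}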
Special Lectures on Wide-Area Systems II: Supervised Learning 1B
train.x = as.matrix(iris[,1:4]) train.y.temp = as.numeric(iris[,5]) train.y = matrix(0,nrow = nrow(train.x), ncol =3) train.y[which(train.y.temp==1), 1]=1 train.y[which(train.y.temp==2), 2]=1 train.y[which(train.y.temp==3), 3]=1 init.network <- function(n.neurons){ n.layer = length(n.neurons) W = list(); b = list() for (i.layer in 1:(n.layer-1)){ W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),nrow=n.neurons[i.layer]) b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1) } return(list(W = W,b = b)) } sigmoid.forwd <- function(x){ return(1/(1+exp(-x))) } sigmoid.bckwd <- function(x, dout){ y = sigmoid.forwd(x) return(dout*(1-y)*y) } affine.forwd <- function(x, W, b){ return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b) } affine.bckwd <- function(x, W, b, dout){ dx = dout%*%t(W) dW = t(x)%*%dout db = colSums(dout) return(list(dx = dx, dW = dW, db = db)) } softmax.forwd <- function(x, target){ max.x = apply(x,1,max) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) delta = 1e-7; R = nrow(as.matrix(y)) return(-sum(target*log(y + delta))/R) } softmax.bckwd <- function(x, target, dout = 1){ max.x = apply(x, 1, max) R = nrow(x) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) return((y-target)/R) } params = init.network(c(4,15,3)) batch_size = 50; n.iter =5000; lambda =0.05 n.train = nrow(train.x) loss = rep(0,n.iter) for (i.iter in 1:n.iter){ batch_mask = sample(1:n.train, batch_size) x.batch = train.x[batch_mask,] t.batch = train.y[batch_mask,] a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]]) z1 = sigmoid.forwd(a1) a2 = affine.forwd(z1,params$W[[2]],params$b[[2]]) z2 = softmax.forwd(a2,t.batch) loss[i.iter] = z2 dwSM = softmax.bckwd(a2, t.batch, 1) dwA2 = affine.bckwd(a1,params$W[[2]],params$b[[2]],dwSM) dwSG = sigmoid.bckwd(a1,dwA2$dx) dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG) params$W[[2]] = params$W[[2]] - lambda*dwA2$dW params$b[[2]] = params$b[[2]] - lambda*dwA2$db params$W[[1]] = params$W[[1]] - lambda*dwA1$dW params$b[[1]] = params$b[[1]] - lambda*dwA1$db } loss.dat = data.frame(trial = 1:length(loss), loss = loss) ggplot(loss.dat,aes(x = trial, y = loss)) + geom_smooth(se=F) ### methods to improve effeciency func02R = function(x){ return(1/20*x[1]^2 + x[2]^2) } numerical.grad <- function(func, x){ h = 1e-4 R = nrow(x) C = ncol(x) grad = matrix(0, R, C) for (i.col in 1:C){ for (i.row in 1:R){ temp.x = x[i.row,i.col] x[i.row, i.col] = temp.x + h plusH = do.call(func, list(x)) x[i.row, i.col] = temp.x - h minusH = do.call(func,list(x)) grad[i.row, i.col] = (plusH - minusH)/(2*h) x[i.row, i.col] = temp.x } } return(grad) } require(plot3D) x = seq(-10,10,0.5) y = seq(-10,10,0.5) M = mesh(x,y) R = nrow(M$x) C = nrow(M$x) scaling = 0.05 plot(c(),c(),xlim = c(-10,10),ylim=c(-10,10)) for (i.col in 1:C){ for (i.row in 1:R){ ng = numerical.grad("func02R",matrix(c(M$x[i.row,i.col],M$y[i.row,i.col]),nrow=1)) arrows(M$x[i.row,i.col],M$y[i.row,i.col], (M$x[i.row,i.col]-ng[1]*scaling),(M$y[i.row,i.col]-ng[2]*scaling), length = 0.05) } } x = seq(-10,10,0.2) y = seq(-10,10,0.2) M = mesh(x,y) Z = as.vector(1/20*M$x^2)+as.vector(M$y^2) Z.mesh = matrix(Z,nrow(M$x)) contour(x,y,Z.mesh,drawlabels = F,nlevels=40) grad.desc <- function(func, init.x, lr, n.iter){ x = init.x x.hist = init.x for (i.iter in 1:n.iter) { grad = numerical.grad(func, x) x = x - lr*grad x.hist = rbind(x.hist,x) } return(x.hist) } x.init = matrix(c(-7,2),nrow = 1) gd = 
grad.desc("func02R",x.init,0.9,100) lines(gd,type='o',col = 'green',pch=20) gd.moment <- function(func, init.x, lr, moment, n.iter){ x = init.x x.hist = init.x grad.hist = rep(0,length(x)) for (i.iter in 1:n.iter) { grad = numerical.grad(func, x) x = x - lr*grad+moment*grad.hist x.hist = rbind(x.hist,x) grad.hist = -lr*grad } return(x.hist) } x.init = matrix(c(-7,2),nrow = 1) gdm = gd.moment("func02R",x.init,0.9,0.3,100) lines(gdm,type='o',col = 'blue',pch=20) adaGrad <- function(func, init.x, eta, n.iter){ x = init.x x.hist = init.x h = rep(0,length(x)) for (i.iter in 1:n.iter) { grad = numerical.grad(func, x) h = h + grad*grad x = x - eta*(1/(sqrt(h)+1e-7))*grad x.hist = rbind(x.hist,x) } return(x.hist) } x.init = matrix(c(-7,2),nrow = 1) adaGrad = adaGrad("func02R",x.init,0.9,100) contour(x,y,Z.mesh,drawlabels = F,nlevels=40) lines(adaGrad,type='o',col = 'red',pch=20) adam <- function(func, init.x, eta, beta1, beta2, epsilon, n.iter){ x = init.x x.hist = init.x m = rep(0,length(x)) v = rep(0,length(x)) for (i.iter in 1:n.iter) { grad = numerical.grad(func, x) m = beta1*m + (1-beta1)*grad v = beta2*v + (1-beta2)*grad*grad m.hat = m/(1-beta1) v.hat = v/(1-beta2) x = x - eta/((sqrt(v.hat)+epsilon))*m.hat x.hist = rbind(x.hist,x) } return(x.hist) } x.init = matrix(c(-7,2),nrow = 1) adam = adam("func02R",x.init,0.2,0.9,0.999,1e-8,100) contour(x,y,Z.mesh,drawlabels = F,nlevels=40) lines(adam,type='o',col = 'red',pch=20) ### w/ functions relu.forwd <- function(x){ return(pmax(x,0)) } relu.bckwd <- function(z, dout){ dout[which(z <= 0)] = 0 return(dout) } # sigmoid sigmoid.forwd <- function(x){ return(1/(1+exp(-x))) } sigmoid.bckwd <- function(z, dout){ return(dout*(1-z)*z) } # Affine affine.forwd <- function(x, W, b){ return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b) } affine.bckwd <- function(x, W, dout){ dx = dout%*%t(W) dW = t(x)%*%dout db = colSums(dout) return(list(dx = dx, dW = dW, db = db)) } # softmax with CE softmax.forwd <- function(x, target){ max.x = apply(x,1,max) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) delta = 1e-7; R = nrow(as.matrix(y)) return(list(smx = y, ce = -sum(target*log(y + delta))/R)) } softmax.bckwd <- function(smx, target){ R = nrow(smx) return((smx-target)/R) } activation <- function(A, target, actFun){ if (actFun == "sigmoid"){ return(sigmoid.forwd(A)) } if (actFun == "relu"){ return(relu.forwd(A)) } if (actFun == "softmax"){ return(softmax.forwd(A, target)) } } Dact <- function(z, smx, target, dout, actFun){ if (actFun == "sigmoid"){ return(sigmoid.bckwd(z, dout)) } if (actFun == "relu"){ return(relu.bckwd(z, dout)) } if (actFun == "softmax"){ return(softmax.bckwd(smx, target)) } } # function for initialization init.network <- function(n.neurons, actFun, Opt, sdv){ n.layer = length(n.neurons) W = list(); b = list(); mW = list(); mb = list(); # momentum hW = list(); hb = list(); # adaGrad aMW = list(); aMb = list(); # adam M aVW = list(); aVb = list(); # adam V for (i.layer in 1:(n.layer-1)){ if (nargs() == 3) { if (actFun[i.layer]=="sigmoid"){ # Xavier sdv = 1/sqrt(n.neurons[i.layer]) } else { # He - assumes ReLU sdv = sqrt(2/n.neurons[i.layer]) } } W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)], sd = sdv), nrow=n.neurons[i.layer]) b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer+1)], sd = sdv), nrow = 1) if (Opt == "momentum"){ mW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)]) mb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)]) } if (Opt == "adagrad"){ 
hW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)]) hb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)]) } if (Opt == "adam"){ aMW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)]) aMb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)]) aVW[[i.layer]] = matrix(0, nrow=n.neurons[i.layer], ncol=n.neurons[(i.layer+1)]) aVb[[i.layer]] = matrix(0, nrow = 1, ncol=n.neurons[(i.layer+1)]) } } return(list(W = W, b = b, actFun = actFun, optimizer = Opt, mW=mW, mb=mb, hW=hW, hb=hb, aMW=aMW,aMb=aMb,aVW=aVW)) } OPT<-function(net, Daff, HP){ if (net$optimizer == "momentum"){ return(Opt.mom(net, Daff, HP)) } if (net$optimizer == "adagrad"){ return(Opt.adagrad(net, Daff, HP)) } if (net$optimizer == "adam"){ return(Opt.adam(net, Daff, HP)) } } Opt.mom <- function(net, Daff, HP) { # HP[3] = learning rate # HP[4] = weight decay # HP[5] = momentum n.layer <- length(net$W) for (i.layer in 1:n.layer) { net$mW[[i.layer]] = HP[5]*net$mW[[i.layer]] - HP[3]*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]] net$mb[[i.layer]] = HP[5]*net$mb[[i.layer]] - HP[3]*Daff[[i.layer]]$db - HP[4]*net$b[[i.layer]] net$W[[i.layer]] = net$W[[i.layer]] + net$mW[[i.layer]] net$b[[i.layer]] = net$b[[i.layer]] + net$mb[[i.layer]] } return(net=net) } Opt.adagrad <- function(net, Daff, HP) { # HP[3] = learning rate # HP[4] = weight decay n.layer <- length(net$W) for (i.layer in 1:n.layer) { net$hW[[i.layer]] = net$hW[[i.layer]] + Daff[[i.layer]]$dW*Daff[[i.layer]]$dW net$hb[[i.layer]] = net$hb[[i.layer]] + Daff[[i.layer]]$db*Daff[[i.layer]]$db net$W[[i.layer]] = net$W[[i.layer]]-HP[3]/(sqrt(net$hW[[i.layer]])+1e-7)*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]] net$b[[i.layer]] = net$b[[i.layer]]-HP[3]/(sqrt(net$hb[[i.layer]])++1e-7)*Daff[[i.layer]]$db - HP[4]*net$b[[i.layer]] } return(net=net) } cu.nnet = function(train.x, train.y, net, HP = c(10,1000,0.05,0.01,0.1,0.999,0.9)){ # HP: Hyperparameters # HP[1] = batch size # HP[2] = n iteration # HP[3] = learning rate # HP[4] = weight decay # HP[5] = momentum # HP[6] = beta1 (adam) # HP[7] = beta2 (adam) n.layer <- length(net$W) loss = rep(0,HP[2]) A = list(); z = list(); Dact = list(); Daff = list() for (i.iter in 1:HP[2]){ batch_mask = sample(1:nrow(train.x), HP[1]) x.batch = train.x[batch_mask,] t.batch = train.y[batch_mask,] for (i.layer in 1:n.layer){ if (i.layer == 1){ x = x.batch } else { x = z[[(i.layer - 1)]] } A[[i.layer]] = affine.forwd(x, net$W[[i.layer]], net$b[[i.layer]]) z[[i.layer]] = activation(A[[i.layer]], t.batch, net$actFun[i.layer]) } loss[i.iter] = z[[i.layer]]$ce smx = z[[i.layer]]$smx for (i.layerR in n.layer:1){ if (i.layerR == n.layer){ dout = 1 } else { dout = Daff[[(i.layerR+1)]]$dx } Dact[[i.layerR]] = Dact(z[[i.layerR]], smx, t.batch, dout, net$actFun[i.layerR]) if (i.layerR==1){ x = x.batch } else { x = A[[(i.layerR-1)]] } Daff[[i.layerR]] = affine.bckwd(x, net$W[[i.layerR]], Dact[[i.layerR]]) } net = OPT(net, Daff, HP) } return(list(loss = loss, net = net)) } train.x = as.matrix(iris[,1:4]) train.y.temp = as.numeric(iris[,5]) train.y = matrix(0,nrow = nrow(train.x), ncol =3) train.y[which(train.y.temp==1), 1]=1 train.y[which(train.y.temp==2), 2]=1 train.y[which(train.y.temp==3), 3]=1 actF = c("relu","softmax") network = init.network(c(4,15,3), actF, "momentum") res = cu.nnet(train.x, train.y, network, HP=c(15,1000,0.01,0.0001,0.9,0.999,0.9)) hist(res$net$W[[1]]) plot(res$loss,type='l') MNIST.dat<-read.csv("http://www.matsuka.info/univ/course_folder/MNSTtrain.csv") train <- 
data.matrix(MNIST.dat) train.x <- as.matrix(train[,-1]/255) train.y.temp <- train[,1] train.y = matrix(0,nrow = nrow(train), ncol = 10) for (i in 1:nrow(train.x)){ train.y[i,(train.y.temp[i]+1)]=1 } actF = c("relu","relu","softmax") network = init.network(c(784,100,50,10), actF, "momentum") res = cu.nnet(train.x, train.y, network,HP=c(100,2000,0.1,0.001,0.8,0.999,0.9)) plot(res$loss,type='l')
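OPT() above also dispatches to Opt.adam(), which never appears in the post (only Opt.mom and Opt.adagrad do). A sketch of what it could look like, following the same interface and the HP convention documented in cu.nnet (HP[3] = learning rate, HP[4] = weight decay, HP[6] = beta1, HP[7] = beta2); it assumes init.network also returns the aVb list, which the posted return statement omits:

Opt.adam <- function(net, Daff, HP) {
  # Adam update per layer; bias correction omitted for brevity
  # assumes net$aVb exists (add aVb to init.network's return list)
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer) {
    net$aMW[[i.layer]] = HP[6]*net$aMW[[i.layer]] + (1-HP[6])*Daff[[i.layer]]$dW
    net$aMb[[i.layer]] = HP[6]*net$aMb[[i.layer]] + (1-HP[6])*Daff[[i.layer]]$db
    net$aVW[[i.layer]] = HP[7]*net$aVW[[i.layer]] + (1-HP[7])*Daff[[i.layer]]$dW^2
    net$aVb[[i.layer]] = HP[7]*net$aVb[[i.layer]] + (1-HP[7])*Daff[[i.layer]]$db^2
    net$W[[i.layer]] = net$W[[i.layer]] -
      HP[3]*net$aMW[[i.layer]]/(sqrt(net$aVW[[i.layer]])+1e-7) - HP[4]*net$W[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] -
      HP[3]*net$aMb[[i.layer]]/(sqrt(net$aVb[[i.layer]])+1e-7) - HP[4]*net$b[[i.layer]]
  }
  return(net=net)
}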
Fundamentals of Data Analysis B: Log-Linear Analysis
freq <- c(33,37,7,23)
pref <- factor(c('soba','udon','soba','udon'))
region <- factor(c('east','east','west','west'))
dat <- data.frame(pref, region, freq)
dat.table = table(pref, region)
dat.table[cbind(pref, region)] <- freq
ME.lrt = 2*sum((freq)*log(freq/25))
dat.loglinCE_A <- loglin(dat.table, list(1), fit=T, param=T)
1-pchisq(dat.loglinCE_A$lrt, dat.loglinCE_A$df)
dat.loglinCE_B <- loglin(dat.table, list(2), fit=T, param=T)
1-pchisq(dat.loglinCE_B$lrt, dat.loglinCE_B$df)
dat.loglinIND <- loglin(dat.table, list(1,2), fit=T, param=T)
1-pchisq(dat.loglinIND$lrt, dat.loglinIND$df)
dat.loglinSAT <- loglin(dat.table, list(c(1,2)), fit=T, param=T)
ME.lrt - dat.loglinCE_A$lrt
1-pchisq(ME.lrt - dat.loglinCE_A$lrt, 1)
dat.loglinCE_A$lrt - dat.loglinIND$lrt
1-pchisq(dat.loglinCE_A$lrt - dat.loglinIND$lrt, 1)
dat.loglinIND$lrt - dat.loglinSAT$lrt
1-pchisq(dat.loglinIND$lrt - dat.loglinSAT$lrt, 1)

# three-way table: gender x affiliation x circle
freq <- c(9,5,2,4,16,10,26,28)
gender <- factor(c(rep('female',4), rep('male',4)))
affil <- factor(rep(c('L','L','E','E'), 2))
circle <- factor(rep(c('tennis','astro'), 4))
dat <- data.frame(gender, affil, circle, freq)
dat.table <- table(gender, affil, circle)
dat.table[cbind(gender, affil, circle)] <- freq
dat.loglin2 <- loglin(dat.table, list(1), fit=T, param=T)
dat.loglin3 <- loglin(dat.table, list(1,3), fit=T, param=T)
dat.loglin4 <- loglin(dat.table, list(1,2,3), fit=T, param=T)
dat.loglin5 <- loglin(dat.table, list(c(1,3),2), fit=T, param=T)
dat.loglin6 <- loglin(dat.table, list(c(1,3),c(1,2)), fit=T, param=T)
dat.loglin7 <- loglin(dat.table, list(c(1,3),c(1,2),c(2,3)), fit=T, param=T)
dat.loglin8 <- loglin(dat.table, list(c(1,2,3)), fit=T, param=T)

source('http://peach.l.chiba-u.ac.jp/course_folder/cuUtil02.R')
dat <- read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.tab <- table(dat)
dat.LL <- loglin(dat.tab, list(c(1,4),c(2,4),c(3,4)), fit = T, param=T)
1-pchisq(dat.LL$lrt, dat.LL$df)
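The last model is tested only against the saturated model via its own LRT. If one also wanted to compare it against a richer nested model on the cda7-16 table, in the same way the two-way example chains difference-of-LRT tests, a sketch (not part of the original post) could look like this:

# richer model: all pairwise associations among the four variables
dat.LL2 <- loglin(dat.tab, list(c(1,2), c(1,3), c(1,4), c(2,3), c(2,4), c(3,4)), fit = T, param = T)
dLRT <- dat.LL$lrt - dat.LL2$lrt   # improvement in fit from the extra terms
dDF  <- dat.LL$df - dat.LL2$df
1 - pchisq(dLRT, dDF)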