# cognitive modeling
### ALCOVE full implementation

ALC.init <- function(size) {
  # size[1] = input dimension
  # size[2] = number of exemplars
  # size[3] = number of categories
  alpha = matrix(1, nrow = 1, ncol = size[1]) / size[1]
  w = matrix(rnorm(size[3] * size[2]) * 0.1, ncol = size[2])
  c = 2      # gain
  phi = 3    # decision strength
  lrA = 0.2  # learning rate for attention
  lrW = 0.1  # learning rate for associations
  return(list(alpha = alpha, w = w, lrA = lrA, lrW = lrW, c = c, phi = phi))
}

# ALCOVE forward process
alcove.forward <- function(parmSet, inp, exemplar) {
  n.exemp = nrow(exemplar)
  diff = matrix(abs(matrix(1, n.exemp, 1) %*% inp - exemplar), nrow = n.exemp)
  exemp = exp(-parmSet$c * (diff %*% t(parmSet$alpha)))
  out = parmSet$w %*% exemp
  return(list(diff = diff, exemp = exemp, out = out))
}

# ALCOVE backward process
alcove.backward <- function(res.forward, parmSet, target) {
  err = target - res.forward$out
  dW = parmSet$lrW * res.forward$exemp %*% t(err)
  dA = -parmSet$lrA * (t(err) %*% parmSet$w * t(res.forward$exemp)) %*% res.forward$diff * parmSet$c
  return(list(dW = dW, dA = dA))
}

# main function
alcove <- function(parmSet, inp, exemplar, targ, iter) {
  # ---- ALCOVE for R ----
  # input arguments
  #  parmSet  - parameter set
  #  inp      - input matrix (n_sample x n_dimension)
  #  exemplar - exemplar matrix (n_sample x n_dimension)
  #  targ     - target matrix (n_sample x n_category)
  #  iter     - number of training epochs
  # initialization
  inpDim = ncol(inp)
  n.inp = nrow(inp)
  n.exemp = nrow(exemplar)
  n.cat = ncol(targ)
  accu = rep(0, iter + 1)
  attn = matrix(0, nrow = iter + 1, ncol = inpDim)
  attn[1, ] = parmSet$alpha
  # end initialization
  # main loop
  for (i_iter in 1:iter) {
    ro = sample(1:n.inp, n.inp)
    prob_temp = 0
    for (i_tr in 1:n.inp) {
      res.forward <- alcove.forward(parmSet, inp[ro[i_tr], ], exemplar)
      res.backward <- alcove.backward(res.forward, parmSet, targ[ro[i_tr], ])
      parmSet$w = parmSet$w + t(res.backward$dW)
      parmSet$alpha = parmSet$alpha + res.backward$dA
      parmSet$alpha[which(parmSet$alpha < 0)] = 0
      pT = (exp(parmSet$phi * res.forward$out) / sum(exp(parmSet$phi * res.forward$out))) * targ[ro[i_tr], ]
      prob_temp = prob_temp + pT[which(!pT == 0)]
    }
    accu[i_iter + 1] = prob_temp / n.inp
    attn[i_iter + 1, ] = parmSet$alpha
  }
  attnN = attn / apply(attn, 1, sum)
  out = matrix(0, nrow = n.cat, ncol = n.inp)
  for (i_tr in 1:n.inp) {
    res.forward <- alcove.forward(parmSet, inp[i_tr, ], exemplar)
    out[, i_tr] = res.forward$out
  }
  return(list(attn = attn, attnN = attnN, accu = accu, out = out, ps = parmSet))
}

# the eight stimuli and the six Shepard-type category structures
exemplar = matrix(c(0,0,0, 0,0,1, 0,1,0, 0,1,1,
                    1,0,0, 1,0,1, 1,1,0, 1,1,1), byrow = TRUE, nrow = 8)
inp = exemplar
target1 = matrix(c(0,0,0,0,1,1,1,1, 1,1,1,1,0,0,0,0), nrow = 8)
target2 = matrix(c(1,1,0,0,0,0,1,1, 0,0,1,1,1,1,0,0), nrow = 8)
target3 = matrix(c(1,1,1,0,0,1,0,0, 0,0,0,1,1,0,1,1), nrow = 8)
target4 = matrix(c(1,1,1,0,1,0,0,0, 0,0,0,1,0,1,1,1), nrow = 8)
target5 = matrix(c(1,1,1,0,0,0,0,1, 0,0,0,1,1,1,1,0), nrow = 8)
target6 = matrix(c(1,0,0,1,0,1,1,0, 0,1,1,0,1,0,0,1), nrow = 8)

seed.num = 1; n.train = 50
set.seed(seed.num)
parmSet <- ALC.init(c(3,8,2))
result1 <- alcove(parmSet, inp, exemplar, target1, n.train)
plot(result1$accu, type = 'o', lwd = 2, pch = 20, col = 1)
set.seed(seed.num)
parmSet <- ALC.init(c(3,8,2))
result2 <- alcove(parmSet, inp, exemplar, target2, n.train)
lines(result2$accu, type = 'o', lwd = 2, pch = 20, col = 2)
set.seed(seed.num)
parmSet <- ALC.init(c(3,8,2))
result3 <- alcove(parmSet, inp, exemplar, target3, n.train)
lines(result3$accu, type = 'o', lwd = 2, pch = 20, col = 3)
set.seed(seed.num)
parmSet <- ALC.init(c(3,8,2))
result4 <- alcove(parmSet, inp, exemplar, target4, n.train)
lines(result4$accu, type = 'o', lwd = 2, pch = 20, col = 4)
set.seed(seed.num)
parmSet <- ALC.init(c(3,8,2))
result5 <- alcove(parmSet, inp, exemplar, target5, n.train)
lines(result5$accu, type = 'o', lwd = 2, pch = 20, col = 5)
set.seed(seed.num)
parmSet <- ALC.init(c(3,8,2))
result6 <- alcove(parmSet, inp, exemplar, target6, n.train)
lines(result6$accu, type = 'o', lwd = 2, pch = 20, col = 6)
legend("bottomright", paste("type", 1:6, sep = ''), col = 1:6, lwd = 2, pch = 20)
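ALCOVE's signature behavior is that attention migrates to the diagnostic dimensions, and alcove() already returns the normalized attention history in attnN. A quick way to see this, as a sketch using the result1 object created above (for the type-1 problem only dimension 1 is diagnostic, so its attention should grow while the other two decay):

# normalized attention weights over epochs for the type-1 problem
matplot(result1$attnN, type = 'l', lty = 1, lwd = 2, col = 1:3,
        xlab = "epoch", ylab = "normalized attention")
legend("right", paste("dim", 1:3), col = 1:3, lwd = 2)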
### end ALCOVE

# evolutionary game: hawk-dove
food = 6; cost = 10
A = 0.5 * (food - cost); B = food
C = 0; D = food / 2
pay.mat = matrix(c(A, B, C, D), nrow = 2, byrow = TRUE)
dt = 0.01; max_time = 1000
p = rep(0, max_time)   # proportion of hawks
q = rep(0, max_time)   # proportion of doves
p[1] = 0.2
q[1] = 1 - p[1]
for (t in 1:(max_time - 1)) {
  prob.mat = outer(c(p[t], q[t]), c(p[t], q[t]))
  W.ave = sum(prob.mat * pay.mat)
  W.h = sum(c(p[t], q[t]) * pay.mat[1, ])
  W.d = sum(c(p[t], q[t]) * pay.mat[2, ])
  p[t + 1] = p[t] + (p[t] * (W.h - W.ave) / W.ave) * dt
  q[t + 1] = q[t] + (q[t] * (W.d - W.ave) / W.ave) * dt
}
plot(p, type = 'l', lwd = 2, col = 'red', ylim = c(0, 1))
lines(q, type = 'l', lwd = 2, col = 'blue')

# cake game
n.cake = 10
pay.mat = matrix(0, n.cake + 1, n.cake + 1)
for (i.cake in 1:n.cake) {
  pay.mat[(i.cake + 1), 1:(n.cake - i.cake + 1)] = i.cake
}
p.cake = runif(n.cake + 1)
p.cake = p.cake / sum(p.cake)
max.time = 50
dt = 0.01
t = seq(0, max.time, dt)
n.iter = length(t)
p.hist = matrix(0, nrow = n.iter, ncol = (n.cake + 1))
p.hist[1, ] = p.cake
for (i.time in 2:n.iter) {
  W = colSums(p.cake * t(pay.mat))
  W.ave = sum(outer(p.cake, p.cake) * pay.mat)
  p.cake = p.cake + p.cake * (W - W.ave) / W.ave * dt
  p.hist[i.time, ] = p.cake
}
plot(p.hist[, 1], ylim = c(0, 1), type = 'l', lwd = 2,
     ylab = 'Proportion', xlab = "time")
for (i.strat in 2:(n.cake + 1)) {
  lines(p.hist[, i.strat], col = i.strat, lwd = 2)
}
legend("topleft", paste("request = ", 0:10), col = 1:(n.cake + 1), lwd = 2, cex = 0.75)

### fighting couple (a linear dynamical system)
fighting_couple <- function(a, b, c, d) {
  timeSep = 0.05; ts = seq(1, 50, timeSep); n_ts = length(ts)
  x = matrix(0, nrow = n_ts, ncol = 1)
  y = matrix(0, nrow = n_ts, ncol = 1)
  initX = c(rep(-40, 5), rep(-20, 5), rep(0, 5), rep(20, 5), rep(40, 5))
  initY = rep(c(-40, -20, 0, 20, 40), 5)
  initX = initX[-13]
  initY = initY[-13]
  lengthINI = length(initX)
  for (i_ini in 1:lengthINI) {
    x[1] = initX[i_ini]; y[1] = initY[i_ini]
    for (i_gen in 2:n_ts) {
      x[i_gen] = x[i_gen - 1] + (a * x[i_gen - 1] + b * y[i_gen - 1]) * timeSep
      y[i_gen] = y[i_gen - 1] + (c * x[i_gen - 1] + d * y[i_gen - 1]) * timeSep
    }
    if (i_ini == 1) {
      plot(x, y, xlim = c(-50, 50), ylim = c(-50, 50), col = 4, type = 'l', lwd = 2,
           xlab = "X's Action", ylab = "Y's Action")
      arrows(x[2], y[2], x[3], y[3], col = 4, lwd = 2, length = 0.15)
    } else {
      lines(x, y, col = 4, lwd = 2)
      arrows(x[2], y[2], x[3], y[3], col = 4, lwd = 2, length = 0.15)
    }
  }
}
par(mfrow = c(2, 3))
fighting_couple(-1, 0.0, 0.5, -1)
fighting_couple(-1, 2, -1, -1)
fighting_couple(0, -1, 1, 0)
fighting_couple(1, -2, 2, 0)
fighting_couple(1, 0, 0.5, 1)
fighting_couple(1, -4, -4, 0)
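Each panel's qualitative behavior (node, spiral, center, saddle) is governed by the eigenvalues of the coefficient matrix [[a, b], [c, d]]: negative real parts mean the couple settles down, positive real parts mean escalation, and nonzero imaginary parts mean oscillation. A quick check, as a sketch using base R's eigen():

# classify the six parameter sets by the eigenvalues of [[a,b],[c,d]]
param.sets = rbind(c(-1, 0.0, 0.5, -1), c(-1, 2, -1, -1), c(0, -1, 1, 0),
                   c(1, -2, 2, 0), c(1, 0, 0.5, 1), c(1, -4, -4, 0))
for (i.set in 1:nrow(param.sets)) {
  ev = eigen(matrix(param.sets[i.set, ], nrow = 2, byrow = TRUE))$values
  cat("set", i.set, ": eigenvalues =", format(ev, digits = 3), "\n")
}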
広域システム特別講義II: Stochastic Optimization Methods
naive.stochOptim <- function(obj.func, x, n.iter, width) {
  opt.value <- do.call(obj.func, list(x))
  opt.hist = matrix(0, nrow = n.iter, ncol = 5)
  opt.hist[1, ] = c(x, x, opt.value, opt.value, 1)
  for (i.iter in 2:n.iter) {
    accept = 0
    temp.x <- x + rnorm(1, mean = 0, sd = width)
    temp.value <- do.call(obj.func, list(temp.x))
    if (temp.value < opt.value) {
      x = temp.x
      opt.value = temp.value
      accept = 1
    }
    opt.hist[i.iter, ] = c(x, temp.x, opt.value, temp.value, accept)
  }
  return(data.frame(opt.hist))
}
set.seed(50)
n.iter = 500
fun01 <- function(x) { x^2 + 2*x }
res <- naive.stochOptim(fun01, 3, n.iter, 1)

# visualizing results
par(mfrow = c(2, 1))
# upper panel
plot(res$X2, res$X4, col = 'red', pch = 20, cex = 2, xlab = "x", ylab = 'f(x)')
legend("topleft", c("accepted", "tested"), pch = c(20, 20), col = c("black", "red"))
x.seq = seq(min(res$X2), max(res$X2), length.out = n.iter)
lines(x.seq, fun01(x.seq))
points(res$X1, res$X3, col = 'black', pch = 20)
# lower panel
plot(res$X3, 1:n.iter, type = 'n', pch = 20, xlim = c(min(res$X2), max(res$X2)),
     xlab = 'x', ylab = "Trial")
for (i.iter in 2:n.iter) {
  lines(c(res$X1[(i.iter - 1)], res$X2[i.iter]), c((i.iter - 1), i.iter),
        type = 'o', pch = 20, col = 'red', cex = 1.5)
}
points(res$X1, 1:n.iter, type = 'o', pch = 20)
legend("topright", c("accepted", "tested"), pch = c(20, 20), col = c("black", "red"))

# simulated annealing
SimAneal01 <- function(func, initXY, maxItr = 1000, C = 1, eta = 0.99, width = 10) {
  x = initXY
  opt.value = do.call(func, list(x))
  n.var = length(x)
  opt.hist = matrix(0, nrow = maxItr, ncol = 5)
  opt.hist[1, ] = c(x, x, opt.value, opt.value, 0)
  for (i_loop in 2:maxItr) {
    accept = 0
    temp.x = x + rnorm(n.var, mean = 0, sd = width)
    temp.value = do.call(func, list(temp.x))
    delta = temp.value - opt.value
    prob = 1 / (1 + exp(delta / (C * width)))
    if (runif(1) < prob) {
      x = temp.x; opt.value = temp.value; accept = 1
    }
    opt.hist[i_loop, ] = c(x, temp.x, opt.value, temp.value, accept)
    width = width * eta
  }
  return(data.frame(opt.hist))
}
set.seed(48)
n.iter = 500
res <- SimAneal01(fun01, 3, n.iter, 1, 0.985, 1)

# visualizing results
par(mfrow = c(2, 1))
# upper panel
plot(res$X2, res$X4, col = 'red', pch = 20, cex = 2, xlab = "x", ylab = 'f(x)')
legend("topleft", c("accepted", "tested"), pch = c(20, 20), col = c("black", "red"))
x.seq = seq(min(res$X2), max(res$X2), length.out = n.iter)
lines(x.seq, fun01(x.seq))
points(res$X1, res$X3, col = 'black', pch = 20)
# lower panel
plot(res$X3, 1:n.iter, type = 'n', pch = 20, xlim = c(min(res$X2), max(res$X2)),
     xlab = 'x', ylab = "Trial")
for (i.iter in 2:n.iter) {
  lines(c(res$X1[(i.iter - 1)], res$X2[i.iter]), c((i.iter - 1), i.iter),
        type = 'o', pch = 20, col = 'red', cex = 1.5)
}
points(res$X1, 1:n.iter, type = 'o', pch = 20)
legend("topright", c("accepted", "tested"), pch = c(20, 20), col = c("black", "red"))

### TSP
# move operator 1: reverse a sub-route of (up to) length len
TSP_inv <- function(route, len) {
  len_route = length(route)
  invP = sample(1:len_route, 1)
  route[invP:min(len_route, (invP + len - 1))] =
    rev(route[invP:min(len_route, (invP + len - 1))])
  return(route)
}
# move operator 2: swap two cities
TSP_switch <- function(route) {
  len_route = length(route)
  switchP = sample(1:len_route, 2)
  route[switchP] = route[rev(switchP)]
  return(route)
}
# move operator 3: remove one city and reinsert it at another position
TSP_trans <- function(route) {
  len_route = length(route)
  transP = sample(1:len_route, 2)
  tempR = route[-transP[1]]
  if (transP[2] == 1) {
    tempR = c(route[transP[1]], tempR)
  } else if (transP[2] == len_route) {
    tempR = c(tempR, route[transP[1]])
  } else {
    tempR = c(tempR[1:(transP[2] - 1)], route[transP[1]],
              tempR[(transP[2]):(len_route - 1)])
  }
  return(tempR)
}
FUN_initTSP <- function(n_city = 10) {
  return(matrix(runif(n_city * 2, 1, 100), nrow = n_city, ncol = 2))
}
FUN_costTSP <- function(route, cities) {
  route = c(route, route[1]); n_cities = nrow(cities); totDist = 0
  for (i_cities in 1:n_cities) {
    totDist = totDist + dist(cities[route[i_cities:(i_cities + 1)], ])
  }
  return(totDist)
}
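Before running the full demo below, it can help to see what each move operator actually does to a route. A minimal sketch on a 6-city route (the cut points are sampled, so the exact output will vary):

set.seed(1)
route = 1:6
TSP_inv(route, 3)    # reverses a stretch of up to 3 consecutive cities
TSP_switch(route)    # swaps the positions of two cities
TSP_trans(route)     # moves one city to a different position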
TSP_demo <- function(n_city = 20, maxItr = 1000, C = 1, eta = 0.99, TEMP = 50) {
  loc = FUN_initTSP(n_city); route = 1:n_city
  optDist = FUN_costTSP(route, loc)
  optHist = matrix(0, nrow = maxItr, ncol = (length(route) + 1))
  optHist[1, ] = c(route, optDist)
  for (i_loop in 2:maxItr) {
    rand.op = sample(c('inv', 'sw', 'trans'), 1, prob = c(0.75, 0.125, 0.125))
    if (rand.op == 'inv') {
      new.route = TSP_inv(route, sample(2:n_city, 1))
    } else if (rand.op == 'sw') {
      new.route = TSP_switch(route)
    } else {
      new.route = TSP_trans(route)
    }
    new.dist = FUN_costTSP(new.route, loc)
    delta = new.dist - optDist
    prob = 1 / (1 + exp(delta / (C * TEMP)))
    if (runif(1) < prob) {
      route = new.route; optDist = new.dist
    }
    optHist[i_loop, ] = c(route, optDist)
    TEMP = TEMP * eta
  }
  par(mfrow = c(1, 2))
  plot(rbind(loc, loc[1, ]), type = 'o', pch = 20, cex = 2.5, col = 'red',
       xlab = 'location @X', ylab = 'location @Y', main = 'Initial route')
  plot(loc[optHist[maxItr, c(1:n_city, 1)], ], type = 'o', pch = 20, col = 'magenta',
       cex = 2.5, xlab = 'location @X', ylab = 'location @Y', main = 'Optimized route')
  return(optHist)
}
TSP_demo()

### GA / ES
# genetic algorithm - identify the best linear model
GA_recomb <- function(G) {
  nPop = nrow(G); nVar = ncol(G)
  child = G
  G.permuted = G[sample(1:nPop), ]
  recomb.idx = which(matrix(sample(0:1, nPop * nVar, replace = TRUE), nrow = nPop) == 1)
  child[recomb.idx] = G.permuted[recomb.idx]
  return(child)
}
GA_mutate = function(child, p) {
  n.pop = nrow(child)
  n.var = ncol(child)
  mut.mat = matrix((runif(n.pop * n.var) < p), nrow = n.pop)
  child = abs(child - mut.mat)
  return(child)
}
GA_survive <- function(G, child, fitG, fitC) {
  nPop = nrow(G)
  fitT = c(fitG, fitC)
  fitMax = sort(fitT, index.return = TRUE, decreasing = TRUE)
  tempX = rbind(G, child)
  G = tempX[fitMax$ix[1:nPop], ]
  return(G)
}
GA <- function(G, nGen, p, X, Y) {
  optHist = matrix(0, nrow = nGen, ncol = 1)
  G.hist = matrix(0, nrow = nGen, ncol = ncol(G))
  nPop = nrow(G)
  nVar = ncol(G)
  fitG = rep(0, nPop)
  fitC = fitG
  for (i.pop in 1:nPop) {
    dat.temp = X[, which(G[i.pop, ] == 1)]
    fitG[i.pop] = summary(lm(Y ~ dat.temp))$adj.r.squared
  }
  optHist[1] = max(fitG)
  G.hist[1, ] = G[which.max(fitG), ]
  for (i_gen in 2:nGen) {
    child <- GA_recomb(G)
    child <- GA_mutate(child, p)
    # assuming the same number of genes per genome
    for (i.pop in 1:nPop) {
      dat.temp = X[, which(G[i.pop, ] == 1)]
      fitG[i.pop] = summary(lm(Y ~ dat.temp))$adj.r.squared
      if (sum(child[i.pop, ]) == 0) {
        # a genome with no predictors cannot be fit; re-seed it
        child[i.pop, sample(1:ncol(child), sample(1:nVar, 1))] = 1
      }
      dat.temp = X[, which(child[i.pop, ] == 1)]
      fitC[i.pop] = summary(lm(Y ~ dat.temp))$adj.r.squared
    }
    G <- GA_survive(G, child, fitG, fitC)
    optHist[i_gen] = max(c(fitG, fitC))
    G.hist[i_gen, ] = G[1, ]
  }
  return(list(G = G, optHist = optHist, G.hist = G.hist))
}
set.seed(19)
nVar = 10
nData = 50
X = matrix(rnorm(nVar * nData, mean = 10, sd = 5), ncol = nVar)
y = X %*% c(-2, 0, 2, 0, 2, 0, -3, 0, -2, 0) + rnorm(nData, mean = 0, sd = 8)
summary(lm(y ~ X[, seq(1, 9, 2)]))
summary(lm(y ~ X))
G = matrix(sample(0:1, 300, replace = TRUE), nrow = 30)
res <- GA(G, 100, 0.1, X, y)
head(res$G)

# GA - challenging task: models with interaction terms
# building model labels from combinations of variables
C = combn(3, 2)
n.comb = ncol(C)
var.labels = c()
for (i.comb in 1:n.comb) {
  temp.label = toString(paste("X", C[, i.comb], sep = ""))   # e.g., "X1, X2"
  var.labels = c(var.labels, gsub(",", "*", temp.label))     # e.g., "X1* X2"
}
mk.labels <- function(C) {
  var.labels = c()
  n.comb = ncol(C)
  for (i.comb in 1:n.comb) {
    temp.label = toString(paste("X", C[, i.comb], sep = ""))
    var.labels = c(var.labels, gsub(",", "*", temp.label))
  }
  return(var.labels)
}
n.var = 3; max.interaction = 3
var.labels = paste("X", 1:n.var, sep = "")
# interaction terms
for (i.interaction in 2:max.interaction) {
  combination = combn(n.var, i.interaction)
  var.labels = c(var.labels, mk.labels(combination))
}
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[c(1,1,1,1,1,0,0) == 1])))
nVar = 3; nData = 50
X = matrix(rnorm(nVar * nData, mean = 10, sd = 5), ncol = nVar)
y = X %*% c(1, 1, -2) + rnorm(nData, mean = 0, sd = 3)
dat = data.frame(Y = y, X1 = X[, 1], X2 = X[, 2], X3 = X[, 3])
model.lm = lm(model.def, dat)

## full implementation
# function to make model labels for a particular set of combinations (combn)
mk.labels <- function(C) {
  var.labels = c()
  n.comb = ncol(C)
  for (i.comb in 1:n.comb) {
    temp.label = toString(paste("X", C[, i.comb], sep = ""))
    # options for model specification
    # Opt 1: models that include all lower-degree interaction terms
    # Opt 2: models that do NOT include any lower-degree interaction term
    #
    # Opt 1
    # var.labels = c(var.labels, gsub(",", "*", temp.label))
    #
    # Opt 2
    var.labels = c(var.labels, gsub(",", ":", temp.label))
  }
  return(var.labels)
}
# function to make all model labels given the number of variables
# and the maximum degree of interactions
mk.labels.all <- function(n.var, max.interaction) {
  var.labels = paste("X", 1:n.var, sep = "")
  for (i.interaction in 2:max.interaction) {
    combination = combn(n.var, i.interaction)
    var.labels = c(var.labels, mk.labels(combination))
  }
  return(var.labels)
}
# creating model labels
# assuming 5 variables & up to 5-way interaction terms
var.labels <- mk.labels.all(5, 5)
# generating the data set
set.seed(30)
nData = 1000; n.var = 5
X = matrix(rnorm(n.var * nData, mean = 10, sd = 5), ncol = n.var)
Y = X[, seq(1, 5)] %*% c(1, 1, -2, -1, 1) +
  apply(X[, c(1, 3)], 1, prod) * 0.1 +
  apply(X[, c(2, 4)], 1, prod) * -0.1 +
  apply(X[, c(1, 3, 5)], 1, prod) * 0.01 +
  apply(X[, c(2, 4, 5)], 1, prod) * 0.01 +
  rnorm(nData, mean = 0, sd = 10)
dat = data.frame(Y, X)
colnames(dat) <- c('Y', paste("X", 1:n.var, sep = ""))
# checking fit of the "TRUE" model
summary(lm(Y ~ X1*X3 + X2*X4 + X1:X3:X5 + X2:X4:X5 + X5, dat))
AIC(lm(Y ~ X1*X3 + X2*X4 + X1:X3:X5 + X2:X4:X5 + X5, dat))
# initializing the parent population
nPop = 50
G = matrix(sample(0:1, nPop * length(var.labels), replace = TRUE, prob = c(0.8, 0.2)), nrow = nPop)
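Each genome is a 0/1 vector over var.labels, and the GA functions that follow turn it into an R formula string during fitness evaluation. A quick sketch of that mapping for the first parent (assuming it has at least one active gene, which is very likely with 31 labels):

head(var.labels)   # main effects first, then the interaction terms
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[G[1, ] == 1])))
model.def          # the model encoded by parent 1
AIC(lm(model.def, dat))   # its fitness under the AIC criterion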
# recombination
GA_recomb <- function(G) {
  nPop = nrow(G); nVar = ncol(G)
  child = G
  G.permuted = G[sample(1:nPop), ]
  recomb.idx = which(matrix(sample(0:1, nPop * nVar, replace = TRUE), nrow = nPop) == 1)
  child[recomb.idx] = G.permuted[recomb.idx]
  return(child)
}
# mutation
GA_mutate = function(child, p) {
  n.pop = nrow(child)
  n.var = ncol(child)
  mut.mat = matrix((runif(n.pop * n.var) < p), nrow = n.pop)
  child = abs(child - mut.mat)
  return(child)
}
# survivor selection - works with either min or max fitness criteria
GA_survive <- function(G, child, fitG, fitC, mnmx = "min") {
  nPop = nrow(G)
  fitT = c(fitG, fitC)
  if (mnmx == "max") {
    fitMax = sort(fitT, index.return = TRUE, decreasing = TRUE)
  } else {
    fitMax = sort(fitT, index.return = TRUE)
  }
  tempX = rbind(G, child)
  G = tempX[fitMax$ix[1:nPop], ]
  return(G)
}
# main function
GA <- function(G, nGen, p, dat) {
  optHist = matrix(0, nrow = nGen, ncol = 1)
  G.hist = matrix(0, nrow = nGen, ncol = ncol(G))
  nPop = nrow(G)
  nVar = ncol(G)
  fitG = rep(0, nPop)
  fitC = fitG
  for (i.pop in 1:nPop) {
    model.def = paste("Y ~", gsub(",", "+", toString(var.labels[G[i.pop, ] == 1])))
    # fitG[i.pop] = summary(lm(model.def, dat))$adj.r.squared
    fitG[i.pop] = AIC(lm(model.def, dat))
  }
  # AIC is minimized
  optHist[1] = min(fitG)
  G.hist[1, ] = G[which.min(fitG), ]
  # to display progress
  prog.prop = round(seq(0, nGen, length.out = 10))
  prog.report = paste(seq(0, 100, 10), "% done\n", sep = "")
  for (i_gen in 2:nGen) {
    if (any(i_gen == prog.prop)) {
      cat(prog.report[which(i_gen == prog.prop)])
    }
    child <- GA_recomb(G)
    child <- GA_mutate(child, p)
    # assuming the same number of genes per genome
    for (i.pop in 1:nPop) {
      model.def = paste("Y ~", gsub(",", "+", toString(var.labels[G[i.pop, ] == 1])))
      # fitG[i.pop] = summary(lm(model.def, dat))$adj.r.squared
      fitG[i.pop] = AIC(lm(model.def, dat))
      if (sum(child[i.pop, ], na.rm = TRUE) == 0) {
        # a genome with no predictors cannot be fit; re-seed it
        child[i.pop, sample(1:ncol(child), sample(1:nVar, 1))] = 1
      }
      model.def = paste("Y ~", gsub(",", "+", toString(var.labels[child[i.pop, ] == 1])))
      # fitC[i.pop] = summary(lm(model.def, dat))$adj.r.squared
      fitC[i.pop] = AIC(lm(model.def, dat))
    }
    G <- GA_survive(G, child, fitG, fitC, "min")
    optHist[i_gen] = min(c(fitG, fitC))
    G.hist[i_gen, ] = G[1, ]
  }
  return(list(G = G, optHist = optHist, G.hist = G.hist))
}
# running the simulation
res = GA(G, 1000, 0.15, dat)
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[res$G[1, ] == 1])))
summary(lm(model.def, dat))
AIC(lm(model.def, dat))
plot(res$optHist, type = 'l', xlab = "generation", ylab = "AIC")
var.labels[which(res$G[1, ] == 1)]
### end extensive GA

# Evolutionary Strategy - estimating coefficients
ES_recomb <- function(G) {
  nParent = nrow(G$x); nVar = ncol(G$x)
  child = G
  for (i_child in 1:nParent) {
    parentID = sample(1:nParent, 2)
    coID = sample(c(0, 1), nVar, replace = TRUE)
    child$x[i_child, ] = G$x[parentID[1], ]
    child$x[i_child, which(coID == 1)] = G$x[parentID[2], which(coID == 1)]
    child$sigma[i_child, ] = 0.5 * (G$sigma[parentID[1], ] + G$sigma[parentID[2], ])
  }
  return(child)
}
ES_mutate <- function(child, tau) {
  nChild = nrow(child$x); nVar = ncol(child$x)
  child$sigma <- child$sigma * exp(matrix(rnorm(nChild * nVar) * tau, nrow = nChild))
  child$x = child$x + child$sigma * matrix(rnorm(nChild * nVar), nrow = nChild, ncol = nVar)
  return(child)
}
ES_survive <- function(G, child, fitG, fitC) {
  nParent = nrow(G$x)
  fitT = c(fitG, fitC)
  fitMin = sort(fitT, index.return = TRUE)
  tempX = rbind(G$x, child$x)
  tempS = rbind(G$sigma, child$sigma)
  G$x = tempX[fitMin$ix[1:nParent], ]
  G$sigma = tempS[fitMin$ix[1:nParent], ]
  return(G)
}
ES <- function(G, func, nGen, tau, X, y) {
  optHist = matrix(0, nrow = nGen, ncol = 1)
  G.hist = matrix(0, nrow = nGen, ncol = ncol(G$x))
  fitG = apply(G$x, 1, func, X, y)
  optHist[1] = min(fitG)
  G.hist[1, ] = G$x[which.min(fitG), ]
  child = G
  for (i_gen in 2:nGen) {
    child <- ES_recomb(G)
    child <- ES_mutate(child, tau)
    fitG = apply(G$x, 1, func, X, y)
    fitC = apply(child$x, 1, func, X, y)
    G <- ES_survive(G, child, fitG, fitC)
    optHist[i_gen] = min(c(fitG, fitC))
    G.hist[i_gen, ] = G$x[1, ]
  }
  return(list(G = G, optHist = optHist, G.hist = G.hist))
}
set.seed(20); nData = 100
X = matrix(rnorm(9 * nData, mean = 10, sd = 2), ncol = 9); X = cbind(rep(1, nData), X)
y = X %*% c(10, 2, 5, -3, -5, 0, 0, 0, 0, 0) + rnorm(nData, mean = 0, sd = 2)
# sum of squared errors as the (minimized) fitness criterion
fun04 <- function(b, X, y) {
  yhat <- X %*% b
  return(sum((y - yhat)^2))
}
G = list()
G$x = matrix(rnorm(10 * 30), ncol = 10)
G$sigma = matrix(runif(10 * 30, 0.5, 1.5), ncol = 10)
res = ES(G, fun04, 1000, 1, X, y)

# PSO - optimizing a 2-variable function
PSO <- function(G, wP, wG, func, maxIter) {
  swarm.hist = array(0, c(nrow(G), ncol(G), maxIter))
  swarm.hist[,,1] = G
  p.b.hist = apply(G, 1, func)
  global.best.v = min(p.b.hist)
  p.Best = G
  g.Best = matrix(G[which.min(p.b.hist), ], nrow = nrow(G), ncol = ncol(G), byrow = TRUE)
  v = matrix(0, nrow = nrow(G), ncol = ncol(G))
  for (i.iter in 2:maxIter) {
    v = v + wP * runif(1) * (p.Best - G) + wG * runif(1) * (g.Best - G)
    G = G + v
    fitG = apply(G, 1, func)
    if (min(fitG) < global.best.v) {
      g.Best = matrix(G[which.min(fitG), ], nrow = nrow(G), ncol = ncol(G), byrow = TRUE)
      global.best.v = min(fitG)
    }
    # update personal bests only where the current position improved
    idx = which(fitG < p.b.hist)
    p.Best[idx, ] = G[idx, ]
    p.b.hist[idx] = fitG[idx]
    swarm.hist[,,i.iter] = G
  }
  return(swarm.hist)
}
# running PSO
par(mfrow = c(1, 1))
fun03 <- function(x) { x[1]^4 - 16*x[1]^2 - 5*x[1] + x[2]^4 - 16*x[2]^2 - 5*x[2] }
# the same function in two scalar arguments, for use with outer()
funZ <- function(x, y) { x^4 - 16*x^2 - 5*x + y^4 - 16*y^2 - 5*y }
x <- seq(-5, 5, length.out = 100); y <- x; z <- outer(x, y, funZ)
contour(x, y, z, nlevels = 30, drawlabels = FALSE)
nSwarm = 100; nVar = 2
G = matrix(rnorm(nVar * nSwarm, mean = 0, sd = 0.1), ncol = nVar)
res <- PSO(G, 0.1, 0.1, fun03, 500)
lines(res[1, 1, ], res[1, 2, ], type = 'o', col = 'red')
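fun03 is separable, so its global minimum can be cross-checked one coordinate at a time; each term x^4 - 16x^2 - 5x has its minimum near x = 2.9 (a sign-flipped variant of the Styblinski-Tang test function). A quick sanity check of the swarm's answer, as a sketch using base R's optim():

# where did the swarm end up, and does it agree with a quasi-Newton answer?
final.best = res[which.min(apply(res[,,500], 1, fun03)), , 500]
final.best
optim(c(0, 0), fun03, method = "BFGS")$par   # should be close to c(2.90, 2.90)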
広域システム特別講義II: Bayesian Statistics
library(rjags)
source("http://www.matsuka.info/univ/course_folder/HDI_revised.txt")

island.hopping2 <- function(n.rep = 1e5, init.st = 4) {
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # intro to MCMC, island hopping
  # initialization
  state = 1:7
  curr.st = init.st
  state.counter = rep(0, 7)
  state.counter[curr.st] = 1
  state.history = rep(0, n.rep)
  state.history[1] = curr.st
  prob.propose = matrix(1/6, nrow = 7, ncol = 7)
  diag(prob.propose) <- 0
  # main loop
  for (i.rep in 2:n.rep) {
    destination = sample(state, 1, prob = prob.propose[curr.st, ])
    prob.move = min(destination / curr.st, 1)
    if (runif(1) < prob.move) {
      curr.st = destination
    }
    state.history[i.rep] = curr.st
    state.counter[curr.st] = state.counter[curr.st] + 1
  }
  par(mfrow = c(2, 1))
  par(mai = c(0, 1, 1, 1))
  par(mar = c(4, 3, 1, 3))
  barplot(state.counter, xlab = "theta", ylab = "Frequency")
  plot(state.history, 1:n.rep, type = 'o', log = 'y', xlab = "theta",
       ylab = "Time Step (log)", col = "orange")
}
island.hopping2(10000, 4)

metropolis.ex01 <- function(n.iter = 1e6, theta.init = 0.1, sigma = 0.2, plot.yn = TRUE) {
  # example from DBDA 2nd Ed. (Kruschke) ch. 07
  # Metropolis algorithm with 1 parameter
  # (unnormalized) posterior of theta
  posterior.theta <- function(theta, N, z, a, b) {
    return(theta^z * (1 - theta)^(N - z) * theta^(a - 1) * (1 - theta)^(b - 1) / beta(a, b))
  }
  # initialization
  theta.history = rep(0, n.iter)
  theta.current = theta.init
  theta.history[1] = theta.current
  # values given in text
  mu = 0
  N = 20
  z = 14
  a = 1
  b = 1
  # main loop
  for (i_iter in 2:n.iter) {
    theta.proposed = theta.current + rnorm(1, mean = mu, sd = sigma)
    if (theta.proposed < 0 | theta.proposed > 1) {
      p.move = 0
    } else {
      p.current = posterior.theta(theta.current, N, z, a, b)
      p.proposed = posterior.theta(theta.proposed, N, z, a, b)
      p.move = min(p.proposed / p.current, 1)
    }
    if (runif(1) < p.move) {
      theta.current = theta.proposed
    }
    theta.history[i_iter] = theta.current
  }
  # plotting results
  if (plot.yn == TRUE) {
    par(mfrow = c(3, 1))
    hist(theta.history, nclass = 100, col = "orange", probability = TRUE, xlab = "theta")
    den = density(theta.history)
    lines(den)
    plot(theta.history[(n.iter - 100):n.iter], ((n.iter - 100):n.iter), type = 'o',
         xlim = c(0, 1), xlab = "theta", ylab = "step in chain")
    plot(theta.history[1:100], 1:100, type = 'o', xlim = c(0, 1),
         xlab = "theta", ylab = "step in chain")
  }
  return(theta.history)
}
res = metropolis.ex01(10000, 0.1)

# Metropolis within Gibbs with two thetas
# initialization
n.iter = 10000; sigma = 0.1; counter = 0
z = c(6, 2); N = c(8, 7)
a = c(2, 2); b = c(2, 2); n.par = 2
th.hist = matrix(0, nrow = n.iter * n.par, ncol = n.par)
theta = runif(2)
# function to calc. prob. of move
prob.gibbs <- function(theta, proposed, N, z, a, b, idx) {
  p.th = dbeta(theta[idx], z[idx] + a[idx], N[idx] - z[idx] + b[idx])
  p.pro = dbeta(proposed, z[idx] + a[idx], N[idx] - z[idx] + b[idx])
  return(p.pro / p.th)
}
# main loop
for (i.iter in 1:n.iter) {
  for (i.par in 1:n.par) {
    proposed = theta[i.par] + rnorm(1, mean = 0, sd = sigma)
    if (proposed > 1) { proposed = 1 }
    if (proposed < 0) { proposed = 0 }
    p.move = min(1, prob.gibbs(theta, proposed, N, z, a, b, i.par))
    if (runif(1) < p.move) {
      theta[i.par] = proposed
    }
    counter = counter + 1
    th.hist[counter, ] = theta
  }
}
par(mfrow = c(3, 1))
HDI.plot(th.hist[, 1])
HDI.plot(th.hist[, 2])
plot(th.hist[, 1], th.hist[, 2], type = 'o', cex = 0.1, xlim = c(0, 1), ylim = c(0, 1))
par(mfrow = c(1, 1))

# Bernoulli model with JAGS
model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta )
  }
  theta ~ dbeta( 1, 1 )
}"
writeLines(model.txt, "model.txt")
dat <- read.csv("http://www.matsuka.info/univ/course_folder/z15N50.csv")
y = dat$y
Ntotal = length(dat$y)
datalist = list(y = y, Ntotal = Ntotal)
# jags
jagsModel = jags.model(file = "model.txt", data = datalist, n.chains = 3, n.adapt = 500)
update(jagsModel, n.iter = 1000)
codaSamples = coda.samples(jagsModel, variable.names = c("theta"), n.iter = 5000)
mcmcMat <- as.matrix(codaSamples)
par(mfrow = c(2, 2))
cols = c("orange", "skyblue", "pink")
# chains
mcmc1 <- as.mcmc(codaSamples[[1]])
mcmc2 <- as.mcmc(codaSamples[[2]])
mcmc3 <- as.mcmc(codaSamples[[3]])
plot(mcmc1, type = 'l')
lines(mcmc2, col = 'red')
lines(mcmc3, col = 'blue')
# autocorrelation
ac1 = autocorr(mcmc1, lags = 0:50)
ac2 = autocorr(mcmc2, lags = 0:50)
ac3 = autocorr(mcmc3, lags = 0:50)
plot(ac1, type = 'o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type = 'o', pch = 20, col = cols[2])
lines(ac3, type = 'o', pch = 20, col = cols[3])
# shrink factor
resALL = mcmc.list(mcmc1, mcmc2, mcmc3)
gd1 = gelman.plot(resALL, auto.layout = FALSE)
# density
den1 = density(mcmc1)
den2 = density(mcmc2)
den3 = density(mcmc3)
plot(den1, type = 'l', col = cols[1], main = " ", xlab = "param. value", lwd = 2.5)
lines(den2, col = cols[2], lwd = 2.5)
lines(den3, col = cols[3], lwd = 2.5)
par(mfrow = c(1, 1))

# multiple subjects
model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta[s[i_data]] )
  }
  for ( i_s in 1:Nsubj ) {
    theta[i_s] ~ dbeta( 2, 2 )
  }
}"
writeLines(model.txt, "model.txt")
dat <- read.csv("http://www.matsuka.info/univ/course_folder/z6N8z2N7.csv")
y = dat$y
s = as.numeric(as.factor(dat$s))
Ntotal = length(dat$y)
Nsubj = length(unique(s))
datalist = list(y = y, s = s, Ntotal = Ntotal, Nsubj = Nsubj)
jagsModel = jags.model(file = "model.txt", data = datalist, n.chains = 3, n.adapt = 500)
update(jagsModel, n.iter = 1000)
codaSamples = coda.samples(jagsModel, variable.names = c("theta"), n.iter = 5000)
mcmcMat <- as.matrix(codaSamples)
par(mfrow = c(2, 2))
cols = c("orange", "skyblue", "pink")
# chains
mcmc1 <- as.mcmc(codaSamples[[1]])
mcmc2 <- as.mcmc(codaSamples[[2]])
mcmc3 <- as.mcmc(codaSamples[[3]])
plot(mcmc1, type = 'l')
lines(mcmc2, col = 'red')
lines(mcmc3, col = 'blue')
# autocorrelation
ac1 = autocorr(mcmc1, lags = 0:50)
ac2 = autocorr(mcmc2, lags = 0:50)
ac3 = autocorr(mcmc3, lags = 0:50)
plot(ac1, type = 'o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type = 'o', pch = 20, col = cols[2])
lines(ac3, type = 'o', pch = 20, col = cols[3])
# shrink factor
resALL = mcmc.list(mcmc1, mcmc2, mcmc3)
gd1 = gelman.plot(resALL, auto.layout = FALSE)
# density
den1 = density(mcmc1)
den2 = density(mcmc2)
den3 = density(mcmc3)
plot(den1, type = 'l', col = cols[1], main = " ", xlab = "param. value", lwd = 2.5)
lines(den2, col = cols[2], lwd = 2.5)
lines(den3, col = cols[3], lwd = 2.5)
par(mfrow = c(1, 1))

# estimating the mean and SD of normally distributed data
model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dnorm( mu , 1/sigma^2 )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
}"
writeLines(model.txt, "model.txt")
dat <- read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group == "Smart Drug"]
Ntotal = length(y)
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data = dataList, n.chains = 3, n.adapt = 1000)
update(jagsModel, n.iter = 1000)
codaSamples = coda.samples(jagsModel, variable.names = c("mu", "sigma"),
                           n.iter = 5000, thin = 5)
mcmcMat <- as.matrix(codaSamples)
# plotting the posterior & a posterior-predictive check
par(mfrow = c(2, 2))
HDI.plot(mcmcMat[, 1])
hist(y, nclass = 15, probability = TRUE)
x.temp = seq(40, 200, 0.1)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot) {
  norm.temp = dnorm(x.temp, mean = mcmcMat[randperm[i.plot], 1],
                    sd = mcmcMat[randperm[i.plot], 2])
  lines(x.temp, norm.temp, col = 'orange')
}
HDI.plot(mcmcMat[, 2])
# calculating & plotting the effect size
effect.size = (mcmcMat[, "mu"] - 100) / mcmcMat[, "sigma"]
HDI.plot(effect.size)

# robust (t-distribution) version
dat <- read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score[dat$Group == "Smart Drug"]
Ntotal = length(y)
model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu , 1/sigma^2 , nu )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/30.0)
}"
writeLines(model.txt, "model.txt")
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("model.txt", data = dataList, n.chains = 3, n.adapt = 1000)
update(jagsModel, n.iter = 1000)
codaSamples = coda.samples(jagsModel, variable.names = c("mu", "sigma", "nu"),
                           n.iter = 5000, thin = 5)
mcmcMat <- as.matrix(codaSamples)
# mcmcMat columns are in alphabetical order: mu, nu, sigma
par(mfrow = c(3, 2))
HDI.plot(mcmcMat[, 1])
HDI.plot(mcmcMat[, 3])
normality = log10(mcmcMat[, "nu"])
HDI.plot(normality)
effect.size = (mcmcMat[, "mu"] - 100) / mcmcMat[, "sigma"]
HDI.plot(effect.size)
hist(y, nclass = 20, probability = TRUE)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot) {
  x.temp1 = seq(40, 200, 0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot], 1]) / mcmcMat[randperm[i.plot], 3]
  t.temp = dt(x.temp2, mcmcMat[randperm[i.plot], 2]) / mcmcMat[randperm[i.plot], 3]
  lines(x.temp1, t.temp, col = 'orange')
}
par(mfrow = c(2, 2))
plot(mcmcMat[, 1], mcmcMat[, 3], col = 'orange')
plot(mcmcMat[, 1], log10(mcmcMat[, 2]), col = 'orange')
plot(0, 0, type = 'n')
plot(log10(mcmcMat[, 2]), mcmcMat[, 3], col = 'orange')

# two-group comparison
dat <- read.csv("http://www.matsuka.info/univ/course_folder/TwoGroupIQ.csv")
y = dat$Score
group = as.numeric(as.factor(dat$Group))
Ntotal = length(y)
Ngroup = length(unique(group))
model.txt = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu[group[i]] , 1/sigma[group[i]]^2 , nu )
  }
  for ( j in 1:Ngroup ) {
    mu[j] ~ dnorm( meanY , 1/(100*sdY)^2 )
    sigma[j] ~ dunif( sdY/1000 , sdY*1000 )
  }
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/29)
}"
writeLines(model.txt, "model.txt")
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y),
                Ngroup = Ngroup, group = group)
jagsModel = jags.model("model.txt", data = dataList, n.chains = 3, n.adapt = 5000)
update(jagsModel, n.iter = 1000)
codaSamples = coda.samples(jagsModel, variable.names = c("mu", "sigma", "nu"),
                           n.iter = 5000, thin = 5)
mcmcMat <- as.matrix(codaSamples)
# mcmcMat columns: mu[1], mu[2], nu, sigma[1], sigma[2]
# plotting the results
par(mfrow = c(5, 2))
HDI.plot(mcmcMat[, 1], xlabel = "placebo Mean")
hist(y[dat$Group == "Placebo"], nclass = 20, probability = TRUE)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot) {
  x.temp1 = seq(40, 200, 0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot], 1]) / mcmcMat[randperm[i.plot], 4]
  t.temp = dt(x.temp2, mcmcMat[randperm[i.plot], 3]) / mcmcMat[randperm[i.plot], 4]
  lines(x.temp1, t.temp, col = 'orange')
}
HDI.plot(mcmcMat[, 2], xlabel = "smart drug Mean")
hist(y[dat$Group == "Smart Drug"], nclass = 20, probability = TRUE)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot) {
  x.temp1 = seq(40, 200, 0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot], 2]) / mcmcMat[randperm[i.plot], 5]
  t.temp = dt(x.temp2, mcmcMat[randperm[i.plot], 3]) / mcmcMat[randperm[i.plot], 5]
  lines(x.temp1, t.temp, col = 'orange')
}
HDI.plot(mcmcMat[, 4], xlabel = "placebo scale")
HDI.plot(mcmcMat[, 2] - mcmcMat[, 1], xlabel = "Difference of Means")
HDI.plot(mcmcMat[, 5], xlabel = "smart drug scale")
HDI.plot(mcmcMat[, 5] - mcmcMat[, 4], xlabel = "Difference of Scales")
HDI.plot(log10(mcmcMat[, 3]), xlabel = "Normality")
effect.size = (mcmcMat[, 2] - mcmcMat[, 1]) / sqrt((mcmcMat[, 5]^2 + mcmcMat[, 4]^2) / 2)
HDI.plot(effect.size, xlabel = "effect size")
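Because mcmcMat holds joint posterior samples, derived quantities beyond the HDI plots are one-liners. For instance, the posterior probability that the smart-drug mean exceeds the placebo mean, as a sketch using the mcmcMat from the two-group model above:

# proportion of posterior samples in which mu[2] (smart drug) > mu[1] (placebo)
mean(mcmcMat[, 2] > mcmcMat[, 1])
# an equal-tailed 95% interval of the difference, via quantiles
quantile(mcmcMat[, 2] - mcmcMat[, 1], c(0.025, 0.5, 0.975))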
広域システム特別講義II: DL with Keras
library(keras)

### iris data
dat = iris
x_train = dat[, 1:3] %>% as.matrix()
# to_categorical() truncates Petal.Width to the integers 0/1/2,
# effectively binning it into three classes used as the target
y_train <- dat[, 4] %>% to_categorical()
model_iris <- keras_model_sequential() %>%
  layer_dense(units = 20, activation = "relu", input_shape = ncol(x_train)) %>%
  layer_dense(units = 10, activation = "relu") %>%
  layer_dense(units = 3, activation = "softmax")
summary(model_iris)
model_iris %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)
model_iris %>% fit(x_train, y_train, epochs = 100, batch_size = 5)
model_iris %>% evaluate(x_train, y_train)

### with validation
history <- model_iris %>% fit(x_train, y_train, validation_split = 0.20,
                              epochs = 300, batch_size = 5, shuffle = TRUE)
plot(history)

# column-wise min-max normalization
normalizer <- function(x) {
  min_x = apply(x, 2, min)
  max_x = apply(x, 2, max)
  num <- t(x) - min_x
  denom <- max_x - min_x
  normalized = t(num / denom)
  return(normalized)
}
x_trainN = normalizer(x_train)
model_iris <- keras_model_sequential() %>%
  layer_dense(units = 20, activation = "relu", input_shape = ncol(x_train)) %>%
  layer_dense(units = 10, activation = "relu") %>%
  layer_dense(units = 3, activation = "softmax")
model_iris %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)
history_N <- model_iris %>% fit(x_trainN, y_train, validation_split = 0.20,
                                epochs = 150, batch_size = 5, shuffle = TRUE)
plot(history_N)

### IMDB reviews
imdb <- dataset_imdb(num_words = 10000)
c(c(train_data, train_labels), c(test_data, test_labels)) %<-% imdb
vectorize_sequences <- function(sequences, dimension = 10000) {
  results <- matrix(0, nrow = length(sequences), ncol = dimension)
  for (i in 1:length(sequences))
    results[i, sequences[[i]]] <- 1
  results
}
x_trainTEMP <- vectorize_sequences(train_data)
x_test <- vectorize_sequences(test_data)
y_trainTEMP <- as.numeric(train_labels)
y_test <- as.numeric(test_labels)
val_idx = 1:1000
x_val = x_trainTEMP[val_idx, ]
y_val = y_trainTEMP[val_idx]
x_train = x_trainTEMP[-val_idx, ]
y_train = y_trainTEMP[-val_idx]
original_model <- keras_model_sequential() %>%
  layer_dense(units = 16, activation = "relu", input_shape = c(10000)) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
original_model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)
history_orig <- original_model %>% fit(x_train, y_train, epochs = 20, batch_size = 512,
                                       validation_data = list(x_val, y_val))
plot(history_orig)
original_model %>% evaluate(x_test, y_test)
smaller_model <- keras_model_sequential() %>%
  layer_dense(units = 4, activation = "relu", input_shape = c(10000)) %>%
  layer_dense(units = 4, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
smaller_model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)
history_small <- smaller_model %>% fit(x_train, y_train, epochs = 20, batch_size = 512,
                                       validation_data = list(x_val, y_val))
plot(history_small)
smaller_model %>% evaluate(x_test, y_test)
bigger_model <- keras_model_sequential() %>%
  layer_dense(units = 512, activation = "relu", input_shape = c(10000)) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
bigger_model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)
history_big <- bigger_model %>% fit(x_train, y_train, epochs = 20, batch_size = 512,
                                    validation_data = list(x_val, y_val))
plot(history_big)
model_L2 <- keras_model_sequential() %>%
  layer_dense(units = 16, kernel_regularizer = regularizer_l2(0.0001),
              activation = "relu", input_shape = c(10000)) %>%
  layer_dense(units = 16, kernel_regularizer = regularizer_l2(0.0001),
              activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
model_L2 %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)
history_L2 <- model_L2 %>% fit(x_train, y_train, epochs = 20, batch_size = 512,
                               validation_data = list(x_val, y_val))
plot(history_L2)
model_L2 %>% evaluate(x_test, y_test)
model_DO <- keras_model_sequential() %>%
  layer_dense(units = 16, activation = "relu", input_shape = c(10000)) %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 1, activation = "sigmoid")
model_DO %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)
history_DO <- model_DO %>% fit(x_train, y_train, epochs = 20, batch_size = 512,
                               validation_data = list(x_val, y_val))
plot(history_DO)
model_DO %>% evaluate(x_test, y_test)

### MNIST
mnist <- dataset_mnist()
c(c(tr_img, tr_lab), c(te_img, te_lab)) %<-% mnist
tr_img <- array_reshape(tr_img, c(60000, 28, 28, 1))
tr_img = tr_img / 255
te_img <- array_reshape(te_img, c(10000, 28, 28, 1))
te_img = te_img / 255
tr_lab = to_categorical(tr_lab)
te_lab = to_categorical(te_lab)
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(28, 28, 1)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 10, activation = "softmax")
model %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)
history <- model %>% fit(tr_img, tr_lab, epochs = 5, validation_split = 0.20,
                         batch_size = 64)
model %>% evaluate(te_img, te_lab)

### dogs vs. cats: preparing the directory structure
original_dataset_dir <- "~/Downloads/dogs-vs-cats/train"
base_dir <- "~/Downloads/cats_and_dogs_small/"
dir.create(base_dir)
train_dir <- file.path(base_dir, "train")
dir.create(train_dir)
validation_dir <- file.path(base_dir, "validation")
dir.create(validation_dir)
test_dir <- file.path(base_dir, "test")
dir.create(test_dir)
train_cats_dir <- file.path(train_dir, "cats")
dir.create(train_cats_dir)
train_dogs_dir <- file.path(train_dir, "dogs")
dir.create(train_dogs_dir)
validation_cats_dir <- file.path(validation_dir, "cats")
dir.create(validation_cats_dir)
validation_dogs_dir <- file.path(validation_dir, "dogs")
dir.create(validation_dogs_dir)
test_cats_dir <- file.path(test_dir, "cats")
dir.create(test_cats_dir)
test_dogs_dir <- file.path(test_dir, "dogs")
dir.create(test_dogs_dir)
fnames <- paste0("cat.", 1:1000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), file.path(train_cats_dir))
fnames <- paste0("cat.", 1001:1500, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), file.path(validation_cats_dir))
fnames <- paste0("cat.", 1501:2000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), file.path(test_cats_dir))
fnames <- paste0("dog.", 1:1000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), file.path(train_dogs_dir))
fnames <- paste0("dog.", 1001:1500, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), file.path(validation_dogs_dir))
fnames <- paste0("dog.", 1501:2000, ".jpg")
file.copy(file.path(original_dataset_dir, fnames), file.path(test_dogs_dir))
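After the copy step it is worth confirming that each split actually contains the expected number of images. A quick check with base R, assuming the directories created above:

# expected: 1000 per class in train, 500 per class in validation and test
sapply(c(train_cats_dir, train_dogs_dir, validation_cats_dir,
         validation_dogs_dir, test_cats_dir, test_dogs_dir),
       function(d) length(list.files(d)))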
# small convnet trained from scratch
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(150, 150, 3)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
summary(model)
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-4),
  metrics = c("acc")
)
train_datagen <- image_data_generator(rescale = 1/255)
validation_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_directory(
  train_dir,
  train_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
validation_generator <- flow_images_from_directory(
  validation_dir,
  validation_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 10,
  validation_data = validation_generator,
  validation_steps = 50
)

# same architecture with dropout & data augmentation
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(150, 150, 3)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-4),
  metrics = c("acc")
)
datagen <- image_data_generator(
  rescale = 1/255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE
)
test_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_directory(
  train_dir,
  datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)
validation_generator <- flow_images_from_directory(
  validation_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 32,
  class_mode = "binary"
)
history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 10,
  validation_data = validation_generator,
  validation_steps = 50
)
datagen <- image_data_generator(
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = FALSE,
  fill_mode = "nearest"
)

# feature extraction with a pretrained VGG16 base
conv_base <- application_vgg16(
  weights = "imagenet",
  include_top = FALSE,
  input_shape = c(150, 150, 3)
)
summary(conv_base)
model <- keras_model_sequential() %>%
  conv_base %>%
  layer_flatten() %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
# freeze the convolutional base so that only the new dense layers are trained
freeze_weights(conv_base)
train_datagen = image_data_generator(
  rescale = 1/255,
  rotation_range = 40,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = TRUE,
  fill_mode = "nearest"
)
test_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_directory(
  train_dir,
  train_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
validation_generator <- flow_images_from_directory(
  validation_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 2e-5),
  metrics = c("accuracy")
)
history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 3,
  validation_data = validation_generator,
  validation_steps = 50
)

# fine-tuning: unfreeze the top block of VGG16 and retrain with a low learning rate
unfreeze_weights(conv_base, from = "block5_conv1")
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 1e-5),
  metrics = c("accuracy")
)
history <- model %>% fit_generator(
  train_generator,
  steps_per_epoch = 100,
  epochs = 10,
  validation_data = validation_generator,
  validation_steps = 50
)
test_generator <- flow_images_from_directory(
  test_dir,
  test_datagen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
model %>% evaluate_generator(test_generator, steps = 50)
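To use the fine-tuned network on one new photo, the image has to be loaded, resized to 150x150, rescaled, and given a batch dimension. A minimal sketch (the file path is hypothetical):

# predict the class of a single image; values near 1 mean "dog", near 0 mean "cat"
# (class indices follow the alphabetical folder order used by the generators)
img <- image_load("~/Downloads/some_photo.jpg", target_size = c(150, 150))
x <- image_to_array(img) / 255
x <- array_reshape(x, c(1, 150, 150, 3))
model %>% predict(x)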
広域システム特別講義II: Unsupervised Learning 1A
dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca <- princomp(dat)
dat.pca$score
dat.pca$loadings

library(nnet)
dat <- read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
set.seed(5)
x = dat[, 1:3]
y = dat[, 4]
dat.nnet = nnet(x, y, size = 150, linout = TRUE, maxit = 1000)
nnet.pred <- predict(dat.nnet, dat)
cor(dat.nnet$fitted.values, dat$sales)^2
dat.lm <- lm(sales ~ ., data = dat)
summary(dat.lm)

# pseudo-PCA with an autoencoder
dat <- read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.nnet <- nnet(dat, dat, size = 2, maxit = 1000, decay = 0.01, linout = TRUE)
cor(dat.nnet$fitted.values, dat)

### text processing
txt = "You say goodbye and I say hello."; txt = tolower(txt)
txt = gsub('[.]', ' .', txt)
words = unlist(strsplit(txt, " "))
uniq.words = unique(words)
uniq.words[1]
which(uniq.words == "say")
n.uniq = length(uniq.words)
n.words = length(words)
corpus = rep(0, n.words)
corpus = match(words, uniq.words)
# co-occurrence matrix (window size 1)
cc = matrix(0, nrow = n.uniq, ncol = n.uniq)
for (i.c in 1:n.uniq) {
  loc = which(corpus == i.c)
  L = which(match(uniq.words, words[pmax(loc - 1, 1)]) != 'NA')
  R = which(match(uniq.words, words[pmin(loc + 1, n.words)]) != 'NA')
  cc[i.c, c(L, R)] = cc[i.c, c(L, R)] + 1
}
diag(cc) = 0
contxt <- function(corpus, uniq.words, words) {
  n.uniq = length(uniq.words)
  n.words = length(words)
  cc = matrix(0, nrow = n.uniq, ncol = n.uniq)
  for (i.c in 1:n.uniq) {
    loc = which(corpus == i.c)
    L = which(match(uniq.words, words[pmax(loc - 1, 1)]) != 'NA')
    R = which(match(uniq.words, words[pmin(loc + 1, n.words)]) != 'NA')
    cc[i.c, c(L, R)] = cc[i.c, c(L, R)] + 1
  }
  diag(cc) = 0
  return(cc)
}
# cosine similarity between two word vectors
words.sim <- function(word1, word2, eps = 1e-8) {
  nw1 = word1 / (sqrt(sum(word1^2)) + eps)
  nw2 = word2 / (sqrt(sum(word2^2)) + eps)
  return(nw1 %*% nw2)
}
w1 = cc[which(uniq.words == "you"), ]
w2 = cc[which(uniq.words == "i"), ]
words.sim(w1, w2)
most.sim <- function(word, uniq.words, cc, N = 5) {
  n.uniq = length(uniq.words)
  word2 = cc[which(uniq.words == word), ]
  res = data.frame(words = uniq.words, similarity = rep(0, n.uniq))
  top = data.frame(words = rep("", N + 1), similarity = rep(0, N + 1))
  res$similarity = apply(cc, 1, function(x) words.sim(x, word2))
  sort.res = sort(res$similarity, decreasing = TRUE, index.return = TRUE)
  top$words = res$words[sort.res$ix[1:(N + 1)]]
  top$similarity = res$similarity[sort.res$ix[1:(N + 1)]]
  self.idx = which(top$words == word)
  return(top[-self.idx, ])
}
most.sim('i', uniq.words, cc, 5)
# positive pointwise mutual information
ppmi <- function(cc, eps = 1e-8) {
  n.uniq = ncol(cc)
  PPMI = matrix(0, n.uniq, n.uniq)
  N = sum(cc)
  r.sum = rowSums(cc)
  pmi = log2(cc * N / outer(r.sum, r.sum))
  PPMI = matrix(pmax(0, pmi), nrow = n.uniq)
  return(PPMI)
}

### LSA
PPMI <- ppmi(cc)
s = svd(PPMI)
plot(s$u[, 2], s$u[, 1], pch = 20, col = 'red', cex = 5)
text(s$u[, 2], s$u[, 1], uniq.words, cex = 2)

### word2vec, inefficient (one-hot) version
corp2contxt1S = function(corpus) {
  len.corp = length(corpus)
  # creating the target matrix
  idxT = cbind(1:(len.corp - 2), corpus[2:(len.corp - 1)])
  targ1S = matrix(0, nrow = len.corp - 2, ncol = length(unique(corpus)))
  targ1S[idxT] = 1
  # creating the context matrices
  idxC1 = cbind(1:(len.corp - 2), corpus[1:(len.corp - 2)])
  idxC2 = cbind(1:(len.corp - 2), corpus[3:len.corp])
  contxt1 = matrix(0, nrow = len.corp - 2, ncol = length(unique(corpus)))
  contxt2 = matrix(0, nrow = len.corp - 2, ncol = length(unique(corpus)))
  contxt1[idxC1] = 1
  contxt2[idxC2] = 1
  return(list(target = targ1S, contxt1 = contxt1, contxt2 = contxt2))
}
txt = "You say goodbye and I say hello."
corp = txt2corpus(txt)   # txt2corpus is defined in the word2vec section below
dat <- corp2contxt1S(corp)
init.W2V <- function(n.uniq, size.hidden) {
  W.in = matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq) * 0.01
  W.out = matrix(rnorm(n.uniq * size.hidden), nrow = size.hidden) * 0.01
  return(list(W.in = W.in, W.out = W.out))
}
softmax.forwd <- function(x, target) {
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x) / rowSums(exp(x))
  delta = 1e-7
  R = nrow(as.matrix(y))
  return(-sum(target * log(y + delta)) / R)
}
softmax.bckwd <- function(x, target, dout = 1) {
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x) / rowSums(exp(x))
  return((y - target) / R)
}
# forward pass of a plain matrix product (not defined in the original listing,
# but required by the training loop below)
MatMult.forwd <- function(x, W) {
  return(x %*% W)
}
MatMult.bckwd <- function(x, W, dout) {
  dx = dout %*% t(W)
  dW = t(x) %*% dout
  return(list(dx = dx, dW = dW))
}
softmax.pred <- function(x, target) {
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x) / rowSums(exp(x))
  return(y)
}
network <- init.W2V(7, 5)
n.batch = 3
n.data = nrow(dat$target)
n.iter = 2000
lambda = 0.1
loss = rep(0, n.iter)
for (i.iter in 1:n.iter) {
  samp.idx = sample(1:n.data, n.batch)
  batch.c1 = dat$contxt1[samp.idx, ]
  batch.c2 = dat$contxt2[samp.idx, ]
  batch.y = dat$target[samp.idx, ]
  h1 <- MatMult.forwd(batch.c1, network$W.in)
  h2 <- MatMult.forwd(batch.c2, network$W.in)
  h = 0.5 * (h1 + h2)
  s = MatMult.forwd(h, network$W.out)
  z = softmax.forwd(s, batch.y)
  loss[i.iter] = z
  ds = softmax.bckwd(s, batch.y, 1)
  da = MatMult.bckwd(h, network$W.out, ds)
  da$dW = da$dW * 0.5
  dIn1 = MatMult.bckwd(batch.c1, network$W.in, da$dx)
  dIn2 = MatMult.bckwd(batch.c2, network$W.in, da$dx)
  network$W.out = network$W.out - lambda * da$dW
  network$W.in = network$W.in - lambda * dIn1$dW
  network$W.in = network$W.in - lambda * dIn2$dW
}
plot(loss, type = 'l')

### word2vec with embedding layers
txt2corpus <- function(txt) {
  txt = tolower(txt)
  txt = gsub('[.]', ' .', txt)
  words = unlist(strsplit(txt, " "))
  uniq.words = unique(words)
  n.uniq = length(uniq.words)
  n.words = length(words)
  corpus = rep(0, n.words)
  corpus = match(words, uniq.words)
  return(corpus)
}
corp2contxt <- function(corpus) {
  len.corp = length(corpus)
  target = corpus[2:(len.corp - 1)]
  col1 = corpus[1:(len.corp - 2)]
  col2 = corpus[3:len.corp]
  context = cbind(col1, col2)
  return(list(context = context, target = target))
}
# embedding lookup: selecting rows of W replaces the one-hot matrix product
embed.forwd <- function(W, idx) {
  return(W[idx, ])
}
# usage: h1 <- embed.forwd(W, idx1); h2 <- embed.forwd(W, idx2); h = (h1 + h2)/2
embed.dot.forwd <- function(W, h, idx) {
  return(O = rowSums(W[idx, ] * h))
}
# usage: s = embed.dot.forwd(network$W.out, h, batch.y)
sigmoid.forwd <- function(O) {
  return(1 / (1 + exp(-O)))
}
# sigmoid with binary cross-entropy loss
# (optionally, y can be clipped away from 0/1 by a small delta for numerical stability)
sigmoidWL.forwd <- function(O, target) {
  y = 1 / (1 + exp(-O))
  loss = -sum(target * log(y) + (1 - target) * log(1 - y))
  return(loss)
}
sigmoidWL.backwd <- function(O, target, dout = 1) {
  y = 1 / (1 + exp(-O))
  dW = (y - target) * dout
  return(dW)
}
embed.dot.backwd <- function(W, h, idx, dout) {
  dh = dout * W[idx, ]
  dW = dout * h
  return(list(dh = dh, dW = dW))
}
# gradients of repeated indices must be accumulated, not overwritten
embed.backwd <- function(W, idx, dout) {
  dW = matrix(0, nrow(W), ncol(W))
  for (i.idx in 1:length(idx)) {
    dW[idx[i.idx], ] = dW[idx[i.idx], ] + dout[i.idx, ]
  }
  return(dW)
}
init.W2V <- function(n.uniq, size.hidden) {
  W.in = matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq) * 0.01
  W.out = matrix(rnorm(n.uniq * size.hidden), nrow = n.uniq) * 0.01
  return(list(W.in = W.in, W.out = W.out))
}
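The whole point of the embedding layer is that embed.forwd(W, idx) returns exactly what multiplying a one-hot matrix by W would, without ever building the one-hot matrix. A tiny check of that equivalence, as a sketch with a hypothetical 7-word vocabulary:

W = matrix(1:35, nrow = 7)    # 7 words, 5-dimensional embeddings
idx = c(3, 1, 3)              # a mini-batch of word IDs
onehot = matrix(0, 3, 7)
onehot[cbind(1:3, idx)] = 1
all.equal(onehot %*% W, embed.forwd(W, idx), check.attributes = FALSE)  # TRUE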
### naive W2V (positive examples only)
network <- init.W2V(8, 5)
n.batch = 3
txt = "You say goodbye and I say hello.."
corp = txt2corpus(txt)
corp = c(8, 8, corp)   # pad with ID 8 so every word has two context words
dat <- corp2contxt(corp)
n.data = length(dat$target)
n.iter = 3000
lambda = 0.1
loss = rep(0, n.iter)
for (i.iter in 1:n.iter) {
  samp.idx = sample(1:n.data, n.batch)
  batch.c1 = dat$context[samp.idx, 1]
  batch.c2 = dat$context[samp.idx, 2]
  batch.y = dat$target[samp.idx]
  h1 <- embed.forwd(network$W.in, batch.c1)
  h2 <- embed.forwd(network$W.in, batch.c2)
  h = 0.5 * (h1 + h2)
  # positive examples only
  s = embed.dot.forwd(network$W.out, h, batch.y)
  z = sigmoidWL.forwd(s, 1)
  loss[i.iter] = z
  ds = sigmoidWL.backwd(s, 1, 1)
  dE = embed.dot.backwd(network$W.out, h, batch.y, ds)
  dh = dE$dh * 0.5
  dIn1 = embed.backwd(network$W.in, dat$context[samp.idx, 1], dh)
  dIn2 = embed.backwd(network$W.in, dat$context[samp.idx, 2], dh)
  network$W.out[batch.y, ] = network$W.out[batch.y, ] - lambda * dE$dW
  network$W.in = network$W.in - lambda * dIn1
  network$W.in = network$W.in - lambda * dIn2
}
par(mfrow = c(1, 1))
plot(loss, type = 'l')
samp.idx = c(2:6, 8, 9, 1)
batch.c1 = dat$context[samp.idx, 1]
batch.c2 = dat$context[samp.idx, 2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in, batch.c1)
h2 <- embed.forwd(network$W.in, batch.c2)
h = 0.5 * (h1 + h2)
s = MatMult.forwd(h, t(network$W.out))
z = sigmoid.forwd(s)
res = cbind(z, batch.y)
# (alternatively, all 8 words in one column: par(mfrow=c(8,1)) and loop over 1:8)
par(mfrow = c(4, 1))
for (i in 1:4) {
  col.spec = rep("black", 8)
  col.spec[i] = "orange"
  barplot(res[i, 1:8], col = col.spec)
}
par(mfrow = c(4, 1))
for (i in 5:8) {
  col.spec = rep("black", 8)
  col.spec[i] = "orange"
  barplot(res[i, 1:8], col = col.spec)
}

### with negative sampling
# sample words from the unigram distribution (raised to `power`),
# excluding the positive target
unigram.sampler <- function(corpus, target, power, sample.size) {
  neg.corpus <- corpus[-which(corpus == target)]
  uniq.word = sort(unique(neg.corpus))   # sorted so the order matches table()'s output
  n.word = length(neg.corpus)
  prob = (as.vector(table(neg.corpus)) / n.word)^power
  prob = prob / sum(prob)
  sample.idx = sample(uniq.word, sample.size, prob = prob)
  return(sort(sample.idx))
}
get.neg.sample <- function(corpus, target, power, sample.size) {
  sample.id = matrix(0, nrow = length(target), ncol = sample.size)
  for (i.targ in 1:length(target)) {
    sample.id[i.targ, ] = unigram.sampler(corpus, target[i.targ], power, sample.size)
  }
  return(sample.id)
}
network <- init.W2V(8, 5)
n.batch = 3
txt = "You say goodbye and I say hello.."
corp = txt2corpus(txt)
corp = c(8, 8, corp)
dat <- corp2contxt(corp)
n.data = length(dat$target)
n.iter = 10000
lambda = 0.1
loss = rep(0, n.iter)
sample.size = 2
for (i.iter in 1:n.iter) {
  samp.idx = sample(1:n.data, n.batch)
  batch.c1 = dat$context[samp.idx, 1]
  batch.c2 = dat$context[samp.idx, 2]
  batch.y = dat$target[samp.idx]
  h1 <- embed.forwd(network$W.in, batch.c1)
  h2 <- embed.forwd(network$W.in, batch.c2)
  h = 0.5 * (h1 + h2)
  # positive example
  s = embed.dot.forwd(network$W.out, h, batch.y)
  z = sigmoidWL.forwd(s, 1)
  loss[i.iter] = z
  ds = sigmoidWL.backwd(s, 1, 1)
  dE = embed.dot.backwd(network$W.out, h, batch.y, ds)
  dh = dE$dh * 0.5
  dIn1 = embed.backwd(network$W.in, dat$context[samp.idx, 1], dh)
  dIn2 = embed.backwd(network$W.in, dat$context[samp.idx, 2], dh)
  network$W.out[batch.y, ] = network$W.out[batch.y, ] - lambda * dE$dW
  network$W.in = network$W.in - lambda * dIn1
  network$W.in = network$W.in - lambda * dIn2
  # negative examples
  neg.set <- get.neg.sample(corp, batch.y, 0.75, sample.size)
  for (i.neg in 1:sample.size) {
    s = embed.dot.forwd(network$W.out, h, neg.set[, i.neg])
    z = sigmoidWL.forwd(s, 0)
    loss[i.iter] = loss[i.iter] + z
    ds = sigmoidWL.backwd(s, 0, dout = 1)
    dE = embed.dot.backwd(network$W.out, h, neg.set[, i.neg], ds)
    dh = dE$dh * 0.5
    dIn1 = embed.backwd(network$W.in, dat$context[samp.idx, 1], dh)
    dIn2 = embed.backwd(network$W.in, dat$context[samp.idx, 2], dh)
    network$W.out[neg.set[, i.neg], ] = network$W.out[neg.set[, i.neg], ] - lambda * dE$dW
    network$W.in = network$W.in - lambda * dIn1
    network$W.in = network$W.in - lambda * dIn2
  }
}
library(ggplot2)
par(mfrow = c(1, 1))
loss.dat <- data.frame(epoch = 1:length(loss), loss = loss)
ggplot(loss.dat, aes(x = epoch, y = loss)) + geom_smooth(se = FALSE)
samp.idx = c(2:6, 8, 9, 1)
batch.c1 = dat$context[samp.idx, 1]
batch.c2 = dat$context[samp.idx, 2]
batch.y = dat$target[samp.idx]
h1 <- embed.forwd(network$W.in, batch.c1)
h2 <- embed.forwd(network$W.in, batch.c2)
h = 0.5 * (h1 + h2)
s = MatMult.forwd(h, t(network$W.out))
z = sigmoid.forwd(s)
res = cbind(z, batch.y)
par(mfrow = c(4, 1))
for (i in 1:4) {
  col.spec = rep("black", 8)
  col.spec[i] = "orange"
  barplot(res[i, 1:8], col = col.spec)
}
par(mfrow = c(4, 1))
for (i in 5:8) {
  col.spec = rep("black", 8)
  col.spec[i] = "orange"
  barplot(res[i, 1:8], col = col.spec)
}
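Once training is done, the rows of network$W.in are the learned word vectors, so the cosine-similarity helper from the LSA part of this post applies to them directly. A sketch (word IDs follow the order produced by txt2corpus: "you" = 1, "i" = 5, plus the padding ID 8):

# cosine similarity between the learned vectors for "you" and "i"
words.sim(network$W.in[1, ], network$W.in[5, ])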
広域システム特別講義II: Supervised Learning 1B
train.x = as.matrix(iris[,1:4]) train.y.temp = as.numeric(iris[,5]) train.y = matrix(0,nrow = nrow(train.x), ncol =3) train.y[which(train.y.temp==1), 1]=1 train.y[which(train.y.temp==2), 2]=1 train.y[which(train.y.temp==3), 3]=1 init.network <- function(n.neurons){ n.layer = length(n.neurons) W = list(); b = list() for (i.layer in 1:(n.layer-1)){ W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.1),nrow=n.neurons[i.layer]) b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.1), nrow = 1) } return(list(W = W,b = b)) } sigmoid.forwd <- function(x){ return(1/(1+exp(-x))) } sigmoid.bckwd <- function(x, dout){ y = sigmoid.forwd(x) return(dout*(1-y)*y) } affine.forwd <- function(x, W, b){ return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b) } affine.bckwd <- function(x, W, b, dout){ dx = dout%*%t(W) dW = t(x)%*%dout db = colSums(dout) return(list(dx = dx, dW = dW, db = db)) } softmax.forwd <- function(x, target){ max.x = apply(x,1,max) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) delta = 1e-7; R = nrow(as.matrix(y)) return(-sum(target*log(y + delta))/R) } softmax.bckwd <- function(x, target, dout = 1){ max.x = apply(x, 1, max) R = nrow(x) C = ncol(x) x = x - max.x%*%matrix(1,nrow=1,ncol=C) y = exp(x)/rowSums(exp(x)) return((y-target)/R) } params = init.network(c(4,15,3)) batch_size = 50; n.iter =5000; lambda =0.05 n.train = nrow(train.x) loss = rep(0,n.iter) for (i.iter in 1:n.iter){ batch_mask = sample(1:n.train, batch_size) x.batch = train.x[batch_mask,] t.batch = train.y[batch_mask,] a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]]) z1 = sigmoid.forwd(a1) a2 = affine.forwd(z1,params$W[[2]],params$b[[2]]) z2 = softmax.forwd(a2,t.batch) loss[i.iter] = z2 dwSM = softmax.bckwd(a2, t.batch, 1) dwA2 = affine.bckwd(a1,params$W[[2]],params$b[[2]],dwSM) dwSG = sigmoid.bckwd(a1,dwA2$dx) dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG) params$W[[2]] = params$W[[2]] - lambda*dwA2$dW params$b[[2]] = params$b[[2]] - lambda*dwA2$db params$W[[1]] = params$W[[1]] - lambda*dwA1$dW params$b[[1]] = params$b[[1]] - lambda*dwA1$db } loss.dat = data.frame(trial = 1:length(loss), loss = loss) ggplot(loss.dat,aes(x = trial, y = loss)) + geom_smooth(se=F) ### methods to improve effeciency func02R = function(x){ return(1/20*x[1]^2 + x[2]^2) } numerical.grad <- function(func, x){ h = 1e-4 R = nrow(x) C = ncol(x) grad = matrix(0, R, C) for (i.col in 1:C){ for (i.row in 1:R){ temp.x = x[i.row,i.col] x[i.row, i.col] = temp.x + h plusH = do.call(func, list(x)) x[i.row, i.col] = temp.x - h minusH = do.call(func,list(x)) grad[i.row, i.col] = (plusH - minusH)/(2*h) x[i.row, i.col] = temp.x } } return(grad) } require(plot3D) x = seq(-10,10,0.5) y = seq(-10,10,0.5) M = mesh(x,y) R = nrow(M$x) C = nrow(M$x) scaling = 0.05 plot(c(),c(),xlim = c(-10,10),ylim=c(-10,10)) for (i.col in 1:C){ for (i.row in 1:R){ ng = numerical.grad("func02R",matrix(c(M$x[i.row,i.col],M$y[i.row,i.col]),nrow=1)) arrows(M$x[i.row,i.col],M$y[i.row,i.col], (M$x[i.row,i.col]-ng[1]*scaling),(M$y[i.row,i.col]-ng[2]*scaling), length = 0.05) } } x = seq(-10,10,0.2) y = seq(-10,10,0.2) M = mesh(x,y) Z = as.vector(1/20*M$x^2)+as.vector(M$y^2) Z.mesh = matrix(Z,nrow(M$x)) contour(x,y,Z.mesh,drawlabels = F,nlevels=40) grad.desc <- function(func, init.x, lr, n.iter){ x = init.x x.hist = init.x for (i.iter in 1:n.iter) { grad = numerical.grad(func, x) x = x - lr*grad x.hist = rbind(x.hist,x) } return(x.hist) } x.init = matrix(c(-7,2),nrow = 1) gd = 
gd.moment <- function(func, init.x, lr, moment, n.iter){
  x = init.x
  x.hist = init.x
  grad.hist = rep(0, length(x))
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr * grad + moment * grad.hist
    x.hist = rbind(x.hist, x)
    grad.hist = -lr * grad
  }
  return(x.hist)
}
x.init = matrix(c(-7, 2), nrow = 1)
gdm = gd.moment("func02R", x.init, 0.9, 0.3, 100)
lines(gdm, type = 'o', col = 'blue', pch = 20)

adaGrad <- function(func, init.x, eta, n.iter){
  x = init.x
  x.hist = init.x
  h = rep(0, length(x))
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    h = h + grad * grad
    x = x - eta * (1/(sqrt(h) + 1e-7)) * grad
    x.hist = rbind(x.hist, x)
  }
  return(x.hist)
}
x.init = matrix(c(-7, 2), nrow = 1)
gd.ada = adaGrad("func02R", x.init, 0.9, 100)
contour(x, y, Z.mesh, drawlabels = F, nlevels = 40)
lines(gd.ada, type = 'o', col = 'red', pch = 20)

adam <- function(func, init.x, eta, beta1, beta2, epsilon, n.iter){
  x = init.x
  x.hist = init.x
  m = rep(0, length(x))
  v = rep(0, length(x))
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad * grad
    m.hat = m/(1 - beta1^i.iter)   # bias-corrected first moment
    v.hat = v/(1 - beta2^i.iter)   # bias-corrected second moment
    x = x - eta/((sqrt(v.hat) + epsilon)) * m.hat
    x.hist = rbind(x.hist, x)
  }
  return(x.hist)
}
x.init = matrix(c(-7, 2), nrow = 1)
gd.adam = adam("func02R", x.init, 0.2, 0.9, 0.999, 1e-8, 100)
contour(x, y, Z.mesh, drawlabels = F, nlevels = 40)
lines(gd.adam, type = 'o', col = 'red', pch = 20)
### w/ functions
relu.forwd <- function(x){
  return(pmax(x, 0))
}
relu.bckwd <- function(z, dout){
  dout[which(z <= 0)] = 0
  return(dout)
}
# sigmoid
sigmoid.forwd <- function(x){
  return(1/(1 + exp(-x)))
}
sigmoid.bckwd <- function(z, dout){
  return(dout * (1 - z) * z)
}
# Affine
affine.forwd <- function(x, W, b){
  return(x %*% W + matrix(1, nrow = nrow(x), ncol = 1) %*% b)
}
affine.bckwd <- function(x, W, dout){
  dx = dout %*% t(W)
  dW = t(x) %*% dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}
# softmax with CE
softmax.forwd <- function(x, target){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7; R = nrow(as.matrix(y))
  return(list(smx = y, ce = -sum(target * log(y + delta))/R))
}
softmax.bckwd <- function(smx, target){
  R = nrow(smx)
  return((smx - target)/R)
}

activation <- function(A, target, actFun){
  if (actFun == "sigmoid"){ return(sigmoid.forwd(A)) }
  if (actFun == "relu"){ return(relu.forwd(A)) }
  if (actFun == "softmax"){ return(softmax.forwd(A, target)) }
}
Dact <- function(z, smx, target, dout, actFun){
  if (actFun == "sigmoid"){ return(sigmoid.bckwd(z, dout)) }
  if (actFun == "relu"){ return(relu.bckwd(z, dout)) }
  if (actFun == "softmax"){ return(softmax.bckwd(smx, target)) }
}

# function for initialization
init.network <- function(n.neurons, actFun, Opt, sdv){
  n.layer = length(n.neurons)
  W = list(); b = list()
  mW = list(); mb = list()     # momentum
  hW = list(); hb = list()     # adaGrad
  aMW = list(); aMb = list()   # adam M
  aVW = list(); aVb = list()   # adam V
  for (i.layer in 1:(n.layer - 1)){
    if (nargs() == 3) {
      if (actFun[i.layer] == "sigmoid"){
        sdv = 1/sqrt(n.neurons[i.layer])   # Xavier
      } else {
        sdv = sqrt(2/n.neurons[i.layer])   # He - assumes ReLU
      }
    }
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer] * n.neurons[(i.layer + 1)], sd = sdv),
                          nrow = n.neurons[i.layer])
    b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer + 1)], sd = sdv), nrow = 1)
    if (Opt == "momentum"){
      mW[[i.layer]] = matrix(0, nrow = n.neurons[i.layer], ncol = n.neurons[(i.layer + 1)])
      mb[[i.layer]] = matrix(0, nrow = 1, ncol = n.neurons[(i.layer + 1)])
    }
    if (Opt == "adagrad"){
      hW[[i.layer]] = matrix(0, nrow = n.neurons[i.layer], ncol = n.neurons[(i.layer + 1)])
      hb[[i.layer]] = matrix(0, nrow = 1, ncol = n.neurons[(i.layer + 1)])
    }
    if (Opt == "adam"){
      aMW[[i.layer]] = matrix(0, nrow = n.neurons[i.layer], ncol = n.neurons[(i.layer + 1)])
      aMb[[i.layer]] = matrix(0, nrow = 1, ncol = n.neurons[(i.layer + 1)])
      aVW[[i.layer]] = matrix(0, nrow = n.neurons[i.layer], ncol = n.neurons[(i.layer + 1)])
      aVb[[i.layer]] = matrix(0, nrow = 1, ncol = n.neurons[(i.layer + 1)])
    }
  }
  return(list(W = W, b = b, actFun = actFun, optimizer = Opt,
              mW = mW, mb = mb, hW = hW, hb = hb,
              aMW = aMW, aMb = aMb, aVW = aVW, aVb = aVb))
}

OPT <- function(net, Daff, HP){
  if (net$optimizer == "momentum"){ return(Opt.mom(net, Daff, HP)) }
  if (net$optimizer == "adagrad"){ return(Opt.adagrad(net, Daff, HP)) }
  if (net$optimizer == "adam"){ return(Opt.adam(net, Daff, HP)) }
}
Opt.mom <- function(net, Daff, HP) {
  # HP[3] = learning rate
  # HP[4] = weight decay
  # HP[5] = momentum
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer) {
    net$mW[[i.layer]] = HP[5]*net$mW[[i.layer]] - HP[3]*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]]
    net$mb[[i.layer]] = HP[5]*net$mb[[i.layer]] - HP[3]*Daff[[i.layer]]$db - HP[4]*net$b[[i.layer]]
    net$W[[i.layer]] = net$W[[i.layer]] + net$mW[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] + net$mb[[i.layer]]
  }
  return(net)
}
Opt.adagrad <- function(net, Daff, HP) {
  # HP[3] = learning rate
  # HP[4] = weight decay
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer) {
    net$hW[[i.layer]] = net$hW[[i.layer]] + Daff[[i.layer]]$dW * Daff[[i.layer]]$dW
    net$hb[[i.layer]] = net$hb[[i.layer]] + Daff[[i.layer]]$db * Daff[[i.layer]]$db
    net$W[[i.layer]] = net$W[[i.layer]] -
      HP[3]/(sqrt(net$hW[[i.layer]]) + 1e-7)*Daff[[i.layer]]$dW - HP[4]*net$W[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] -
      HP[3]/(sqrt(net$hb[[i.layer]]) + 1e-7)*Daff[[i.layer]]$db - HP[4]*net$b[[i.layer]]
  }
  return(net)
}

cu.nnet = function(train.x, train.y, net, HP = c(10, 1000, 0.05, 0.01, 0.1, 0.999, 0.9)){
  # HP: Hyperparameters
  # HP[1] = batch size
  # HP[2] = n iteration
  # HP[3] = learning rate
  # HP[4] = weight decay
  # HP[5] = momentum
  # HP[6] = beta1 (adam)
  # HP[7] = beta2 (adam)
  n.layer <- length(net$W)
  loss = rep(0, HP[2])
  A = list(); z = list(); dact = list(); Daff = list()
  for (i.iter in 1:HP[2]){
    batch_mask = sample(1:nrow(train.x), HP[1])
    x.batch = train.x[batch_mask, ]
    t.batch = train.y[batch_mask, ]
    # forward
    for (i.layer in 1:n.layer){
      if (i.layer == 1){ x = x.batch } else { x = z[[(i.layer - 1)]] }
      A[[i.layer]] = affine.forwd(x, net$W[[i.layer]], net$b[[i.layer]])
      z[[i.layer]] = activation(A[[i.layer]], t.batch, net$actFun[i.layer])
    }
    loss[i.iter] = z[[n.layer]]$ce
    smx = z[[n.layer]]$smx
    # backward
    for (i.layerR in n.layer:1){
      if (i.layerR == n.layer){ dout = 1 } else { dout = Daff[[(i.layerR + 1)]]$dx }
      dact[[i.layerR]] = Dact(z[[i.layerR]], smx, t.batch, dout, net$actFun[i.layerR])
      if (i.layerR == 1){ x = x.batch } else { x = z[[(i.layerR - 1)]] }   # post-activation input
      Daff[[i.layerR]] = affine.bckwd(x, net$W[[i.layerR]], dact[[i.layerR]])
    }
    net = OPT(net, Daff, HP)
  }
  return(list(loss = loss, net = net))
}

train.x = as.matrix(iris[, 1:4])
train.y.temp = as.numeric(iris[, 5])
train.y = matrix(0, nrow = nrow(train.x), ncol = 3)
train.y[which(train.y.temp == 1), 1] = 1
train.y[which(train.y.temp == 2), 2] = 1
train.y[which(train.y.temp == 3), 3] = 1
actF = c("relu", "softmax")
network = init.network(c(4, 15, 3), actF, "momentum")
res = cu.nnet(train.x, train.y, network, HP = c(15, 1000, 0.01, 0.0001, 0.9, 0.999, 0.9))
hist(res$net$W[[1]])
plot(res$loss, type = 'l')
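# OPT() above also dispatches to Opt.adam, which is not defined in these
# notes. Below is a minimal sketch following the conventions of Opt.mom and
# Opt.adagrad (HP[3] = learning rate, HP[4] = weight decay, HP[6] = beta1,
# HP[7] = beta2); bias correction is omitted because OPT() does not receive
# the iteration count.
Opt.adam <- function(net, Daff, HP) {
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer) {
    # exponential moving averages of gradients and squared gradients
    net$aMW[[i.layer]] = HP[6]*net$aMW[[i.layer]] + (1 - HP[6])*Daff[[i.layer]]$dW
    net$aMb[[i.layer]] = HP[6]*net$aMb[[i.layer]] + (1 - HP[6])*Daff[[i.layer]]$db
    net$aVW[[i.layer]] = HP[7]*net$aVW[[i.layer]] + (1 - HP[7])*Daff[[i.layer]]$dW^2
    net$aVb[[i.layer]] = HP[7]*net$aVb[[i.layer]] + (1 - HP[7])*Daff[[i.layer]]$db^2
    net$W[[i.layer]] = net$W[[i.layer]] -
      HP[3]/(sqrt(net$aVW[[i.layer]]) + 1e-7)*net$aMW[[i.layer]] - HP[4]*net$W[[i.layer]]
    net$b[[i.layer]] = net$b[[i.layer]] -
      HP[3]/(sqrt(net$aVb[[i.layer]]) + 1e-7)*net$aMb[[i.layer]] - HP[4]*net$b[[i.layer]]
  }
  return(net)
}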
MNIST.dat <- read.csv("http://www.matsuka.info/univ/course_folder/MNSTtrain.csv")
train <- data.matrix(MNIST.dat)
train.x <- as.matrix(train[, -1]/255)
train.y.temp <- train[, 1]
train.y = matrix(0, nrow = nrow(train), ncol = 10)
for (i in 1:nrow(train.x)){
  train.y[i, (train.y.temp[i] + 1)] = 1
}
actF = c("relu", "relu", "softmax")
network = init.network(c(784, 100, 50, 10), actF, "momentum")
res = cu.nnet(train.x, train.y, network, HP = c(100, 2000, 0.1, 0.001, 0.8, 0.999, 0.9))
plot(res$loss, type = 'l')
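# Added sketch: the notes only plot the training loss; this evaluates
# training accuracy by re-running the forward pass with the learned weights
# (pred.nn is a helper introduced here, not part of the notes; it assumes
# relu hidden layers, as in the network defined just above).
pred.nn <- function(net, x){
  n.layer <- length(net$W)
  for (i.layer in 1:n.layer){
    A = affine.forwd(x, net$W[[i.layer]], net$b[[i.layer]])
    if (i.layer < n.layer){ x = relu.forwd(A) } else { x = A }
  }
  return(max.col(x))   # softmax is monotone, so the argmax of A suffices
}
pred = pred.nn(res$net, train.x)
mean(pred == (train.y.temp + 1))   # proportion correct on the training set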
# 広域システム特別講義II: Supervised Learning 1a
# with THRESHOLD (theta)
AND.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; theta = 0.7
  y.temp = w1*x1 + w2*x2
  if (y.temp <= theta){ y = 0 } else { y = 1 }
  return(y)
}
# compact version
AND.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; theta = 0.7
  return(as.numeric(w1*x1 + w2*x2 > theta))
}
NAND.gate <- function(x1, x2){
  w1 = -0.5; w2 = -0.5; theta = -0.7
  return(as.numeric(w1*x1 + w2*x2 > theta))
}
OR.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; theta = 0.3
  return(as.numeric(w1*x1 + w2*x2 > theta))
}

# with BIAS (b)
AND.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; b = -0.7
  y.temp = w1*x1 + w2*x2 + b
  if (y.temp <= 0){ y = 0 } else { y = 1 }
  return(y)
}
AND.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; b = -0.7
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}
NAND.gate <- function(x1, x2){
  w1 = -0.5; w2 = -0.5; b = 0.7
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}
OR.gate <- function(x1, x2){
  w1 = 0.5; w2 = 0.5; b = -0.3
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}
NOR.gate <- function(x1, x2){
  w1 = -0.5; w2 = -0.5; b = 0.3
  return(as.numeric(w1*x1 + w2*x2 + b > 0))
}

plot.logic <- function(logic.oper){
  x1 = c(0, 0, 1, 1); x2 = c(0, 1, 0, 1)
  if (logic.oper == "and") {
    w1 = 0.5; w2 = 0.5; theta = 0.7; true.point = AND.gate(x1, x2)
  } else if (logic.oper == "or") {
    w1 = 0.5; w2 = 0.5; theta = 0.3; true.point = OR.gate(x1, x2)
  } else if (logic.oper == "nand") {
    w1 = -0.5; w2 = -0.5; theta = -0.7; true.point = NAND.gate(x1, x2)
  } else if (logic.oper == "nor"){
    w1 = -0.5; w2 = -0.5; theta = -0.3; true.point = NOR.gate(x1, x2)
  } else { warning("incompatible operator"); stop() }
  plot(c(0, 0, 1, 1), c(0, 1, 0, 1), xlim = c(-0.5, 1.5), ylim = c(-0.5, 1.5),
       pch = 20, cex = 2, col = true.point + 1)
  # decision boundary w1*x1 + w2*x2 = theta, i.e., x2 = theta/w2 - (w1/w2)*x1
  abline(a = theta/w2, b = -w1/w2, lwd = 3)
}

XOR.gate <- function(x1, x2){
  gate1 <- NAND.gate(x1, x2)
  gate2 <- OR.gate(x1, x2)
  y <- AND.gate(gate1, gate2)
  return(y)
}
plot.XOR <- function(){
  x1 = c(0, 0, 1, 1); x2 = c(0, 1, 0, 1)
  w11 = -0.5; w21 = -0.5; theta1 = -0.7
  w12 = 0.5; w22 = 0.5; theta2 = 0.3
  true.point = XOR.gate(x1, x2)
  plot(c(0, 0, 1, 1), c(0, 1, 0, 1), xlim = c(-0.5, 1.5), ylim = c(-0.5, 1.5),
       pch = 20, cex = 2, col = true.point + 1)
  abline(a = theta1/w21, b = -w11/w21, lwd = 3)
  abline(a = theta2/w22, b = -w12/w22, lwd = 3)
}
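# Usage check: the gate functions are vectorized, so the whole truth table
# can be evaluated at once; XOR should give 0, 1, 1, 0.
x1 = c(0, 0, 1, 1); x2 = c(0, 1, 0, 1)
cbind(x1, x2,
      AND = AND.gate(x1, x2), OR = OR.gate(x1, x2),
      NAND = NAND.gate(x1, x2), XOR = XOR.gate(x1, x2))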
multi.forwd <- function(x, y){
  return(x*y)
}
multi.bckwd <- function(x, y, dout){
  dx = dout * y
  dy = dout * x
  return(list(dx = dx, dy = dy))
}
apple = 100; n.apple = 2; tax = 1.1
apple.pre.tax = multi.forwd(apple, n.apple)
apple.post.tax = multi.forwd(apple.pre.tax, tax)
dprice = 1
d.apple.post.tax = multi.bckwd(apple.pre.tax, tax, dprice)
d.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dy
add.forwd <- function(x, y){
  return(x + y)
}
add.bckwd <- function(x, y, dout){
  dx = dout
  dy = dout
  return(list(dx = dx, dy = dy))
}

# activation functions
step.func <- function(x){
  return(as.numeric(x > 0))
}
x = seq(-5, 5, 0.1)
y = step.func(x)
plot(x, y, ylab = 'y', xlab = 'a', type = "l", lwd = 2)
sigmoid.func <- function(x){
  return(1/(1 + exp(-x)))
}
y = sigmoid.func(x)
plot(x, y, ylab = 'y', xlab = 'a', type = "l", lwd = 2)
y.step = step.func(x)
y.sigm = sigmoid.func(x)
plot(x, y.step, ylab = 'y', xlab = 'a', type = "l", lwd = 2)
lines(x, y.sigm, lwd = 2, lty = 2)
relu.func <- function(x){
  return(pmax(0, x))
}
y.relu = relu.func(x)
plot(x, y.relu, ylab = 'y', xlab = 'a', type = "l", lwd = 2)

# matrix shapes for multiplication demos
A = matrix(1:4, nrow = 2, byrow = T)
B = matrix(5:8, nrow = 2, byrow = T)
A = matrix(1:6, nrow = 3, byrow = T)
B = matrix(7:8, nrow = 2, byrow = T)

# forward computation in a 3-layer network, step by step
x = c(1, 0.5)
W1 = matrix((1:6)*0.1, nrow = 2)
B1 = (1:3)*0.1
A1 = x %*% W1 + B1
Z1 = sigmoid.func(A1)
W2 = matrix((1:6)*0.1, nrow = 3)
B2 = c(0.1, 0.2)
A2 = Z1 %*% W2 + B2
Z2 = sigmoid.func(A2)
W3 = matrix((1:4)*0.1, nrow = 2)
B3 = c(0.1, 0.2)
A3 = Z2 %*% W3 + B3
Z3 = A3   # identity output

# function to initialize 3L network
init.3L.network <- function(){
  W1 = matrix((1:6)*0.1, nrow = 2)
  B1 = (1:3)*0.1
  W2 = matrix((1:6)*0.1, nrow = 3)
  B2 = c(0.1, 0.2)
  W3 = matrix((1:4)*0.1, nrow = 2)
  B3 = c(0.1, 0.2)
  return(list(W1 = W1, B1 = B1, W2 = W2, B2 = B2, W3 = W3, B3 = B3))
}
# feedforward process
forward.3L <- function(network, x){
  A1 = x %*% network$W1 + network$B1
  Z1 = sigmoid.func(A1)
  A2 = Z1 %*% network$W2 + network$B2
  Z2 = sigmoid.func(A2)
  A3 = Z2 %*% network$W3 + network$B3
  Z3 = sigmoid.func(A3)
  return(Z3)
}
network <- init.3L.network()
y = forward.3L(network, c(1, 0.5))

# naive softmax overflows for large inputs
a = c(1010, 1000, 990)
exp(a)/sum(exp(a))   # NaN
softmax.func <- function(x){
  max.x = max(x)
  return(exp(x - max.x)/sum(exp(x - max.x)))
}

train <- read.csv('http://www.Matsuka.info/univ/course_folder/MNSTtrain.csv', header = TRUE)
train <- data.matrix(train)
train.x <- train[, -1]
train.y <- train[, 1]
train.x <- t(train.x/255)
download.file("http://www.matsuka.info/univ/course_folder/trNetwork.Rdata", "trNetwork.Rdata")
load("trNetwork.Rdata")
network = trNetwork
n.train = ncol(train.x)
correct.cl = 0
conf.matrix = matrix(0, 10, 10)
for (i.loop in 1:n.train){
  y = forward.3L(network, train.x[, i.loop])
  max.y = max.col(y)
  conf.matrix[max.y, (train.y[i.loop] + 1)] = conf.matrix[max.y, (train.y[i.loop] + 1)] + 1
}
accuracy = sum(diag(conf.matrix))/n.train

# learning: backprop through the shopping example
apple = 100; n.apple = 2; tax = 1.1
orange = 150; n.orange = 3
apple.price = multi.forwd(apple, n.apple)
orange.price = multi.forwd(orange, n.orange)
all.price = add.forwd(apple.price, orange.price)
price = multi.forwd(all.price, tax)
dprice = 1
d.all.price = multi.bckwd(all.price, tax, dprice)
d.apple.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dx
d.orange.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dy
d.apple = multi.bckwd(apple, n.apple, d.apple.price)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.price)$dy
d.orange = multi.bckwd(orange, n.orange, d.orange.price)$dx
d.n.orange = multi.bckwd(orange, n.orange, d.orange.price)$dy
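# Worked check (added): by the chain rule, d price/d apple = n.apple * tax = 2.2,
# d price/d n.apple = apple * tax = 110, d price/d orange = n.orange * tax = 3.3,
# and d price/d n.orange = orange * tax = 165. The backward passes above agree:
c(d.apple, d.n.apple, d.orange, d.n.orange)   # 2.2 110.0 3.3 165.0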
relu.forwd <- function(x){
  return(pmax(x, 0))
}
relu.bckwd <- function(x, dout){
  dout[which(x <= 0)] = 0
  return(dout)
}
sigmoid.forwd <- function(x){
  return(1/(1 + exp(-x)))
}
sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x)
  return(dout * (1 - y) * y)
}
affine.forwd <- function(x, W, b){
  return(x %*% W + matrix(1, nrow = nrow(x), ncol = 1) %*% b)
}
affine.bckwd <- function(x, W, b, dout){
  dx = dout %*% t(W)
  dW = t(x) %*% dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}
softmax.forwd <- function(x, target){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7; R = nrow(as.matrix(y))
  return(-sum(target * log(y + delta))/R)
}
softmax.bckwd <- function(x, target, dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  return((y - target)/R)
}
init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer - 1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer] * n.neurons[(i.layer + 1)], sd = 0.1),
                          nrow = n.neurons[i.layer])
    b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer + 1)], sd = 0.1), nrow = 1)
  }
  return(list(W = W, b = b))
}
sigmoid.func <- function(x){
  return(1/(1 + exp(-x)))
}
relu.func <- function(x){
  y = apply(x, 2, function(x) pmax(x, 0))
  return(y)
}
activation <- function(A, actFun){
  if (actFun == "sigmoid"){ return(sigmoid.func(A)) }
  if (actFun == "relu"){ return(relu.func(A)) }
  if (actFun == "softmax"){ return(softmax(A)) }
}
feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x %*% network$W[[i.layer]] + matrix(1, nrow = batch.size, ncol = 1) %*% network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}
cross.entropy = function(y, target){
  delta = 1e-7; R = nrow(as.matrix(y))
  return(-sum(target * log(y + delta))/R)
}
loss.network = function(params, x, t, actFun){
  y = feedforward(params, x, actFun)
  return(cross.entropy(y, t))
}
softmax <- function(x){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x %*% matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}

train.x = as.matrix(iris[, 1:4])
train.y.temp = as.numeric(iris[, 5])
train.y = matrix(0, nrow = nrow(train.x), ncol = 3)
train.y[which(train.y.temp == 1), 1] = 1
train.y[which(train.y.temp == 2), 2] = 1
train.y[which(train.y.temp == 3), 3] = 1
# one possible setup
params = init.network(c(4, 15, 3))
batch_size = 10; n.iter = 5000; lambda = 0.05
n.train = nrow(train.x)
# the setup actually used below
params = init.network(c(4, 30, 3))
batch_size = 10; n.iter = 5000; lambda = 0.01
n.train = nrow(train.x)
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask, ]
  t.batch = train.y[batch_mask, ]
  a1 = affine.forwd(x.batch, params$W[[1]], params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
  z2 = softmax.forwd(a2, t.batch)
  dwSM = softmax.bckwd(a2, t.batch, 1)
  dwA2 = affine.bckwd(z1, params$W[[2]], params$b[[2]], dwSM)   # input to layer 2 is z1
  dwSG = sigmoid.bckwd(a1, dwA2$dx)
  dwA1 = affine.bckwd(x.batch, params$W[[1]], params$b[[1]], dwSG)
  params$W[[2]] = params$W[[2]] - lambda * dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda * dwA2$db
  params$W[[1]] = params$W[[1]] - lambda * dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda * dwA1$db
  loss[i.iter] = loss.network(params, x.batch, t.batch, c("sigmoid", "softmax"))
}
plot(loss, type = 'l', xlab = "trial")
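# Added check: training accuracy of the iris network, using feedforward()
# defined above for a full forward pass over all 150 flowers.
pred = feedforward(params, train.x, c("sigmoid", "softmax"))
mean(max.col(pred) == train.y.temp)   # proportion correct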
# 広域システム特別講義II: Reinforcement Learning 1B
mk_MC_seq <- function(){
  # defining transition matrix & action probs.
  north = c(1:3, 15, 1:10)
  east = 2:15; east[c(3, 7, 11)] = c(3, 7, 11)
  south = c(5:15, 12:14)
  west = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
  trM = cbind(north, east, south, west)
  P = rep(0.25, 4)
  # creating sequence
  state = sample(1:14, 1)
  state.seq = state
  while (state != 15){
    action = sample(1:4, 1, prob = P)
    state.seq = c(state.seq, trM[state, action])
    state = trM[state, action]
  }
  return(state.seq)
}
# update Vs (first-visit MC)
cal.cumV.MC <- function(cumV, state.seq, state.count){
  r = -1
  uniq.seq = unique(state.seq)
  for (i.uniq in 1:(length(uniq.seq) - 1)){
    first.visit = which(state.seq == uniq.seq[i.uniq])[1]
    cumV[uniq.seq[i.uniq]] = cumV[uniq.seq[i.uniq]] + r*(length(state.seq) - first.visit)
  }
  state.count[uniq.seq] = state.count[uniq.seq] + 1
  return(list(cumV = cumV, state.count = state.count))
}
# main script
max.iter = 10000
state.count = rep(0, 15); cumV = rep(0, 14)
for (i.iter in 1:max.iter){
  state.seq = mk_MC_seq()
  updates = cal.cumV.MC(cumV, state.seq, state.count)
  state.count = updates$state.count
  cumV = updates$cumV
}
V = matrix(c(0, cumV/state.count[1:14], 0), nrow = 4)

# function to calc. card values
card.value <- function(adj.cards) {
  sum.cards = sum(adj.cards)
  if (any(adj.cards == 1) & sum.cards <= 11) {
    sum.cards = sum.cards + 10
    usableA = 1   # true
  } else { usableA = 2 }   # false
  return(c(sum.cards, usableA))
}
# function to calc. reward
calc.reward <- function(p.val, d.val) {
  if (p.val > 21) {
    reward = -1
  } else {
    if (d.val > 21) {
      reward = 1
    } else {
      if (p.val == d.val) {
        reward = 0
      } else {
        reward = ifelse(p.val > d.val, 1, -1)
      }
    }
  }
  return(reward)
}
# playing a single game
play.BJ <- function(policy){
  cards = sample(rep(1:13, 4))
  player = cards[1:2]; adj.player = pmin(player, 10)
  dealer = cards[3:4]; adj.dealer = pmin(dealer, 10)
  cards = cards[-(1:4)]
  d.val = card.value(adj.dealer)
  p.val = card.value(adj.player)
  state.hist = c(adj.dealer[1], p.val[1], p.val[2])
  while (p.val[1] < policy) {
    player = c(player, cards[1]); adj.player = pmin(player, 10)
    cards = cards[-1]
    p.val = card.value(adj.player)
    state.hist = rbind(state.hist, c(adj.dealer[1], p.val[1], p.val[2]))
  }
  while (d.val[1] < 17) {
    dealer = c(dealer, cards[1]); adj.dealer = pmin(dealer, 10)
    cards = cards[-1]
    d.val = card.value(adj.dealer)
  }
  return(list(p.val = p.val, d.val = d.val, state.hist = state.hist))
}
# main function
BJ_MC_fixedPolicy <- function(policy = 20, maxIter = 1e6){
  rew.sum = array(0, dim = c(10, 10, 2))
  rew.count = array(0, dim = c(10, 10, 2))
  for (i_play in 1:maxIter) {
    result <- play.BJ(policy)
    p.val = result$p.val
    d.val = result$d.val
    state.hist = result$state.hist
    rew = calc.reward(p.val[1], d.val[1])
    n.state = nrow(state.hist)
    if (is.null(n.state)) {
      n.state = 1
      state.hist = t(as.matrix(state.hist))
    }
    for (i_state in 1:n.state) {
      if (state.hist[i_state, 2] > 11 & state.hist[i_state, 2] < 22) {
        idx1 = state.hist[i_state, 1]        # dealer's showing card
        idx2 = state.hist[i_state, 2] - 11   # player sum 12-21 mapped to 1-10
        idx3 = state.hist[i_state, 3]        # usable ace (1) or not (2)
        rew.sum[idx1, idx2, idx3] = rew.sum[idx1, idx2, idx3] + rew
        rew.count[idx1, idx2, idx3] = rew.count[idx1, idx2, idx3] + 1
      }
    }
  }
  return(rew.sum/rew.count)
}
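# Example run (added; 1e5 games rather than the 1e6 default, to keep it quick):
# the state-value of the fixed stick-on-20 policy.
V.fixed <- BJ_MC_fixedPolicy(policy = 20, maxIter = 1e5)
round(V.fixed[, , 2], 2)   # rows: dealer's showing card; columns: player sum 12-21 (no usable ace)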
play.BJ2 <- function(policy){
  cards = sample(c(rep(1:10, 4), rep(10, 12)))
  player = cards[1:2]; dealer = cards[3:4]; cards = cards[-(1:4)]
  d.val = card.value(dealer); p.val = card.value(player)
  while (p.val[1] < 12) {
    player = c(player, cards[1])
    cards = cards[-1]
    p.val = card.value(player)
  }
  # initial random action
  action = sample(0:1, 1)
  state.hist = c(dealer[1], p.val[1], p.val[2], (action + 1))
  # player's action
  while (action == 1 & p.val[1] < 22) {
    player = c(player, cards[1])
    cards = cards[-1]
    p.val = card.value(player)
    state.hist = rbind(state.hist, c(dealer[1], p.val[1], p.val[2], (action + 1)))
    if (p.val[1] < 22) {
      action = policy[dealer[1], (p.val[1] - 11), p.val[2]]
    }
  }
  # dealer's action
  while (d.val[1] < 17) {
    dealer = c(dealer, cards[1])
    cards = cards[-1]
    d.val = card.value(dealer)
  }
  return(list(p.val = p.val, d.val = d.val, state.hist = state.hist))
}

BJ_MC <- function(maxIter = 1e6){
  rew.sum = array(0, dim = c(10, 10, 2, 2))
  rew.count = array(1, dim = c(10, 10, 2, 2))   # starts at 1 to avoid division by zero
  Q = array(0, dim = c(10, 10, 2))
  V = array(0, dim = c(10, 10, 2))
  policy = array(sample(0:1, 10*10*2, replace = T), dim = c(10, 10, 2))
  # policy: 1 = hit, 0 = stay
  for (i_play in 1:maxIter) {
    result = play.BJ2(policy)
    p.val = result$p.val
    d.val = result$d.val
    state.hist = result$state.hist
    rew = calc.reward(p.val[1], d.val[1])
    n.state = nrow(state.hist)
    if (is.null(n.state)) {
      n.state = 1
      state.hist = t(as.matrix(state.hist))
    }
    for (i_state in 1:n.state) {
      if (state.hist[i_state, 2] > 11 & state.hist[i_state, 2] < 22) {
        ind = state.hist[i_state, ] - c(0, 11, 0, 0)
        rew.sum[ind[1], ind[2], ind[3], ind[4]] = rew.sum[ind[1], ind[2], ind[3], ind[4]] + rew
        rew.count[ind[1], ind[2], ind[3], ind[4]] = rew.count[ind[1], ind[2], ind[3], ind[4]] + 1
        Q = rew.sum/rew.count
        policy[, , 1] = Q[, , 1, 1] < Q[, , 1, 2]
        policy[, , 2] = Q[, , 2, 1] < Q[, , 2, 2]
      }
    }
  }
  V[, , 1] = (rew.sum[, , 1, 1] + rew.sum[, , 1, 2])/(rew.count[, , 1, 1] + rew.count[, , 1, 2])
  V[, , 2] = (rew.sum[, , 2, 1] + rew.sum[, , 2, 2])/(rew.count[, , 2, 1] + rew.count[, , 2, 2])
  return(list(policy, V, Q))
}

# TD(0) on the random-walk example
TD0.ex1 <- function(maxItr, alpha, gamma) {
  V = c(0, rep(0.5, 5), 0)
  V.hist = matrix(0, nrow = maxItr + 1, ncol = 5)
  V.hist[1, ] = V[2:6]
  P.act = matrix(0.5, ncol = 7, nrow = 2)
  for (i_rep in 1:maxItr) {
    state = 5
    while (state != 1 & state != 7) {
      action = sample(c(-1, 1), 1, prob = P.act[, state])
      state.old = state
      state = state + action
      r = ifelse(state == 7, 1, 0)
      V[state.old] = V[state.old] + alpha*(r + gamma*V[state] - V[state.old])
    }
    V.hist[(i_rep + 1), ] = V[2:6]
  }
  return(V.hist)
}
# constant step-size Monte Carlo
constMC.ex1 <- function(maxItr, alpha) {
  V = c(0, rep(0.5, 5), 0)
  V.hist = matrix(0, nrow = maxItr + 1, 5)
  V.hist[1, ] = V[2:6]
  P.act = matrix(0.5, ncol = 7, nrow = 2)
  for (i_rep in 1:maxItr) {
    state = 5; state.hist = state
    while (state != 1 & state != 7) {
      action = sample(c(-1, 1), 1, prob = P.act[, state])
      state = state + action
      state.hist = cbind(state.hist, state)
    }
    R = ifelse(state == 7, 1, 0)
    n.state = length(state.hist)
    for (i_state in 1:(n.state - 1)) {
      V[state.hist[i_state]] = V[state.hist[i_state]] + alpha*(R - V[state.hist[i_state]])
    }
    V.hist[(i_rep + 1), ] = V[2:6]
  }
  return(V.hist)
}
# (re)creating Fig 6.7
alphaTD = c(0.05, 0.075, 0.1, 0.15)
alphaMC = c(0.01, 0.02, 0.03, 0.04)
n.alphas = length(alphaTD)
pchs = 0:(n.alphas - 1)
true.V = 1:5*(1/6)
n_rep = 100
sqs = seq(1, 101, 2)
plot(0, 0, type = 'n', xlim = c(0, 100), ylim = c(0, 0.25))
for (i_alpha in 1:n.alphas) {
  rmsTD = matrix(0, 101, n_rep)
  rmsMC = matrix(0, 101, n_rep)
  for (i_rep in 1:n_rep) {
    resTD = TD0.ex1(100, alphaTD[i_alpha], 1)
    resMC = constMC.ex1(100, alphaMC[i_alpha])
    for (i_gen in 1:101) {
      rmsTD[i_gen, i_rep] = sqrt(mean((resTD[i_gen, ] - true.V)^2))
      rmsMC[i_gen, i_rep] = sqrt(mean((resMC[i_gen, ] - true.V)^2))
    }
  }
  mTD = rowMeans(rmsTD)
  mMC = rowMeans(rmsMC)
  lines(mTD, col = 'red')
  lines(mMC, col = 'blue')
  lines(sqs, mTD[sqs], col = 'red', pch = pchs[i_alpha], type = 'p')
  lines(sqs, mMC[sqs], col = 'blue', pch = pchs[i_alpha], type = 'p')
}
labs = c("MC, alpha=0.01", "MC, alpha=0.02", "MC, alpha=0.03", "MC, alpha=0.04",
         "TD, alpha=0.05", "TD, alpha=0.075", "TD, alpha=0.10", "TD, alpha=0.15")
legend('topright', labs, col = c(rep('blue', 4), rep('red', 4)), pch = rep(0:3, 2), lwd = 1.5)
sarsa.ex6.5 <- function(maxItr, alpha, gamma, epsilon) {
  # field size: 7row x 10column
  # horizontal move -> COLUMN
  # vertical move -> ROW
  # effect of wind -> ROW
  # actions: 1-up, 2-right, 3-down, 4-left
  act.V = matrix(c(1, 0, 0, 1, -1, 0, 0, -1), nrow = 4, byrow = T)
  wind = matrix(c(0,0, 0,0, 0,0, 1,0, 1,0, 1,0, 2,0, 2,0, 1,0, 0,0), byrow = T, nrow = 10)
  goal = c(4, 8)
  Qs = array(0, dim = c(7, 10, 4))
  for (i_rep in 1:maxItr) {
    state = c(4, 1)   # start
    if (runif(1) > epsilon) {
      move = which.max(Qs[state[1], state[2], ])
    } else { move = sample(1:4, 1) }
    while (!all(state == goal)) {
      st.old = state
      mv.old = move
      state = state + act.V[move, ] + wind[state[2], ]
      if (state[1] < 1) { state[1] = 1 }
      if (state[1] > 7) { state[1] = 7 }
      if (state[2] < 1) { state[2] = 1 }
      if (state[2] > 10) { state[2] = 10 }
      if (runif(1) > epsilon) {
        move = which.max(Qs[state[1], state[2], ])
      } else { move = sample(1:4, 1) }
      rew = ifelse(all(state == goal), 0, -1)
      Qs[st.old[1], st.old[2], mv.old] = Qs[st.old[1], st.old[2], mv.old] +
        alpha*(rew + gamma*Qs[state[1], state[2], move] - Qs[st.old[1], st.old[2], mv.old])
    }
  }
  return(Qs)
}
# running example
Qs = sarsa.ex6.5(5e6, 0.1, 1, 0.1)
# sim optimal actions (act.V and wind as defined inside sarsa.ex6.5;
# they must also exist in the global environment for this script)
act.V = matrix(c(1, 0, 0, 1, -1, 0, 0, -1), nrow = 4, byrow = T)
wind = matrix(c(0,0, 0,0, 0,0, 1,0, 1,0, 1,0, 2,0, 2,0, 1,0, 0,0), byrow = T, nrow = 10)
state = c(4, 1); goal = c(4, 8); state.hist = state
while (!all(state == goal)) {
  moveID = which.max(Qs[state[1], state[2], ])
  state = state + act.V[moveID, ] + wind[state[2], ]
  if (state[1] < 1) { state[1] = 1 }
  if (state[1] > 7) { state[1] = 7 }
  if (state[2] < 1) { state[2] = 1 }
  if (state[2] > 10) { state[2] = 10 }
  state.hist = rbind(state.hist, state)
}
# plotting results
plot(0, 0, type = 'n', xlim = c(0, 11), ylim = c(0, 8), xlab = "", ylab = "",
     main = "Learned policies -- Sarsa")
lines(1, 4, type = 'p', pch = 19, col = 'red', cex = 2)
lines(8, 4, type = 'p', pch = 19, col = 'red', cex = 2)
dirs = c("up", "right", "down", "left")
for (i_row in 1:7) {
  for (i_col in 1:10) {
    best.move = dirs[which.max(Qs[i_row, i_col, ])]
    text(i_col, i_row, best.move)
  }
}
lines(state.hist[, 2], state.hist[, 1], col = "red", lwd = 2)

Qlearn.ex6.5 <- function(maxItr, alpha, gamma, epsilon) {
  # field size: 7row x 10column
  # horizontal move -> COLUMN
  # vertical move -> ROW
  # effect of wind -> ROW
  # actions: 1-up, 2-right, 3-down, 4-left
  act.V = matrix(c(1, 0, 0, 1, -1, 0, 0, -1), nrow = 4, byrow = T)
  wind = matrix(c(0,0, 0,0, 0,0, 1,0, 1,0, 1,0, 2,0, 2,0, 1,0, 0,0), byrow = T, nrow = 10)
  goal = c(4, 8)
  Qs = array(0, dim = c(7, 10, 4))
  for (i_rep in 1:maxItr) {
    state = c(4, 1)   # start
    while (!all(state == goal)) {
      if (runif(1) > epsilon) {
        move = which.max(Qs[state[1], state[2], ])
      } else { move = sample(1:4, 1) }
      sIDX = state
      state = state + act.V[move, ] + wind[state[2], ]
      if (state[1] < 1) { state[1] = 1 }
      if (state[1] > 7) { state[1] = 7 }
      if (state[2] < 1) { state[2] = 1 }
      if (state[2] > 10) { state[2] = 10 }
      max.Q = max(Qs[state[1], state[2], ])
      rew = ifelse(all(state == goal), 0, -1)
      Qs[sIDX[1], sIDX[2], move] = Qs[sIDX[1], sIDX[2], move] +
        alpha*(rew + gamma*max.Q - Qs[sIDX[1], sIDX[2], move])
    }
  }
  return(Qs)
}
Qs = Qlearn.ex6.5(1e6, 0.05, 1, 0.1)
# sim optimal actions
state = c(4, 1); goal = c(4, 8); state.hist = state
while (!all(state == goal)) {
  moveID = which.max(Qs[state[1], state[2], ])
  state = state + act.V[moveID, ] + wind[state[2], ]
  if (state[1] < 1) { state[1] = 1 }
  if (state[1] > 7) { state[1] = 7 }
  if (state[2] < 1) { state[2] = 1 }
  if (state[2] > 10) { state[2] = 10 }
  state.hist = rbind(state.hist, state)
}
# plotting results
plot(0, 0, type = 'n', xlim = c(0, 11), ylim = c(0, 8), xlab = "", ylab = "",
     main = "Learned policies -- Q-learning")
lines(1, 4, type = 'p', pch = 19, col = 'red', cex = 2)
lines(8, 4, type = 'p', pch = 19, col = 'red', cex = 2)
dirs = c("up", "right", "down", "left")
for (i_row in 1:7) {
  for (i_col in 1:10) {
    best.move = dirs[which.max(Qs[i_row, i_col, ])]
    text(i_col, i_row, best.move)
  }
}
lines(state.hist[, 2], state.hist[, 1], col = "red", lwd = 2)
# 広域システム特別講義II: RL1b
library(tidyverse)
library(gridExtra)
epGreedy = function(nTrial, nRep, epsilon) {
  rew.hist = opt.hist = matrix(0, nrow = nTrial, ncol = nRep)
  for (i_rep in 1:nRep){
    Q.true = rnorm(10); opt.ID = which.max(Q.true)
    Q.est = Q.cum = act.count = rep(0, 10)
    for (i_trial in 1:nTrial) {
      if (runif(1) < epsilon) {
        action = sample(1:10, 1)
      } else {
        action = which.max(Q.est)
      }
      rew.hist[i_trial, i_rep] = rnorm(1) + Q.true[action]
      opt.hist[i_trial, i_rep] = action == opt.ID
      act.count[action] = act.count[action] + 1
      Q.cum[action] = Q.cum[action] + rew.hist[i_trial, i_rep]
      Q.est[action] = Q.cum[action]/act.count[action]
    }
  }
  return(list(opt = opt.hist, rew = rew.hist))
}
n.Trial = 1000; n.Rep = 2000
type1 = epGreedy(n.Trial, n.Rep, 0.0)
type2 = epGreedy(n.Trial, n.Rep, 0.01)
type3 = epGreedy(n.Trial, n.Rep, 0.1)
res.all = tibble(trial = rep(1:n.Trial, n.Rep * 3),
                 rew = c(as.vector(type1$rew), as.vector(type2$rew), as.vector(type3$rew)),
                 opt = c(as.vector(type1$opt), as.vector(type2$opt), as.vector(type3$opt)),
                 condition = c(rep("0.00", n.Trial * n.Rep),
                               rep("0.01", n.Trial * n.Rep),
                               rep("0.10", n.Trial * n.Rep)))
p1 <- ggplot(res.all) + geom_smooth(aes(x = trial, y = rew, color = condition)) +
  ylab("Average Reward")
p2 <- ggplot(res.all) + geom_smooth(aes(x = trial, y = opt, color = condition)) +
  ylab("Prop. Optimal Action")
grid.arrange(p1, p2, nrow = 2)

softmax = function(nTrial, nRep, tau) {
  rew.hist = opt.hist = matrix(0, nrow = nTrial, ncol = nRep)
  for (i_rep in 1:nRep){
    Q.true = rnorm(10); opt.ID = which.max(Q.true)
    Q.est = Q.cum = act.count = rep(0, 10)
    for (i_trial in 1:nTrial) {
      p = exp(Q.est/tau)/sum(exp(Q.est/tau))
      action = sample(1:10, 1, prob = p)
      rew.hist[i_trial, i_rep] = rnorm(1) + Q.true[action]
      opt.hist[i_trial, i_rep] = action == opt.ID
      act.count[action] = act.count[action] + 1
      Q.cum[action] = Q.cum[action] + rew.hist[i_trial, i_rep]
      Q.est[action] = Q.cum[action]/act.count[action]
    }
  }
  return(list(opt = opt.hist, rew = rew.hist))
}
n.Trial = 1000; n.Rep = 1000
eG00 = epGreedy(n.Trial, n.Rep, 0.0)
eG01 = epGreedy(n.Trial, n.Rep, 0.1)
sm = softmax(n.Trial, n.Rep, 0.1)
res.all = tibble(trial = rep(1:n.Trial, n.Rep * 3),
                 rew = c(as.vector(eG00$rew), as.vector(eG01$rew), as.vector(sm$rew)),
                 opt = c(as.vector(eG00$opt), as.vector(eG01$opt), as.vector(sm$opt)),
                 condition = c(rep("epsilon = 0.0", n.Trial * n.Rep),
                               rep("epsilon = 0.1", n.Trial * n.Rep),
                               rep("softmax", n.Trial * n.Rep)))
p1 <- ggplot(res.all) + geom_smooth(aes(x = trial, y = rew, color = condition)) +
  ylab("Average Reward")
p2 <- ggplot(res.all) + geom_smooth(aes(x = trial, y = opt, color = condition)) +
  ylab("Prop. Optimal Action")
grid.arrange(p1, p2, nrow = 2)
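# Added illustration: how tau shapes the softmax choice probabilities for a
# fixed Q vector; small tau approaches greedy choice, large tau approaches
# uniform random choice.
Q.demo = c(0.0, 0.5, 1.0)
for (tau in c(0.05, 0.5, 5)) {
  print(round(exp(Q.demo/tau)/sum(exp(Q.demo/tau)), 3))
}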
# RL example
V = rep(0, 25)
# defining probability matrix
P = matrix(1/4, nrow = 25, ncol = 4)
# defining deterministic transition matrix
north = c(2:25, 25)
north[c(5, 10, 15, 20, 25)] = c(5, 10, 15, 20, 25)
east = c(6:25, 21:25)
west = c(1:5, 1:20)
south = c(1, 1:24)
south[c(1, 6, 11, 16, 21)] = c(1, 6, 11, 16, 21)
trM = cbind(north, east, south, west)
trM[10, ] = 6
trM[20, ] = 18
# defining reward matrix
R = matrix(0, nrow = 25, ncol = 4)
R[which(trM == 1:25)] = -1
R[10, ] = 10
R[20, ] = 5
# calculating state-values iteratively
# fixed number of iterations
nRep = 1000; gamma = 0.9
for (i_rep in 1:nRep) {
  V.old = V
  for (i_state in 1:25) {
    V[i_state] = sum(P[i_state, ]*(R[i_state, ] + gamma*V.old[trM[i_state, ]]))
  }
}
print(matrix(V, nrow = 5)[5:1, ])

# exercise 1
north = c(1:3, 15, 1:10)
east = 2:15; east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)
# defining reward & transition prob.
R = -1; P = 0.25

# policy improvement
north = c(1:3, 15, 1:10)
east = 2:15; east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)
R = -1; P = 0.25; V = rep(0, 15)
gamma = 1; tol = 1e-10
bestP = sample(1:4, 14, replace = T)
stable = F; counter = 0
while (stable == F){
  counter = counter + 1
  # iterative policy evaluation
  delta = 1   # reset so evaluation runs on every sweep
  while (delta > tol) {
    delta = 0
    for (i_state in 1:14) {
      v = V[i_state]
      V[i_state] = sum(P*(R + gamma*V[trM[i_state, ]]))
      delta = max(delta, abs(v - V[i_state]))
    }
  }
  # policy improvement: the policy is stable only if NO state changes its action
  stable = T
  for (i_state in 1:14) {
    b = bestP[i_state]
    bestP[i_state] = which.max(V[trM[i_state, ]])
    if (bestP[i_state] != b) { stable = F }
  }
}
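# Added sketch: inspecting the improved policy; bestP indexes the columns
# of trM (1 = north, 2 = east, 3 = south, 4 = west).
dirs = c("north", "east", "south", "west")
cbind(state = 1:14, best.action = dirs[bestP], V = round(V[1:14], 2))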
# 広域システム特別講義II: R prog. 2
install.packages("tidyverse")
library(tidyverse)
random.number <- rnorm(1000)
mean(random.number)
mean(random.number <- rnorm(1000))
rnorm(1000) %>% mean()

# CLT
NperSample = 10
SampleSize = 300000
# "traditional"
random.number <- runif(NperSample * SampleSize)
dat <- matrix(random.number, nrow = NperSample)
means <- colMeans(dat)
dens <- density(means)
hist(means, breaks = 100, probability = T, main = "Distribution of Means")
lines(dens, lwd = 3, col = "orange")
# with pipes
runif(NperSample * SampleSize) %>% matrix(nrow = NperSample) %>% colMeans() -> means
hist(means, breaks = 100, probability = T, main = "Distribution of Means")
means %>% density() %>% lines(lwd = 3, col = 'orange')

histWdensity <- function(dat, nbreaks = 30, main.txt){
  dens <- density(dat)
  hist(dat, breaks = nbreaks, probability = T, main = main.txt)
  lines(dens, lwd = 3, col = "orange")
}
runif(NperSample * SampleSize) %>% matrix(nrow = NperSample) %>% colMeans() %>%
  histWdensity(nbreaks = 100, main.txt = "Distribution of Means")

dat <- read.csv("http://www.matsuka.info/data_folder/sampleData2013.txt")
dt <- as_tibble(dat)
dt.la <- filter(dt, affil == "LA")
dt.la2 <- filter(dt, affil == "LA" & grade == "SP")
dt.laNot2 <- filter(dt, affil == "LA" & grade != "SP")
dt.GB <- select(dt, grade, nbooks)
dtWOgender <- select(dt, -gender)
dt.arranged <- arrange(dt, affil, grade)
dt.weekly <- mutate(dt, nbooksWeek = nbooks / 52)
dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/52) >= 1)   # at least one per week
dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/12) >= 1)   # at least one per month
dt.BWindex = mutate(dt, nbooksWeek = nbooks / 52, idx = nbooksWeek / (12*7 - Hworked))
dt.byGrade <- group_by(dt, grade)
summarize(dt.byGrade, ave.books = mean(nbooks, na.rm = TRUE),
          ave.Hworked = mean(Hworked, na.rm = TRUE))
dt.byGrAf <- group_by(dt, grade, affil)
dt.summ <- summarize(dt.byGrAf, ave.books = mean(nbooks, na.rm = TRUE),
                     ave.Hworked = mean(Hworked, na.rm = TRUE), N = n())
dt.summ2 <- dt %>% group_by(grade, affil) %>%
  summarize(ave.books = mean(nbooks, na.rm = TRUE),
            ave.Hworked = mean(Hworked, na.rm = TRUE), N = n()) %>%
  filter(N > 2) %>% arrange(desc(ave.books))
plot(x = dt.summ2$ave.books, y = dt.summ2$ave.Hworked, pch = 20, cex = 3,
     xlab = "Ave. # books read", ylab = "Ave hours worked")

dt <- read_csv("http://www.matsuka.info/data_folder/sampleData2013.txt", col_names = TRUE)
dt.summ3 <- dt %>% group_by(grade, gender) %>%
  summarize(ave.books = mean(nbooks, na.rm = TRUE),
            ave.Hworked = mean(Hworked, na.rm = TRUE))
dt.summ3G <- dt.summ3 %>%
  gather('ave.books', 'ave.Hworked', key = 'ave.values', value = "BorW")
dt.summ3SformG <- spread(dt.summ3G, key = ave.values, value = BorW)
dt.sumLA <- dt %>% filter(affil == "LA") %>% group_by(grade) %>%
  summarize(ave.books = mean(nbooks))
toeic <- tibble(grade = factor(c("SP", "JR")), score = c(800, 830))
new.dt1 <- dt.sumLA %>% inner_join(toeic, by = "grade")
dt.sumLA <- add_row(dt.sumLA, grade = "MS", ave.books = 13)
toeic2 <- tibble(grade = factor(c("SP", "JR", "DR")), score = c(800, 830, 900))
new.dt3 <- full_join(dt.sumLA, toeic2)
new.dt4 <- left_join(dt.sumLA, toeic2)
new.dt5 <- right_join(dt.sumLA, toeic2)

# CLT, plotted with ggplot2
NperSample = 10
SampleSize = 300000
runif(NperSample * SampleSize) %>% matrix(nrow = NperSample) %>%
  colMeans() %>% tibble(sample.mean = .) -> means
ggplot(means, aes(x = sample.mean, y = ..density..)) +
  geom_histogram(bins = 200) + geom_density(colour = "orange", size = 2)
ggplot(means, aes(x = sample.mean, y = ..density..)) +
  geom_histogram(bins = 200) + geom_line(stat = "density", colour = "orange", size = 2)
runif(NperSample * SampleSize) %>% matrix(nrow = NperSample) %>%
  colMeans() %>% tibble(sample.mean = .) %>%
  ggplot(., aes(x = sample.mean, y = ..density..)) +
  geom_histogram(bins = 100, colour = "grey20") +
  geom_line(stat = "density", colour = "skyblue", size = 2)

dat <- read.csv("http://www.matsuka.info/data_folder/sampleData2013.txt")
dt <- as_tibble(dat)
ggplot(dt, aes(x = Hworked, y = nbooks)) + geom_point(size = 3)
ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 3)
ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks, shape = grade), size = 5)
ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks), size = 5) + facet_wrap(~ grade, nrow = 1)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks))
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, linetype = grade))
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks)) + facet_wrap(~ grade, nrow = 4)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks)) +
  geom_point(aes(x = Hworked, y = nbooks), size = 4)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks), colour = "black", se = FALSE) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)
plot1 <- ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)
plot1 + xlab("Hours worked") + ylab("Number of books read")
plot1 + xlab("Hours worked") + ylab("Number of books read") +
  theme(axis.title.x = element_text(face = "italic", size = 14, colour = "navy"),
        axis.title.y = element_text(face = "bold", size = 10, colour = "darkgreen"))
ggplot(filter(dt, affil == "LA")) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

dt$grade <- fct_relevel(dt$grade, "FR", "SP", "JR", "SR")
group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T)) %>%
  ggplot() + geom_bar(aes(x = grade, y = ave.books), stat = "identity")
ggplot(dt) + geom_bar(aes(x = grade))
dt %>% count(grade)
group_by(dt, grade) %>%
  summarize(ave.books = mean(nbooks, na.rm = T),
            se = sd(nbooks, na.rm = T)/sqrt(n())) %>%
  ggplot(aes(x = grade, y = ave.books)) +
  geom_bar(stat = "identity", fill = "grey70") +
  geom_errorbar(aes(ymin = ave.books - se, ymax = ave.books + se), width = 0.2) +
  ylab("Average # books read")
ggplot(dt) + geom_boxplot(aes(x = grade, y = nbooks))
ggplot(dt) + geom_boxplot(aes(x = grade, y = nbooks)) + coord_flip()
ggplot(dt, aes(x = Hworked, y = nbooks)) + stat_density2d(aes(colour = ..level..)) + geom_point()
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  stat_density2d(aes(alpha = ..density..), geom = "tile", contour = F) +
  geom_point(alpha = 0.4)
ggplot(dt) + stat_summary(aes(x = grade, y = nbooks), fun.y = mean,
                          fun.ymin = function(x) mean(x) - sd(x),
                          fun.ymax = function(x) mean(x) + sd(x))

dat <- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dt <- as_tibble(dat)
dt.lm <- lm(h ~ shoesize, dt)
cfs <- coef(dt.lm)
ggplot(dt, aes(x = shoesize, y = h)) + geom_point() +
  geom_abline(intercept = cfs[1], slope = cfs[2], col = "red") +
  geom_text(x = 22, y = 175,
            aes(label = paste("r^2 =", round(summary(dt.lm)$r.squared, 3))))
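# Usage example (added): predicting height for a new shoe size with the
# fitted model; shoesize = 25 is an arbitrary illustrative value.
predict(dt.lm, newdata = data.frame(shoesize = 25))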