### model (save as model_16_1_2.txt)
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dnorm( mu , 1/sigma^2 )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
}

### fitting the normal model
library(rjags)
dat <- read.csv("~/Downloads/DBDA2Eprograms/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("~/research/oka/DBDA/ch16/model_16_1_2.txt",
                       data=dataList, n.chains=3, n.adapt=1000)
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma"),
                            n.iter=5000, thin=5)
mcmcMat <- as.matrix(codaSamples)

# overlay posterior normal densities (columns: mu, sigma) on the data
hist(y, nclass=15, probability = T)
x.temp = seq(40, 200, 0.1)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot){
  norm.temp = dnorm(x.temp, mean=mcmcMat[randperm[i.plot],1],
                    sd=mcmcMat[randperm[i.plot],2])
  lines(x.temp, norm.temp, col='orange')
}

### model 16.2 (robust estimation with a t distribution)
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu , 1/sigma^2 , nu )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/30.0)
}

### plotting for model 16.2
### (assumes the t model has been re-fit and mcmcMat now holds columns mu, nu, sigma)
hist(y, nclass=20, probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat), n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40, 200, 0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1]) / mcmcMat[randperm[i.plot],3]
  t.temp = dt(x.temp2, mcmcMat[randperm[i.plot],2]) / mcmcMat[randperm[i.plot],3]
  lines(x.temp1, t.temp, col='orange')
}

### model 16.3 (two groups; save as model_16_3.txt)
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu[group[i]] , 1/sigma[group[i]]^2 , nu )
  }
  for (j in 1:Ngroup){
    mu[j] ~ dnorm( meanY , 1/(100*sdY)^2 )
    sigma[j] ~ dunif( sdY/1000 , sdY*1000 )
  }
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/29)
}

### fitting model 16.3
dat <- read.csv("~/Downloads/DBDA2Eprograms/TwoGroupIQ.csv")
y = dat$Score
group = as.numeric(dat$Group)
Ntotal = length(y)
Ngroup = length(unique(group))
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y),
                Ngroup = Ngroup, group = group)
jagsModel = jags.model("~/research/oka/DBDA/ch16/model_16_3.txt",
                       data=dataList, n.chains=3, n.adapt=5000)
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma","nu"),
                            n.iter=5000, thin=5)
mcmcMat <- as.matrix(codaSamples)
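Before interpreting the draws in codaSamples, it is worth checking convergence across the three chains. A minimal sketch using standard coda functions (summary, gelman.diag, effectiveSize, plot), assuming the fit above is in the workspace:

# convergence and summary checks on the posterior samples
library(coda)
summary(codaSamples)       # posterior means, SDs, and quantiles per parameter
gelman.diag(codaSamples)   # potential scale reduction factors (should be near 1)
effectiveSize(codaSamples) # effective sample size per parameter
plot(codaSamples)          # trace and density plots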
認知情報解析演習 W13 (Cognitive Information Analysis Seminar, Week 13)
# defining the deterministic transition matrix for the 4x4 gridworld
north=c(1:3,15,1:10)
east=2:15; east[c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13); west[c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

### policy evaluation with a fixed number of sweeps
nRep=1000; gamma=1
P = 0.25
R = -1
V = rep(0,15)
for (i_rep in 1:nRep) {
  V.old=V
  for (i_state in 1:14) {
    V[i_state]=sum(P*(R+gamma*V.old[trM[i_state,]]))
  }
}
matrix(c(0,V),4,4)

########################################
# ch4.1 policy evaluation
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
north=c(1:3,15,1:10)
east=2:15; east[c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13); west[c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

# defining Reward & trans. prob.
R=-1; P=0.25

# iterative policy evaluation
delta=1; gamma=1; tol=1e-8
V=rep(0,15)
while (delta>tol) {
  delta=0
  for (i_state in 1:14) {
    v=V[i_state]
    V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
    delta=max(delta,abs(v-V[i_state]))
  }
}

# result
> matrix(c(0,V),nrow=4)
     [,1] [,2] [,3] [,4]
[1,]    0  -14  -20  -22
[2,]  -14  -18  -20  -20
[3,]  -20  -20  -18  -14
[4,]  -22  -20  -14    0

#####################################
# ch4.3 Policy Improvement
# easier one
#####################################
north=c(1:3,15,1:10)
east=2:15; east[c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13); west[c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1; V=rep(0,15)
gamma=1; tol=1e-5
bestP=sample(1:4,14,replace=T)
stable=F; counter=0
while (stable==F){
  counter=counter+1
  Vmat=matrix(V[trM],ncol=4)
  Ppolicy=t(apply(Vmat,1,function(x) exp(10*x)/sum(exp(10*x))))
  # iterative policy evaluation under the current (softmax) policy
  delta=1
  while (delta>tol) {
    delta=0
    for (i_state in 1:14) {
      v=V[i_state]
      V[i_state]=sum(Ppolicy[i_state,]*(R+gamma*V[trM[i_state,]]))
      delta=max(delta,abs(v-V[i_state]))
    }
  }
  # policy improvement: stable only if no state's greedy action changed
  stable=T
  for (i_state in 1:14) {
    b=bestP[i_state]
    bestP[i_state]=which.max(V[trM[i_state,]])
    if (bestP[i_state]!=b) stable=F
  }
}

#####################################
# ch4.4 Value Iteration
#####################################
# example 4.3 (gambler's problem)
V=c(rep(0,100),1); V.hist=c()
p=c(0.4,0.6)
gamma=1; delta=1; tol=1e-20
max.a=rep(0,101)
while (delta>tol) {
  delta=0
  for (i_state in 1:99) {
    v=V[i_state+1]
    temp=matrix(0,nrow=1,ncol=i_state)
    for (i_action in 1:i_state) {
      temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
                                    V[(max(i_state-i_action,0)+1)])))
    }
    V[i_state+1]=max(temp)
    max.a[i_state+1]=which.max(round(temp,8))
    delta=max(delta,abs(v-V[i_state+1]))
  }
  V.hist=rbind(V.hist,V)
}

# plotting results
par(mfrow=c(1,2))
plot(V.hist[1,],type='l',lwd=2,xlab="Capital",ylab="Value Estimates",col='red')
lines(V.hist[2,],lwd=2,col='blue')
lines(V.hist[3,],lwd=2,col='green')
lines(V.hist[32,],lwd=2,col='black')
legend("topleft",c("sweep 1","sweep 2","sweep 3","sweep 32"),
       col=c("red","blue","green","black"),lwd=2)
barplot(max.a,xlab="Capital",ylab="Final Policy",col="white")
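To see what policy the improvement loop actually settled on, the greedy action indices in bestP can be laid out on the 4x4 grid. A minimal sketch, assuming bestP and the column order of trM (north, east, south, west) from the block above; the two terminal corner cells are marked with "*":

# map greedy action indices (1=north, 2=east, 3=south, 4=west) to labels
act.lab = c("N","E","S","W")
# states 1-14 are non-terminal; the terminal state occupies the two corners
matrix(c("*", act.lab[bestP], "*"), nrow=4)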
認知情報解析演習 強化学習2 (Cognitive Information Analysis Seminar: Reinforcement Learning 2)
# example 3.8: Gridworld
# initializing value vector
V=rep(0,25)

# defining probability matrix (equiprobable random policy)
P=matrix(1/4,nrow=25,ncol=4)

# defining deterministic transition matrix
north=c(2:25,25)
north[c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18

# defining reward matrix
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5

# calculating state-values iteratively (fixed number of iterations)
nRep=1000; gamma=0.9
for (i_rep in 1:nRep) {
  V.old=V
  for (i_state in 1:25) {
    V[i_state]=sum(P[i_state,]*(R[i_state,]+gamma*V.old[trM[i_state,]]))
  }
}

### optimal state values via value iteration
### (uses the 5x5 trM and R defined above, so run this before trM is redefined below)
V.star=V  # V calculated as in example 3.8
iter=0; maxItr=1000; delta=1; tol=1e-5
while(iter < maxItr & delta > tol) {
  delta=0; iter=iter+1
  V.star.old=V.star
  for (i_state in 1:25) {
    v=V.star[i_state]
    V.star[i_state]=max(R[i_state,]+gamma*V.star.old[trM[i_state,]])
    delta=max(delta,abs(v-V.star[i_state]))
  }
}

### example 2: transition matrix for the 4x4 gridworld
north=c(1:3,15,1:10)
east=2:15; east[c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13); west[c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

########################################
# ch4.1 policy evaluation
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
north=c(1:3,15,1:10)
east=2:15; east[c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13); west[c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

# defining Reward & trans. prob.
R=-1; P=0.25

# iterative policy evaluation
delta=1; gamma=1; tol=1e-8
V=rep(0,15)
while (delta>tol) {
  delta=0
  for (i_state in 1:14) {
    v=V[i_state]
    V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
    delta=max(delta,abs(v-V[i_state]))
  }
}

#####################################
# ch4.3 Policy Improvement
# easier one
#####################################
north=c(1:3,15,1:10)
east=2:15; east[c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13); west[c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1; V=rep(0,15)
gamma=1; tol=1e-5
bestP=sample(1:4,14,replace=T)
stable=F; counter=0
while (stable==F){
  counter=counter+1
  Vmat=matrix(V[trM],ncol=4)
  Ppolicy=t(apply(Vmat,1,function(x) exp(10*x)/sum(exp(10*x))))
  # iterative policy evaluation under the current (softmax) policy
  delta=1
  while (delta>tol) {
    delta=0
    for (i_state in 1:14) {
      v=V[i_state]
      V[i_state]=sum(Ppolicy[i_state,]*(R+gamma*V[trM[i_state,]]))
      delta=max(delta,abs(v-V[i_state]))
    }
  }
  # policy improvement: stable only if no state's greedy action changed
  stable=T
  for (i_state in 1:14) {
    b=bestP[i_state]
    bestP[i_state]=which.max(V[trM[i_state,]])
    if (bestP[i_state]!=b) stable=F
  }
}

#####################################
# ch4.4 Value Iteration
#####################################
# example 4.3 (gambler's problem)
V=c(rep(0,100),1); V.hist=c()
p=c(0.4,0.6)
gamma=1; delta=1; tol=1e-20
max.a=rep(0,101)
while (delta>tol) {
  delta=0
  for (i_state in 1:99) {
    v=V[i_state+1]
    temp=matrix(0,nrow=1,ncol=i_state)
    for (i_action in 1:i_state) {
      temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
                                    V[(max(i_state-i_action,0)+1)])))
    }
    V[i_state+1]=max(temp)
    max.a[i_state+1]=which.max(round(temp,8))
    delta=max(delta,abs(v-V[i_state+1]))
  }
  V.hist=rbind(V.hist,V)
}

# plotting results
par(mfrow=c(1,2))
plot(V.hist[1,],type='l',lwd=2,xlab="Capital",ylab="Value Estimates",col='red')
lines(V.hist[2,],lwd=2,col='blue')
lines(V.hist[3,],lwd=2,col='green')
lines(V.hist[32,],lwd=2,col='black')
legend("topleft",c("sweep 1","sweep 2","sweep 3","sweep 32"),
       col=c("red","blue","green","black"),lwd=2)
barplot(max.a,xlab="Capital",ylab="Final Policy",col="white")
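To inspect the gridworld results, the 25 state values can be put back into grid form. A minimal sketch, assuming V.star from the value-iteration block above; matrix() fills column-wise, so the orientation follows the state numbering used in trM rather than the textbook figure:

# optimal state values from the 5x5 gridworld, arranged as a grid
round(matrix(V.star, nrow=5), 1)
# the random-policy values can be displayed the same way, but only
# before the name V is reused by the chapter-4 sections:
# round(matrix(V, nrow=5), 1)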
認知情報解析学演習 PSO (Cognitive Information Analysis Seminar: Particle Swarm Optimization)
# objective function to be minimized
funZ <- function(x) {
  x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]
}

# swarm setup: positions, velocities, and history
n.theta = 10; n.iter = 1000
Wp = 0.1; Wg = 0.1
theta = matrix(rnorm(n.theta*2), nrow=n.theta)
v = matrix(rnorm(n.theta*2), nrow=n.theta)
theta.hist = array(0, c(n.theta,2,n.iter))
theta.hist[,,1] = theta

# personal and global bests
p.b.v <- apply(theta,1,funZ)
p.best = theta
g.b.v = min(p.b.v)
g.b.idx = which.min(p.b.v)
g.best <- theta[g.b.idx,]

# first velocity and position update
v = v + Wp*runif(n.theta)*(p.best - theta) + Wg*runif(n.theta)*t(g.best - t(theta))
theta = theta + v

for (i.iter in 2:n.iter){
  v = v + Wp*runif(n.theta)*(p.best - theta) + Wg*runif(n.theta)*t(g.best - t(theta))
  theta = theta + v
  fitG = apply(theta,1,funZ)
  # update the global best if any particle improved on it
  if (min(fitG) < g.b.v){
    g.best = theta[which.min(fitG),]
    g.b.v = min(fitG)
  }
  # update personal bests only where the new position is better
  idx = which(fitG < p.b.v)
  p.best[idx,] = theta[idx,]
  p.b.v[idx] = fitG[idx]
  theta.hist[,,i.iter] = theta
}

# contour plot of the objective with each particle's trajectory
funZ2 <- function(x,y) { x^4-16*x^2-5*x+y^4-16*y^2-5*y }
xmin=-5; xmax=5; n_len=100
x <- seq(xmin, xmax, length.out=n_len); y <- x
z <- outer(x,y,funZ2)
contour(x, y, z, nlevels=50, lwd=0.3, drawlabels=F, col='blue')
for (i.agent in 1:n.theta){
  lines(theta.hist[i.agent,1,], theta.hist[i.agent,2,], col=i.agent)
  points(theta[i.agent,1], theta[i.agent,2], pch=20, cex=2, col=i.agent)
}
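To judge how close the swarm got, the best value it found can be compared against a brute-force minimum over the plotted grid. A minimal sketch, assuming g.best, g.b.v, x, y, and z from the block above:

# best solution found by the swarm
g.best; g.b.v
# brute-force reference minimum on the plotted grid (for comparison only)
grid.min = min(z)
min.idx = which(z == grid.min, arr.ind=TRUE)
c(x[min.idx[1,1]], y[min.idx[1,2]], grid.min)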
### greedy action selection (10-armed bandit)
n.trial = 1000; n.rep = 100
Q.true = rnorm(10); opt.act = which.max(Q.true)   # one bandit problem shared across replications
r.hist = matrix(0, n.rep, n.trial)
oa.hist = matrix(0, n.rep, n.trial)
for (i.rep in 1:n.rep){
  Q.est = rep(0,10); R.cum = rep(0,10)
  act.count = rep(0,10)
  for (i.trial in 1:n.trial){
    max.id = which(Q.est == max(Q.est))
    if (length(max.id) > 1){
      action = sample(max.id,1)
    } else {
      action = max.id
    }
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    }
    r = rnorm(1, Q.true[action], 1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est = R.cum / (act.count + 0.01)
  }
}
plot(colMeans(r.hist), type='l')
plot(colMeans(oa.hist), type='l', ylim=c(0,1))

### epsilon-greedy action selection
n.trial = 1000; n.rep = 1000
r.hist = matrix(0, n.rep, n.trial)
oa.hist = matrix(0, n.rep, n.trial)
epsilon = 0.01
for (i.rep in 1:n.rep){
  Q.est = rep(0,10); R.cum = rep(0,10)
  act.count = rep(0,10)
  Q.true = rnorm(10); opt.act = which.max(Q.true)   # a new bandit problem per replication
  for (i.trial in 1:n.trial){
    if (runif(1) > epsilon){
      max.id = which(Q.est == max(Q.est))
      if (length(max.id) > 1){
        action = sample(max.id,1)
      } else {
        action = max.id
      }
    } else {
      action = sample(10,1)
    }
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    }
    r = rnorm(1, Q.true[action], 1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est[action] = R.cum[action] / act.count[action]
  }
}

### softmax action selection
n.trial = 1000; n.rep = 1000
r.hist = matrix(0, n.rep, n.trial)
oa.hist = matrix(0, n.rep, n.trial)
gamma = 3
for (i.rep in 1:n.rep){
  Q.est = rep(0,10); R.cum = rep(0,10)
  act.count = rep(0,10)
  Q.true = rnorm(10); opt.act = which.max(Q.true)
  for (i.trial in 1:n.trial){
    action = sample(10, 1, prob = exp(gamma*Q.est)/sum(exp(gamma*Q.est)))
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    }
    r = rnorm(1, Q.true[action], 1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est[action] = R.cum[action] / act.count[action]
  }
}
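The three blocks above overwrite r.hist and oa.hist, so comparing the strategies requires saving a copy after each run. A minimal sketch using hypothetical names (oa.greedy, oa.egreedy, oa.softmax) for those copies:

# save the optimal-action history right after each block, then overlay the curves
oa.softmax = oa.hist   # run after the softmax block; do the same after the other two
plot(colMeans(oa.softmax), type='l', ylim=c(0,1),
     xlab="Trial", ylab="Prop. optimal action")
# lines(colMeans(oa.egreedy), col='red')   # uncomment once saved from the epsilon-greedy run
# lines(colMeans(oa.greedy),  col='blue')  # uncomment once saved from the greedy run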
データ解析基礎論A W12 (Fundamentals of Data Analysis A, Week 12)
source("http://www.matsuka.info/univ/course_folder/cutil.R") dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt", col.names = c("dump","method","subj","words")) dat.aov<-aov(words~method+subj+Error(subj:method),data=dat) dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt") interaction.plot(dat$duration, dat$method, dat$result, pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration", lwd=3, type='b', cex=2, trace.label="Method") dat.aov<-aov(result~method*duration+Error(s+s:method),dat) dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt") dat.aov<-aov(result~method+duration+method:duration + Error(s+method:s+duration:s+method:duration:s), data=dat)