### model 16.1
model {
for ( i in 1:Ntotal ) {
y[i] ~ dnorm( mu , 1/sigma^2 )  # JAGS dnorm takes a precision (1/variance), not an SD
}
mu ~ dnorm( meanY , 1/(100*sdY)^2 )  # broad prior centered on the sample mean
sigma ~ dunif( sdY/1000 , sdY*1000 )  # vague prior spanning the sample SD
}
###
library(rjags)
dat<-read.csv("~/Downloads/DBDA2Eprograms/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))
jagsModel = jags.model("~/research/oka/DBDA/ch16/model_16_1_2.txt", data=dataList, n.chains=3, n.adapt=1000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)
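Before using the samples, it is worth confirming that the three chains have converged; a minimal sketch using the diagnostics in coda (attached along with rjags):
gelman.diag(codaSamples)    # potential scale reduction factors should be near 1
effectiveSize(codaSamples)  # effective number of independent draws per parameter
summary(codaSamples)        # posterior means, SDs, and quantiles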
hist(y,nclass=15,probability = T)
x.temp = seq(40,200,0.1)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
norm.temp=dnorm(x.temp,mean=mcmcMat[randperm[i.plot],1],sd=mcmcMat[randperm[i.plot],2])  # columns: 1=mu, 2=sigma
lines(x.temp,norm.temp,col='orange')
}
### model 16.2
model {
for ( i in 1:Ntotal ) {
y[i] ~ dt( mu , 1/sigma^2 , nu )  # t likelihood makes the estimates robust to outliers
}
mu ~ dnorm( meanY , 1/(100*sdY)^2 )
sigma ~ dunif( sdY/1000 , sdY*1000 )
nu <- nuMinusOne+1  # keeps the normality parameter nu above 1
nuMinusOne ~ dexp(1/30.0)
}
###
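The fitting step for model 16.2 is omitted above; a minimal sketch, assuming the block is saved as model_16_2.txt (a hypothetical path) and reusing dataList from model 16.1. With variable.names=c("mu","nu","sigma"), the columns of mcmcMat come back in alphabetical order (1=mu, 2=nu, 3=sigma), which is what the plotting code below assumes:
jagsModel2 = jags.model("model_16_2.txt", data=dataList, n.chains=3, n.adapt=1000)
update( jagsModel2 , n.iter=1000)
codaSamples = coda.samples( jagsModel2 , variable.names=c("mu","nu","sigma"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)  # columns: mu, nu, sigma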
hist(y,nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
x.temp1 = seq(40,200,0.1)
x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],3]
t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],3]  # scaled t density: dt((x-mu)/sigma, nu)/sigma
lines(x.temp1,t.temp,col='orange')
}
### model 16.3
model {
for ( i in 1:Ntotal ) {
y[i] ~ dt( mu[group[i]] , 1/sigma[group[i]]^2 , nu )  # group-specific mean and scale, shared nu
}
for (j in 1:Ngroup){
mu[j] ~ dnorm( meanY , 1/(100*sdY)^2 )
sigma[j] ~ dunif( sdY/1000 , sdY*1000 )
}
nu <- nuMinusOne+1
nuMinusOne ~ dexp(1/29)  # prior mean of nu is 30
}
###
dat<-read.csv("~/Downloads/DBDA2Eprograms/TwoGroupIQ.csv")
y = dat$Score
group = as.numeric(as.factor(dat$Group))  # 1/2 coding; as.factor() guards against the character default of R >= 4.0
Ntotal = length(y)
Ngroup = length(unique(group))
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y),Ngroup=Ngroup,group=group)
jagsModel = jags.model("~/research/oka/DBDA/ch16/model_16_3.txt", data=dataList, n.chains=3, n.adapt=5000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)
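With both groups in one model, the quantity of interest is the difference between the group means; a minimal sketch (muDiff is a name introduced here; coda labels the columns "mu[1]", "mu[2]", and so on):
muDiff = mcmcMat[,"mu[2]"] - mcmcMat[,"mu[1]"]
hist(muDiff, nclass=30, probability=T)
quantile(muDiff, c(0.025, 0.5, 0.975))  # central 95% credible interval
mean(muDiff > 0)  # posterior probability that group 2 has the higher mean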
Cognitive Information Analysis Seminar W13
# deterministic transition matrix for the 4x4 gridworld (the terminal corners are state 15 and the displayed 0)
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
###
nRep=1000; gamma=1;
P = 0.25  # equiprobable random policy
R = -1    # reward of -1 on every move
V = rep(0,15)
for (i_rep in 1:nRep) {
V.old=V
for (i_state in 1:14) {
V[i_state]=sum(P*(R+gamma*V.old[trM[i_state,]]))
}
}
matrix(c(0,V),4,4)
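The loop above sweeps the Bellman expectation backup V(s) <- sum_a pi(a|s)*(R + gamma*V(s')). As a cross-check, the same values also solve a linear system directly; a sketch (Ppi, A, b, and V.direct are names introduced here):
# direct solve of V = b + gamma * Ppi %*% V for the 14 non-terminal states
Ppi = matrix(0, 14, 15)
for (s in 1:14) for (a in 1:4) Ppi[s, trM[s,a]] = Ppi[s, trM[s,a]] + P
A = diag(14) - gamma * Ppi[, 1:14]  # V[15] = 0, so its column drops out
b = rep(R, 14)
V.direct = solve(A, b)  # should match the V from the iterative sweeps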
# ch4.1 policy evaluation
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
# defining Reward & trans. prob.
R=-1;P=0.25;
# iterative policy evaluation
delta=1; gamma=1; tol=1e-8; V=rep(0,15);
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]));
}
}
# result
matrix(c(0,V),nrow=4)
#      [,1] [,2] [,3] [,4]
# [1,]    0  -14  -20  -22
# [2,]  -14  -18  -20  -20
# [3,]  -20  -20  -18  -14
# [4,]  -22  -20  -14    0
#####################################
# ch4.3 Policy Improvement
# easier one
#####################################
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1;V=rep(0,15);
delta=1; gamma=1; tol=1e-5;
bestP=sample(1:4,14,replace=T)
stable=F;counter=0
while (stable==F){
counter=counter+1
Vmat=matrix(V[trM],ncol=4)
Ppolicy=t(apply(Vmat,1,function(x) exp(10*x)/sum(exp(10*x))))
# iterative policy evaluation (delta must be reset here, otherwise the
# inner loop is skipped on every improvement step after the first)
delta=1
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(Ppolicy[i_state,]*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]))
}
}
# policy improvement: stable only if no state's greedy action changed
stable=T
for (i_state in 1:14) {
b=bestP[i_state]
bestP[i_state]=which.max(V[trM[i_state,]])
if (bestP[i_state]!=b) stable=F
}
}
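After the loop terminates, bestP holds the greedy action for each non-terminal state; a small sketch for displaying it on the grid (dirs is a name introduced here; 1=north, 2=east, 3=south, 4=west, following the columns of trM):
dirs = c("N","E","S","W")
matrix(c("-", dirs[bestP], "-"), 4, 4)  # "-" marks the two terminal corners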
#####################################
# ch4.4 Value Iteration
#####################################
# example 4.3: gambler's problem
V=c(rep(0,100),1);V.hist=c()  # V[101]=1 encodes the reward for reaching a capital of 100
p=c(0.4,0.6);  # P(heads)=0.4, P(tails)=0.6
gamma=1;delta=1; tol=1e-20
max.a=rep(0,101)
while (delta>tol) {
delta=0;
for (i_state in 1:99) {
v=V[i_state+1]
temp=matrix(0,nrow=1,ncol=i_state)
for (i_action in 1:i_state) {
temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
V[(max(i_state-i_action,0)+1)])))
}
V[i_state+1]=max(temp)
max.a[i_state+1]=which.max(round(temp,8))  # rounding avoids spurious floating-point tie-breaking
delta=max(delta,abs(v-V[i_state+1]))
}
V.hist=rbind(V.hist,V)
}
# plotting results
par(mfrow=c(1,2))
plot(V.hist[1,],type='l',lwd=2,xlab="Capital",ylab="Value Estimates",col='red')
lines(V.hist[2,],lwd=2,col='blue')
lines(V.hist[3,],lwd=2,col='green')
lines(V.hist[32,],lwd=2,col='black')
legend("topleft",c("sweep 1","sweep 2","sweep 3", "sweep 32"),
col=c("red","blue","green","black"),lwd=2)
barplot(max.a,xlab="Capital",ylab="Final Policy",col="white")
Cognitive Information Analysis Seminar: Reinforcement Learning 2
# example 3.8: Gridworld
# initializing value vector
V=rep(0,25);
# defining probability matrix
P=matrix(1/4,nrow=25,ncol=4)  # equiprobable random policy
# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6   # special state A: every action moves to A'
trM[20,]=18  # special state B: every action moves to B'
# defining reward matrix
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1  # -1 when an action would leave the grid (state unchanged)
R[10,]=10  # reward for any move out of A
R[20,]=5   # reward for any move out of B
# calculating state-values iteratively
# fixed number of iterations
nRep=1000; gamma=0.9;
for (i_rep in 1:nRep) {
V.old=V
for (i_state in 1:25) {
V[i_state]=sum(P[i_state,]*(R[i_state,]+gamma*V.old[trM[i_state,]]))
}
}
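The converged values can be inspected on the grid; a sketch (the display's orientation follows the state numbering above):
round(matrix(V, 5, 5), 1)  # state values arranged by state index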
### example 2: optimal state values for the same gridworld
V.star=V; # V calculated as in exp3.8
iter=0;maxItr=1000;delta=1;tol=1e-5;
while(iter < maxItr & delta > tol) {
delta=0;iter=iter+1
V.star.old=V.star
for (i_state in 1:25) {
v=V.star[i_state]
V.star[i_state]=max(R[i_state,]+gamma*V.star.old[trM[i_state,]])
delta=max(delta,abs(v-V.star[i_state]))
}
}
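A greedy policy with respect to V* can then be read off with a one-step lookahead; a minimal sketch (Q and greedy.a are names introduced here):
Q = R + gamma * matrix(V.star[trM], nrow=25)  # one-step lookahead action values
greedy.a = apply(Q, 1, which.max)  # 1=north, 2=east, 3=south, 4=west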
Cognitive Information Analysis Seminar: PSO
funZ<-function(x) {
x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]
}
n.theta = 10;
n.iter = 1000;
Wp = 0.1;
Wg = 0.1;
theta = matrix(rnorm(n.theta*2), nrow=n.theta)
v = matrix(rnorm(n.theta*2), nrow=n.theta)
theta.hist = array(0,c(n.theta,2,n.iter))
theta.hist[,,1]=theta
p.b.v <- apply(theta,1,funZ)
p.best = theta
g.b.v = min(p.b.v)
g.b.idx = which.min(p.b.v)
g.best <- theta[g.b.idx,]
for (i.iter in 2:n.iter){
v = v + Wp*runif(n.theta)*(p.best - theta)+ Wg*runif(n.theta)*t(g.best - t(theta))
theta = theta + v
fitG = apply(theta,1,funZ)
if (min(fitG) < g.b.v){
g.best = theta[which.min(fitG),]
g.b.v = min(fitG)
}
idx = which(fitG < p.b.v)
p.best[idx,] = theta[idx,]
p.b.v[idx] = fitG[idx]  # update personal bests only where they improved
theta.hist[,,i.iter]=theta
}
funZ2<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
xmin=-5;xmax=5;n_len=100;
x<-seq(xmin, xmax,length.out=n_len);
y<-x;
z<-outer(x,y,funZ2)
contour(x,y,z,nlevels=50,lwd=0.3,drawlabels=F,col='blue')
for (i.agent in 1:n.theta){
lines(theta.hist[i.agent,1,],theta.hist[i.agent,2,],col=i.agent)
points(theta[i.agent,1],theta[i.agent,2],pch=20,cex=2,col=i.agent)
}
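The best point found can then be compared against the known optimum; for this Styblinski-Tang-type function the two-dimensional minimum lies near (2.90, 2.90):
g.best  # best position found by the swarm
g.b.v   # best objective value found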
n.trial = 1000; n.rep = 100
Q.true = rnorm(10);
opt.act = which.max(Q.true)
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
for (i.rep in 1:n.rep){
Q.est = rep(0,10);R.cum = rep(0,10)
act.count = rep(0,10)
for (i.trial in 1:n.trial){
max.id = which(Q.est == max(Q.est))
if (length(max.id)>1){
action = sample(max.id,1)
} else {
action = max.id
}
if (action == opt.act){
oa.hist[i.rep, i.trial] = 1
}
r = rnorm(1, Q.true[action],1)
r.hist[i.rep, i.trial] = r
R.cum[action] = R.cum[action] + r
act.count[action] = act.count[action] + 1
Q.est = R.cum / (act.count + 0.01)  # small constant avoids 0/0 for untried arms
}
}
plot(colMeans(r.hist),type='l')
plot(colMeans(oa.hist),type='l',ylim=c(0,1))
### epsilon greedy
n.trial = 1000; n.rep = 1000
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
epsilon = 0.01
for (i.rep in 1:n.rep){
Q.est = rep(0,10);R.cum = rep(0,10)
act.count = rep(0,10)
Q.true = rnorm(10);
opt.act = which.max(Q.true)
for (i.trial in 1:n.trial){
if (runif(1)>epsilon){
max.id = which(Q.est == max(Q.est))
if (length(max.id)>1){
action = sample(max.id,1)
} else {
action = max.id
}
} else {
action = sample(10,1)
}
if (action == opt.act){
oa.hist[i.rep, i.trial] = 1
}
r = rnorm(1, Q.true[action],1)
r.hist[i.rep, i.trial] = r
R.cum[action] = R.cum[action] + r
act.count[action] = act.count[action] + 1
Q.est[action] = R.cum[action] / act.count[action]
}
}
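The same summary plots as in the greedy case apply here; a sketch mirroring the plots above:
plot(colMeans(r.hist),type='l')  # average reward per trial across runs
plot(colMeans(oa.hist),type='l',ylim=c(0,1))  # fraction of runs choosing the optimal action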
### softmax
n.trial = 1000; n.rep = 1000
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
gamma = 3
for (i.rep in 1:n.rep){
Q.est = rep(0,10);R.cum = rep(0,10)
act.count = rep(0,10)
Q.true = rnorm(10);
opt.act = which.max(Q.true)
for (i.trial in 1:n.trial){
action = sample(10, 1, prob=exp(gamma*Q.est)/sum(exp(gamma*Q.est)))
if (action == opt.act){
oa.hist[i.rep, i.trial] = 1
}
r = rnorm(1, Q.true[action],1)
r.hist[i.rep, i.trial] = r
R.cum[action] = R.cum[action] + r
act.count[action] = act.count[action] + 1
Q.est[action] = R.cum[action] / act.count[action]
}
}
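The sample-average update above can equivalently be written incrementally, Q_new = Q_old + (r - Q_old)/n, which removes the need to keep R.cum; a small runnable check of the identity (rs, q, and k are names introduced here):
rs = rnorm(5); q = 0
for (k in 1:5) q = q + (rs[k] - q)/k
all.equal(q, mean(rs))  # TRUE: the incremental average equals the sample mean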
Fundamentals of Data Analysis A, W12
source("http://www.matsuka.info/univ/course_folder/cutil.R")
dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt",
col.names = c("dump","method","subj","words"))
dat.aov<-aov(words~method+subj+Error(subj:method),data=dat)
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt")
interaction.plot(dat$duration,
dat$method,
dat$result,
pch=c(20,20),
col=c("skyblue","orange"),
ylab="score",
xlab="Duration",
lwd=3,
type='b',
cex=2,
trace.label="Method")
dat.aov<-aov(result~method*duration+Error(s+s:method),dat)
dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt")
dat.aov<-aov(result~method+duration+method:duration +
Error(s+method:s+duration:s+method:duration:s), data=dat)
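The ANOVA tables are then obtained with summary(), which reports the F tests within each error stratum; a minimal sketch:
summary(dat.aov)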