Cognitive Information Analysis Seminar B: DL2, ch. 2

txt = "You say goodbye and I say hello.";txt = tolower(txt)
txt = gsub('[.]', ' .',txt)
words = unlist(strsplit(txt, " "))
uniq.words = unique(words)
uniq.words[1]
which(uniq.words=="say")

n.uniq = length(uniq.words)
n.words = length(words)

# creating corpus
corpus = rep(0,n.words)
corpus = match(words,uniq.words)

## co-occurrence matrix (window size = 1)
cc = matrix(0, nrow = n.uniq, ncol = n.uniq)
rownames(cc) = colnames(cc) = uniq.words
for (i.w in 1:n.words){
  center = corpus[i.w]
  if (i.w > 1) {            # left neighbor
    cc[center, corpus[i.w - 1]] = cc[center, corpus[i.w - 1]] + 1
  }
  if (i.w < n.words) {      # right neighbor
    cc[center, corpus[i.w + 1]] = cc[center, corpus[i.w + 1]] + 1
  }
}

## PPMI (positive pointwise mutual information) matrix
ppmi <- function(cc, eps = 1e-8){
  N = sum(cc)
  r.sum = rowSums(cc)
  # pmi(i,j) = log2( C(i,j) * N / (C(i) * C(j)) ); eps avoids log2(0)
  pmi = log2(cc * N / outer(r.sum, r.sum) + eps)
  PPMI = pmax(0, pmi)
  return(PPMI)
}

# performing SVD on the PPMI matrix
PPMI = ppmi(cc)
s = svd(PPMI)
plot(s$u[,1], s$u[,2], pch = 20, col = 'red')
text(s$u[,1], s$u[,2], rownames(cc))

# calc. cosine similarity b/w words
words.sim <- function(word1, word2, eps = 1e-8){
  nw1 = word1 / (sqrt(sum(word1^2)) + eps)
  nw2 = word2 / (sqrt(sum(word2^2)) + eps)
  return(as.numeric(nw1 %*% nw2))
}

w1 = cc[which(uniq.words=="you"),]
w2 = cc[which(uniq.words=="i"),]
words.sim(w1,w2)

# reporting most similar words
most.sim <- function(word, uniq.words, cc, N=5){
  n.uniq = length(uniq.words)
  word2 = cc[which(uniq.words==word),]
  res = data.frame(words = uniq.words, similarity = rep(0,n.uniq))
  top = data.frame(words = rep("",N+1), similarity=rep(0,N+1))
  res$similarity = apply(cc,1, function(x) words.sim(x,word2))
  sort.res = sort(res$similarity, decreasing = T,  index.return=T)
  top$words = res$words[sort.res$ix[1:(N+1)]]
  top$similarity = res$similarity[sort.res$ix[1:(N+1)]]
  self.idx = which(top$words == word)
  return(top[-self.idx,])
} 
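## usage example (not part of the original notes): the five words most similar to
## "you" in the toy corpus, ranked by cosine similarity of co-occurrence rows
most.sim("you", uniq.words, cc, N = 5)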

## Penn TB
f = file("~/courses/CogMod/CMA2018/ptb.tex")
ptb<-readLines(con=f)
ptb = paste(ptb,"")
words = unlist(strsplit(ptb, " "))
uniq.words = unique(words)
n.uniq = length(uniq.words)
n.words = length(words)

corpus = rep(0,n.words)
corpus = match(words,uniq.words)
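## contxt() is called below but never defined in these notes.
## A minimal sketch, assuming it wraps the same window-1 co-occurrence count used above:
contxt <- function(corpus, uniq.words, words){
  n.uniq = length(uniq.words)
  n.words = length(words)
  cc = matrix(0, nrow = n.uniq, ncol = n.uniq)
  rownames(cc) = colnames(cc) = uniq.words
  for (i.w in 1:n.words){
    center = corpus[i.w]
    if (i.w > 1)       cc[center, corpus[i.w - 1]] = cc[center, corpus[i.w - 1]] + 1
    if (i.w < n.words) cc[center, corpus[i.w + 1]] = cc[center, corpus[i.w + 1]] + 1
  }
  return(cc)
}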

cc <- contxt(corpus,uniq.words,words)
PPMI<-ppmi(cc)
library(rsvd)
## This takes a while.
s = rsvd(PPMI)

Fundamentals of Data Analysis B: Factor Analysis

## chisq test
chisq.test(c(72,23,16,49),p=rep(40,4),rescale.p=T)
M=matrix(c(52,48,8,42),nrow=2)
chisq.test(M,correct=F)

## PCA
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/AMSA_track_mens.csv",row.names=1)
plot(dat,pch=20)
dat.pca<-princomp(dat)

dist = c(100,200,400,800,1500,5000,10000,42195)
log.dist=log(dist)

plot(log.dist, dat.pca$loadings[,1],pch=20,col='red',
  cex=2,ylab="PC loadings (1st)",xlab="Log(distance)")
plot(log.dist, dat.pca$loadings[,2],pch=20,col='red',
  cex=5,ylab="PC loadings (2nd)",xlab="Log(distance)")

### EFA
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/FA01.csv")
dat.fa<-factanal(dat,1)

dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
dat.fa<-factanal(dat,1)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.fa<-factanal(dat,2,rotation="varimax",scores="regression")
plot(dat.fa$loadings[,1],dat.fa$loadings[,2],pch=20,cex=2,col="red",
     xlab="Factor 1", ylab="Factor 2")
abline(h=0,v=0)
text(dat.fa$loadings[,1],dat.fa$loadings[,2],colnames(dat),cex=1.4)

dat.model1<-factanal(dat,1)
dat.model2<-factanal(dat,2)
dat.model3<-factanal(dat,3)
dat.model4<-factanal(dat,4)

print(m1res<-c(dat.model1$dof,dat.model1$PVAL))
print(m2res<-c(dat.model2$dof,dat.model2$PVAL))
print(m3res<-c(dat.model3$dof,dat.model3$PVAL))
print(m4res<-c(dat.model4$dof,dat.model4$PVAL))
1-pchisq(dat.model3$STATISTIC-dat.model4$STATISTIC,dat.model3$dof-dat.model4$dof)

AIC1=2*(36-dat.model1$dof)+dat.model1$STATISTIC
AIC2=2*(36-dat.model2$dof)+dat.model2$STATISTIC
AIC3=2*(36-dat.model3$dof)+dat.model3$STATISTIC
AIC4=2*(36-dat.model4$dof)+dat.model4$STATISTIC
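
## quick comparison (not part of the original notes): the model with the smallest AIC is preferred
which.min(c(AIC1, AIC2, AIC3, AIC4))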

### CFA with sem ###
install.packages('sem')
library(sem)

# cfa() reads the factor specification typed on the following lines (terminated by a blank line)
model01=cfa(reference.indicator=FALSE)
F1: extrovert, cheerful, leadership, antisocial, talkative, motivated, hesitance, popularity

mod1<-sem(model01, cov(dat), nrow(dat))
summary(mod1)

model02=cfa(reference.indicator=FALSE)
F1: extrovert, leadership, motivated, hesitance 
F2: cheerful, antisocial, talkative, popularity

mod2<-sem(model02, cov(dat), 100)
opt <- options(fit.indices = c("RMSEA"))
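## displaying the fit (a sketch, not part of the original notes); with the
## fit.indices option set above, summary() also reports RMSEA
summary(mod2)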

Cognitive Information Analysis Seminar

# data preparation
dat<-read.csv("~/Downloads/DBDA2Eprograms/z15N50.csv")
y=dat$y
Ntotal=length(dat$y)
datalist = list(y=y,Ntotal=Ntotal)

# model
model.txt = "
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern( theta )
  }
  theta ~ dbeta( 1, 1 )
}
"
writeLines(model.txt, "model.txt")

# jags
library(rjags)
jagsModel = jags.model(file="model.txt",data=datalist,n.chains=3,n.adapt=500)
update(jagsModel,n.iter=1000)
codaSamples=coda.samples(jagsModel,variable.names=c("theta"),n.iter=5000)
mcmcMat<-as.matrix(codaSamples)
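# posterior summary (not part of the original notes): mean and central 95% interval of theta
mean(mcmcMat[,"theta"])
quantile(mcmcMat[,"theta"], probs = c(0.025, 0.975))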

# chain
par(mfrow=c(2,2))
cols=c("orange", "skyblue","pink")
mcmc1<-as.mcmc(codaSamples[[1]])
mcmc2<-as.mcmc(codaSamples[[2]])
mcmc3<-as.mcmc(codaSamples[[3]])
plot(as.matrix(mcmc1),type='l',col=cols[1])
lines(as.matrix(mcmc2),col=cols[2])
lines(as.matrix(mcmc3),col=cols[3])

# autocorrelation
ac1=autocorr(mcmc1,lags=0:50)
ac2=autocorr(mcmc2,lags=0:50)
ac3=autocorr(mcmc3,lags=0:50)
plot(ac1, type='o', pch = 20, col = cols[1], ylab = "Autocorrelation", xlab = "Lag")
lines(ac2, type='o', pch = 20, col = cols[2])
lines(ac3, type='o', pch = 20, col = cols[3])

# shrink factor
resALL=mcmc.list(mcmc1,mcmc2,mcmc3)
gd1=gelman.plot(resALL, auto.layout = F)
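# numerical shrink factor (a sketch, not part of the original notes);
# values close to 1 indicate convergence
gelman.diag(resALL)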

# density
den1=density(mcmc1)
den2=density(mcmc2)
den3=density(mcmc3)
plot(den1, type='l', col = cols[1], main = " ", xlab = "param. value",lwd=2.5)
lines(den2, col = cols[2], lwd=2.5)
lines(den3, col = cols[3], lwd=2.5)

par(mfrow=c(1,1))

Cognitive Information Analysis: DL1 review


sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}

sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x)
  return(dout*(1-y)*y)
}

affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

# softmax + cross-entropy: forward pass returns the mean loss over the batch
softmax.forwd <- function(x, target){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7;
  R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target,  dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return((y-target)/R)
}

# softmax probabilities only (used for prediction); no target needed
softmax.pred <- function(x){
  max.x = apply(x,1,max)
  C = ncol(x)
  x = x - max.x%*%matrix(1,nrow=1,ncol=C)
  y = exp(x)/rowSums(exp(x))
  return(y)
}
N = 100; dim = 2; n.class =3;
x = matrix(0,nrow=N*n.class, ncol = dim)
t = matrix(0,nrow=N*n.class, ncol = n.class)

for (i.cl in 1:n.class){
  for (i.N in 1:N){
    radius = i.N/N
    theta = i.cl*4+4*radius+rnorm(1,0,0.2)
    idx = N*(i.cl-1)+i.N
    x[idx,]=c(radius*sin(theta),radius*cos(theta))
    t[idx,i.cl] = 1
  }
}



# spiral data
plot(x[1:100,1],x[1:100,2],pch=20,col='black',cex=2,xlim=c(-1,1),ylim=c(-1,1))
points(x[101:200,1],x[101:200,2],pch=20,col='blue',cex=2)
points(x[201:300,1],x[201:300,2],pch=20,col='green',cex=2)

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] =
      matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)],sd = 0.01),nrow=n.neurons[i.layer])
    b[[i.layer]] =  matrix(rnorm(n.neurons[(i.layer+1)],sd = 0.01), nrow = 1)
  }
  return(list(W = W,b = b))
}

train.x = x
train.y = t
params = init.network(c(2,30,30,3))
batch_size = 50; n.iter = 50000; lambda = 0.3   # lambda = learning rate
n.train = nrow(train.x)
loss = rep(0,n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch,params$W[[1]],params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1,params$W[[2]],params$b[[2]])
  z2 = sigmoid.forwd(a2)
  a3 = affine.forwd(z2,params$W[[3]],params$b[[3]])
  z3 = softmax.forwd(a3,t.batch)
  loss[i.iter] = z3
  dwSM = softmax.bckwd(a3, t.batch, 1)
  dwA3 = affine.bckwd(a2,params$W[[3]],params$b[[3]],dwSM)
  dwSG2 = sigmoid.bckwd(a2,dwA3$dx)
  dwA2 = affine.bckwd(a1,params$W[[2]],params$b[[2]],dwSG2)
  dwSG = sigmoid.bckwd(a1,dwA2$dx)
  dwA1 = affine.bckwd(x.batch,params$W[[1]],params$b[[1]],dwSG)
  params$W[[3]] = params$W[[3]] - lambda*dwA3$dW
  params$b[[3]] = params$b[[3]] - lambda*dwA3$db
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
}

# plotting results
plot(loss,type='l', xlab = "trial")
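
# quick check of the fit (a sketch, not part of the original notes):
# classification accuracy on the training set
a1 = affine.forwd(train.x, params$W[[1]], params$b[[1]])
z1 = sigmoid.forwd(a1)
a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
z2 = sigmoid.forwd(a2)
a3 = affine.forwd(z2, params$W[[3]], params$b[[3]])
train.acc = mean(apply(softmax.pred(a3), 1, which.max) == apply(train.y, 1, which.max))
train.acc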

library(plot3D)
M <- mesh(seq(-1,1,length.out = 300),seq(-1,1,length.out = 300))
temp.x = cbind(as.vector(M$x),as.vector(M$y))
a1 = affine.forwd(temp.x,params$W[[1]],params$b[[1]])
z1 = sigmoid.forwd(a1)
a2 = affine.forwd(z1,params$W[[2]],params$b[[2]])
z2 = sigmoid.forwd(a2)
a3 = affine.forwd(z2,params$W[[3]],params$b[[3]])
cl = softmax.pred(a3)
cl.pred = apply(cl,1,which.max)

image(matrix(cl.pred,300,300))
x2= x*0.5+0.5
points(x2[1:100,1],x2[1:100,2],pch=20,col='black',cex=2)
points(x2[101:200,1],x2[101:200,2],pch=20,col='blue',cex=2)
points(x2[201:300,1],x2[201:300,2],pch=20,col='green',cex=2)

Fundamentals of Data Analysis B: W03

dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
biplot(dat.pca)
dat.pca$scores
dat.pca$loadings

scoreP1S1 = dat.pca$loadings[1,1]*(dat[1,1]-mean(dat$writing))+
  dat.pca$loadings[2,1]*(dat[1,2]-mean(dat$thesis))+
  dat.pca$loadings[3,1]*(dat[1,3]-mean(dat$interview))

scoreP2S2 = dat.pca$loadings[1,2]*(dat[2,1]-mean(dat$writing))+
  dat.pca$loadings[2,2]*(dat[2,2]-mean(dat$thesis))+
  dat.pca$loadings[3,2]*(dat[2,3]-mean(dat$interview))
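
# check (not part of the original notes): the manual calculations above should
# reproduce the scores returned by princomp
dat.pca$scores[1,1]   # compare with scoreP1S1
dat.pca$scores[2,2]   # compare with scoreP2S2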

dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv")
dat.pca<-princomp(dat)
screeplot(dat.pca,type="lines")
biplot(dat.pca)
biplot(dat.pca,choices=c(2,3))


dat2<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat2[,1]=dat2[,1]*0.1
dat2.pca<-princomp(dat2)

Fundamentals of Data Analysis B: R review

dat<-read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
dat2<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
source("http://www.matsuka.info/univ/course_folder/cutil.R")
score.X = dat2$result[dat2$method=="method.X"]
score.Y = dat2$result[dat2$method=="method.Y"]
dat2X<-dat2[dat2$method=="method.X",]

Cognitive Information Analysis


### model 
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dnorm( mu , 1/sigma^2  )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
}
###

dat<-read.csv("~/Downloads/DBDA2Eprograms/TwoGroupIQ.csv")
y = dat$Score[dat$Group=="Smart Drug"]
Ntotal = length(y)
dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y))

jagsModel = jags.model("~/research/oka/DBDA/ch16/model_16_1_2.txt", data=dataList, n.chains=3, n.adapt=1000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)

hist(y,nclass=15,probability = T)
x.temp = seq(40,200,0.1)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  norm.temp=dnorm(x.temp,mean=mcmcMat[randperm[i.plot],1],sd=mcmcMat[randperm[i.plot],2])
  lines(x.temp,norm.temp,col='orange')
}

### model 16.2
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu , 1/sigma^2 , nu )
  }
  mu ~ dnorm( meanY , 1/(100*sdY)^2 )
  sigma ~ dunif( sdY/1000 , sdY*1000 )
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/30.0)
}
###
hist(y,nclass=20,probability = T)
n.plot = 100
randperm = sample(1:nrow(mcmcMat),n.plot)
for (i.plot in 1:n.plot){
  x.temp1 = seq(40,200,0.1)
  x.temp2 = (x.temp1 - mcmcMat[randperm[i.plot],1])/mcmcMat[randperm[i.plot],3]
  t.temp=dt(x.temp2,mcmcMat[randperm[i.plot],2])/mcmcMat[randperm[i.plot],3]
  lines(x.temp1,t.temp,col='orange')
}

### model 16.3
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dt( mu[group[i]] , 1/sigma[group[i]]^2 , nu )
  }
  for (j in 1:Ngroup){
    mu[j] ~ dnorm( meanY , 1/(100*sdY)^2 )
    sigma[j] ~ dunif( sdY/1000 , sdY*1000 )
  }      
  nu <- nuMinusOne+1
  nuMinusOne ~ dexp(1/29)
}
###
dat<-read.csv("~/Downloads/DBDA2Eprograms/TwoGroupIQ.csv")
y = dat$Score
group = as.numeric(as.factor(dat$Group))
Ntotal = length(y)
Ngroup = length(unique(group))

dataList = list(y = y, Ntotal = Ntotal, meanY = mean(y), sdY = sd(y),Ngroup=Ngroup,group=group)
jagsModel = jags.model("~/research/oka/DBDA/ch16/model_16_3.txt", data=dataList, n.chains=3, n.adapt=5000 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=c("mu","sigma","nu"), n.iter=5000, thin=5)
mcmcMat<-as.matrix(codaSamples)
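
# posterior of the group difference (a sketch, not part of the original notes);
# assumes coda's usual "mu[1]", "mu[2]" column naming in mcmcMat
mu.diff = mcmcMat[,"mu[2]"] - mcmcMat[,"mu[1]"]
hist(mu.diff, nclass = 30, probability = TRUE, main = "mu[2] - mu[1]")
mean(mu.diff > 0)   # posterior probability that group 2 has the larger mean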

Cognitive Information Analysis Seminar: W13

north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
###

nRep=1000; gamma=1;
P = 0.25
R = -1
V = rep(0,15)
for (i_rep in 1:nRep) {
  V.old=V
  for (i_state in 1:14) {
    V[i_state]=sum(P*(R+gamma*V.old[trM[i_state,]]))
  }
}
matrix(c(0,V),4,4)

########################################
#   ch4.1 policy evaluation 
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
 
# defining Reward & trans. prob.
R=-1;P=0.25;
 
# iterative policy evaluation
delta=1; gamma=1; tol=1e-8; V=rep(0,15);
while (delta>tol) {
 delta=0;
  for (i_state in 1:14) {
    v=V[i_state]
    V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
    delta=max(delta,abs(v-V[i_state]));
  }
}
 
# result
> matrix(c(0,V),nrow=4)
     [,1] [,2] [,3] [,4]
[1,]    0  -14  -20  -22
[2,]  -14  -18  -20  -20
[3,]  -20  -20  -18  -14
[4,]  -22  -20  -14    0
 
#####################################
#   ch4.3 Policy Improvement
#   easier one 
#####################################
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1;V=rep(0,15);
delta=1; gamma=1; tol=1e-5; 
bestP=sample(1:4,14,replace=T)
stable=F;counter=0
while (stable==F){
  counter=counter+1
  Vmat=matrix(V[trM],ncol=4)
  # softmax policy over the values of the four successor states
  Ppolicy=t(apply(Vmat,1,function(x) exp(10*x)/sum(exp(10*x))))
# iterative policy evaluation (delta reset before every evaluation pass)
  delta=1
  while (delta>tol) {
    delta=0;
    for (i_state in 1:14) {
      v=V[i_state]
      V[i_state]=sum(Ppolicy[i_state,]*(R+gamma*V[trM[i_state,]]))
      delta=max(delta,abs(v-V[i_state]))
    }
  }
# policy improvement: stable only if no state's greedy action changed
  stable=T
  for (i_state in 1:14) {
    b=bestP[i_state]
    bestP[i_state]=which.max(V[trM[i_state,]])
    if (bestP[i_state]!=b) stable=F
  }
}
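
# inspecting the resulting greedy policy (a sketch, not part of the original notes):
# actions 1-4 correspond to N/E/S/W; "T" marks the terminal corners
matrix(c("T", c("N","E","S","W")[bestP], "T"), nrow = 4)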
 
 
#####################################
#   ch4.4 Value Iteration 
#####################################
# example 4.3
V=c(rep(0,100),1);V.hist=c()
p=c(0.4,0.6);
gamma=1;delta=1; tol=1e-20
max.a=rep(0,101)
while (delta>tol) {
  delta=0;
  for (i_state in 1:99) {
    v=V[i_state+1]
    temp=matrix(0,nrow=1,ncol=i_state)
    for (i_action in 1:i_state) {
       temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
         V[(max(i_state-i_action,0)+1)])))
     }
    V[i_state+1]=max(temp)
    max.a[i_state+1]=which.max(round(temp,8))
    delta=max(delta,abs(v-V[i_state+1]))
  }
  V.hist=rbind(V.hist,V)
}
# plotting results
par(mfrow=c(1,2))
plot(V.hist[1,],type='l',lwd=2,xlab="Capital",ylab="Value Estimates",col='red')
lines(V.hist[2,],lwd=2,col='blue')
lines(V.hist[3,],lwd=2,col='green')
lines(V.hist[32,],lwd=2,col='black')
legend("topleft",c("sweep 1","sweep 2","sweep 3", "sweep 32"),
 col=c("red","blue","green","black"),lwd=2)
barplot(max.a,xlab="Capital",ylab="Final Policy",col="white")

Cognitive Information Analysis Seminar: Reinforcement Learning 2

# example 3.8: Gridworld
# initializing value vector
V=rep(0,25);                
 
# defining probability matrix
P=matrix(1/4,nrow=25,ncol=4) # equiprobable random policy
 
# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18
 
# defining reward matrix
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5
 
# calculating state-values iteratively
# fixed number of iterations 
nRep=1000; gamma=0.9;
for (i_rep in 1:nRep) {
  V.old=V
  for (i_state in 1:25) {
    V[i_state]=sum(P[i_state,]*(R[i_state,]+gamma*V.old[trM[i_state,]]))
  }
}
 

### example 3.8 (continued): optimal state-value function V* via value iteration
### (re-uses trM and R defined above for the 5x5 gridworld)

V.star=V; # V calculated as in exp3.8
iter=0;maxItr=1000;delta=1;tol=1e-5;
while(iter < maxItr & delta > tol) {
  delta=0;iter=iter+1
  V.star.old=V.star
  for (i_state in 1:25) {
    v=V.star[i_state]
    V.star[i_state]=max(R[i_state,]+gamma*V.star.old[trM[i_state,]])
    delta=max(delta,abs(v-V.star[i_state]))
  }
}


Cognitive Information Analysis Seminar: Particle Swarm Optimization (PSO)

funZ<-function(x) {
  x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]
}

n.theta = 10;
n.iter = 1000;
Wp = 0.1;
Wg = 0.1;
theta = matrix(rnorm(n.theta*2), nrow=n.theta)
v = matrix(rnorm(n.theta*2), nrow=n.theta)
theta.hist = array(0,c(n.theta,2,n.iter))
theta.hist[,,1]=theta
p.b.v <- apply(theta,1,funZ)
p.best = theta
g.b.v = min(p.b.v)
g.b.idx = which.min(p.b.v)
g.best <- theta[g.b.idx,]
v = v + Wp*runif(n.theta)*(p.best - theta)+ Wg*runif(n.theta)*t(g.best - t(theta))
theta = theta + v


for (i.iter in 2:n.iter){
  v = v + Wp*runif(n.theta)*(p.best - theta)+ Wg*runif(n.theta)*t(g.best - t(theta))
  theta = theta + v
  fitG = apply(theta,1,funZ)
  if (min(fitG) < g.b.v){
    g.best = theta[which.min(fitG),]
    g.b.v = min(fitG)
  }
  idx = which(fitG < p.b.v)
  p.best[idx,] = theta[idx,]
  p.b.v[idx] = fitG[idx]
  theta.hist[,,i.iter]=theta
}
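
# result check (not part of the original notes): best position and value found by the swarm
g.best   # best parameter vector found
g.b.v    # corresponding (minimal) value of funZ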


funZ2<-function(x,y) {x^4-16*x^2-5*x+y^4-16*y^2-5*y}
xmin=-5;xmax=5;n_len=100;
x<-seq(xmin, xmax,length.out=n_len);
y<-x;
z<-outer(x,y,funZ2)
contour(x,y,z,nlevels=50,lwd=0.3,drawlabels=F,col='blue')
for (i.agent in 1:n.theta){
  lines(theta.hist[i.agent,1,],theta.hist[i.agent,2,],col=i.agent)
  points(theta[i.agent,1],theta[i.agent,2],pch=20,cex=2,col=i.agent)
}

### greedy (n-armed bandit)
n.trial = 1000; n.rep = 100
Q.true = rnorm(10); 
opt.act = which.max(Q.true)
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
for (i.rep in 1:n.rep){
  Q.est = rep(0,10);R.cum = rep(0,10)
  act.count = rep(0,10)
  for (i.trial in 1:n.trial){
    max.id = which(Q.est == max(Q.est))
    if (length(max.id)>1){
      action = sample(max.id,1)
    } else {
      action = max.id
    }
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    } 
    r = rnorm(1, Q.true[action],1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est = R.cum / (act.count + 0.01)
  }
}

plot(colMeans(r.hist),type='l')
plot(colMeans(oa.hist),type='l',ylim=c(0,1))


### epsilon greedy
n.trial = 1000; n.rep = 1000
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
epsilon = 0.01
for (i.rep in 1:n.rep){
  Q.est = rep(0,10);R.cum = rep(0,10)
  act.count = rep(0,10)
  Q.true = rnorm(10); 
  opt.act = which.max(Q.true)
  for (i.trial in 1:n.trial){
    if (runif(1)>epsilon){
      max.id = which(Q.est == max(Q.est))
      if (length(max.id)>1){
        action = sample(max.id,1)
      } else {
        action = max.id
      }
    } else {
      action = sample(10,1)
    }
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    } 
    r = rnorm(1, Q.true[action],1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est[action] = R.cum[action] / act.count[action]
  }
}

### softmax
n.trial = 1000; n.rep = 1000
r.hist = matrix(0,n.rep, n.trial)
oa.hist = matrix(0,n.rep, n.trial)
gamma = 3
for (i.rep in 1:n.rep){
  Q.est = rep(0,10);R.cum = rep(0,10)
  act.count = rep(0,10)
  Q.true = rnorm(10); 
  opt.act = which.max(Q.true)
  for (i.trial in 1:n.trial){
    action = sample(10, 1, prob=exp(gamma*Q.est)/sum(exp(gamma*Q.est)))
    if (action == opt.act){
      oa.hist[i.rep, i.trial] = 1
    } 
    r = rnorm(1, Q.true[action],1)
    r.hist[i.rep, i.trial] = r
    R.cum[action] = R.cum[action] + r
    act.count[action] = act.count[action] + 1
    Q.est[action] = R.cum[action] / act.count[action]
  }
}
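
# plotting results, mirroring the greedy example above (a sketch, not part of the original notes)
plot(colMeans(r.hist), type='l')
plot(colMeans(oa.hist), type='l', ylim=c(0,1))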