# function to initialize ALCOVE's parameter set
ALC.init<-function() {
stimType='nom' # stimType
c=2 # gain
phi=5 # decision strength
lrA=0.1 # learning rate for attentions
lrW=0.1 # learning rate for associations
return(data.frame(stimType,lrA,lrW,c,phi))
}
# main function
alcove<-function(parmSet,inp,targ,iter) {
# ----ALCOVE for R ---
# input arguments
# parmSet - parameter set
# inp - input matrix (n_sample x n_dimension)
# targ - target matrix (n_sample x n_category)
# iter - # of training epochs
# ----ALCOVE for R ---
# initialization
inpSize=dim(inp)
targSize=dim(targ)
alpha=matrix(1,nrow=1,ncol=inpSize[2])/inpSize[2]
w=matrix(rnorm(inpSize[1]*targSize[2])*0.1,ncol=inpSize[1])
accu=rep(0,nrow=iter+1)
attn=matrix(0,nrow=iter+1,ncol=inpSize[2])
attn[1,]=alpha
# end initialization
for (i_iter in 1:iter) {
ro=sample(1:inpSize[1],inpSize[1])
prob_temp=0;
for (i_tr in 1:inpSize[1]) {
if (parmSet$stimType=='nom') {
diff= matrix(as.numeric(matrix(1,inpSize[1],1)%*%inp[ro[i_tr],]!=inp),nrow=inpSize[1])
} else { diff=inp[ro[i_tr],]-inp }
exemp=exp(-parmSet$c*(diff%*%t(alpha)))
out=w%*%exemp
err=targ[ro[i_tr],]-out
delta_W=parmSet$lrW*exemp%*%t(err)
delta_A=-parmSet$lrA*(t(err)%*%w*t(exemp))%*%diff*parmSet$c
w=w+t(delta_W)
alpha=alpha+delta_A
alpha[which(alpha<0)]=0
pT=(exp(parmSet$phi*out)/sum(exp(parmSet$phi*out)))*targ[ro[i_tr],]
prob_temp=prob_temp+pT[which(!pT==0)]
}
accu[i_iter+1]=prob_temp/targSize[1]
attn[i_iter+1,]=alpha
}
attnN=attn/apply(attn,1, sum)
out=matrix(0,nrow=targSize[2],ncol=inpSize[1])
for (i_tr in 1:inpSize[1]) {
if (parmSet$stimType=='nom') {
diff= matrix(as.numeric(matrix(1,inpSize[1],1)%*%inp[i_tr,]!=inp),nrow=inpSize[1])
} else { diff=inp[i_tr,]-inp}
exemp=exp(-parmSet$c*(diff%*%t(alpha)))
out[,i_tr]=w%*%exemp
}
return(list(alpha=alpha,w=w,attn=attn,attnN=attnN,accu=accu,out=out,ps=parmSet))
}
##################################################
# example
# カテゴリー学習モデル:ALCOVE
# medin & schaffer 1978 のシミュレーション
##################################################
inp=matrix(c(1, 1, 1, 0,
1, 0, 1, 0,
1, 0, 1, 1,
1, 1, 0, 1,
0, 1, 1, 1,
1, 1, 0, 0,
0, 1, 1 ,0,
0, 0, 0, 1,
0, 0, 0, 0),nrow=9,byrow=T)
targ=matrix(c(1, 0,
1, 0,
1, 0,
1, 0,
1, 0,
0, 1,
0, 1,
0, 1,
0, 1),nrow=9,byrow=T)
parmSet<-ALC.init()
result<-alcove(parmSet,inp,targ,30)
plot(result$accu, type="o", col="black",ylim=c(0,1.1),
xlab="training", ylab="Proportion", cex.lab=1.4)
lines(result$attnN[,1], type="o", pch=22, lty=2, col="blue")
lines(result$attnN[,2], type="o", pch=23, lty=3, col="red")
lines(result$attnN[,3], type="o", pch=24, lty=4, col="green")
lines(result$attnN[,4], type="o", pch=25, lty=5, col="cyan")
title(main="ALOCVE: Learning MS (1978) 5-4 Stimulus Set ")
legend("topleft", c("Accuracy","AttnD1","AttnD2","AttnD3","AttnD4"),
cex=1,col=c("black","blue","red","green","cyan"), pch=c(21:25), lty=c(1:5));
RL Sutton & Barto Ch.5
########################################
# ch5.1 Monte Carlo policy evaluation
########################################
#
# reference: http://waxworksmath.com
#
########################################
# function to calc. value of cards
card.value<-function(adj.cards) {
sum.cards=sum(adj.cards)
if (any(adj.cards==1) & sum.cards<=11) {
sum.cards=sum.cards+10;
usableA=1 #true
} else {usableA=2} #false
return(c(sum.cards,usableA))
}
# function to calc. reward
calc.reward<-function(p.val,d.val) {
if (p.val>21) { reward=-1
} else {if (d.val>21) { reward=1
} else {if (p.val==d.val) {reward=0
} else{ reward=ifelse(p.val>d.val,1,-1)}
}}}
# main function
BJ_MC_fixedPolicy<-function(policy=20,maxIter=1e6){
rew.sum=array(0,dim=c(10,10,2))
rew.count=array(0,dim=c(10,10,2))
for (i_play in 1:maxIter) {
cards=sample(rep(1:13,4))
player=cards[1:2]; adj.player=pmin(player,10)
dealer=cards[3:4]; adj.dealer=pmin(dealer,10)
cards=cards[-(1:4)]
d.val=card.value(adj.dealer)
p.val=card.value(adj.player)
state.hist=c(adj.dealer[1],p.val[1],p.val[2])
while (p.val[1] < policy) {
player=c(player,cards[1]); adj.player=pmin(player,10)
cards=cards[-1]
p.val=card.value(adj.player)
state.hist=rbind(state.hist,c(adj.dealer[1],p.val[1],p.val[2]))
}
while (d.val[1] < 17) {
dealer=c(dealer,cards[1]); adj.dealer=pmin(dealer,10)
cards=cards[-1]
d.val=card.value(adj.dealer)
}
rew=calc.reward(p.val[1],d.val[1])
n.state=nrow(state.hist)
if (is.null(n.state)) {
n.state=1
state.hist=t(as.matrix(state.hist))
}
for (i_state in 1:n.state) {
if (state.hist[i_state,2] > 11 & state.hist[i_state,2] < 22) {
rew.sum[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]
= rew.sum[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]+rew
rew.count[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]
=rew.count[state.hist[i_state,1],(state.hist[i_state,2]-11),state.hist[i_state,3]]+1
}
}
}
return(rew.sum/rew.count)
}
# function 2 plot results
plot.BJ_MC<-function(V){
par(mfrow=c(1,2))
image(V[,,1],main="with usable Ace",xaxt='n',yaxt='n',
xlab="Dealer showing",ylab="Player sum")
axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
axis(2,at=seq(0,1,length.out=10),label=12:21)
image(V[,,2],main="without usable Ace",xaxt='n',yaxt='n',
xlab="Dealer showing",ylab="Player sum")
axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
axis(2,at=seq(0,1,length.out=10),label=12:21)
}
> V=BJ_MC(17)
> plot.BJ_MC(V)
########################################
# ch5.3 Monte Carlo exploring starts
########################################
# function to calc. value of cards
card.value<-function(adj.cards) {
sum.cards=sum(adj.cards)
if (any(adj.cards==1) & sum.cards<=11) {
sum.cards=sum.cards+10;
usableA=1 #true
} else {usableA=2} #false
return(c(sum.cards,usableA))
}
# function to calc. reward
calc.reward<-function(p.val,d.val) {
if (p.val>21) { reward=-1
} else {if (d.val>21) { reward=1
} else {if (p.val==d.val) {reward=0
} else{ reward=ifelse(p.val>d.val,1,-1)}
}}}
# main function
BJ_MC<-function(maxIter=1e6){
rew.sum=array(0,dim=c(10,10,2,2))
rew.count=array(1,dim=c(10,10,2,2))
Q=array(0,dim=c(10,10,2))
V=array(0,dim=c(10,10,2))
policy=array(sample(0:1,10*10*2,replace=T),dim=c(10,10,2))
# policy: 1 = hit, 0 = stay
for (i_play in 1:maxIter) {
# initial draw
cards=sample(c(rep(1:10,4),rep(10,12)))
player=cards[1:2]
dealer=cards[3:4]
cards=cards[-(1:4)]
d.val=card.value(dealer)
p.val=card.value(player)
while( p.val[1] < 12 ) {
player=c(player,cards[1])
cards=cards[-1]
p.val=card.value(player)
}
action=sample(0:1,1)
state.hist=c(dealer[1],p.val[1],p.val[2],(action+1))
# player's action
while (action==1 & p.val[1]<22) {
player=c(player,cards[1])
cards=cards[-1]
p.val=card.value(player)
state.hist=rbind(state.hist,c(dealer[1],p.val[1],p.val[2],(action+1)))
if (p.val[1]<22) {
action=policy[dealer[1],(p.val[1]-11),p.val[2]]
}
}
# dealer's action
while (d.val[1]<17) {
dealer=c(dealer,cards[1])
cards=cards[-1]
d.val=card.value(dealer)
}
rew=calc.reward(p.val[1],d.val[1])
n.state=nrow(state.hist)
if (is.null(n.state)) {
n.state=1
state.hist=t(as.matrix(state.hist))
}
for (i_state in 1:n.state) {
if (state.hist[i_state,2]>11 & state.hist[i_state,2]<22) {
ind=state.hist[i_state,]-c(0,11,0,0)
rew.sum[ind[1],ind[2],ind[3],ind[4]]= rew.sum[ind[1],ind[2],ind[3],ind[4]]+rew
rew.count[ind[1],ind[2],ind[3],ind[4]]=rew.count[ind[1],ind[2],ind[3],ind[4]]+1
Q=rew.sum/rew.count;
policy[,,1]=Q[,,1,1] < Q[,,1,2]
policy[,,2]=Q[,,2,1] < Q[,,2,2]
}
}
}
V[,,1]=(rew.sum[,,1,1]+rew.sum[,,1,2])/(rew.count[,,1,1]+rew.count[,,1,2])
V[,,2]=(rew.sum[,,2,1]+rew.sum[,,2,2])/(rew.count[,,2,1]+rew.count[,,2,2])
return(list(policy,V,Q))
}
# function 2 plot results
plot.BJ_MC2<-function(V){
par(mfrow=c(2,2))
image(V[[2]][,,1],main="Utility with usable Ace",xaxt='n',yaxt='n',
xlab="Dealer showing",ylab="Player sum")
axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
axis(2,at=seq(0,1,length.out=10),label=12:21)
image(V[[2]][,,2],main="Utility without usable Ace",xaxt='n',yaxt='n',
xlab="Dealer showing",ylab="Player sum")
axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
axis(2,at=seq(0,1,length.out=10),label=12:21)
image(V[[1]][,,1],main="Policy with usable Ace",xaxt='n',yaxt='n',
xlab="Dealer showing",ylab="Player sum")
axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
axis(2,at=seq(0,1,length.out=10),label=12:21)
image(V[[1]][,,2],main="Policy without usable Ace",xaxt='n',yaxt='n',
xlab="Dealer showing",ylab="Player sum")
axis(1,at=seq(0,1,length.out=10),label=c("A",paste(2:10)))
axis(2,at=seq(0,1,length.out=10),label=12:21)
}
V=BJ_MC(5e6)
plot.BJ_MC2(V)
数理社会学: なぜ宣伝しなくても流行がおこるのか
社会を<モデル>でみる:数理社会学への招待
18章:なぜ宣伝しなくても流行がおこるのか。
モデルの説明:
x:流行に影響されていない人
y:流行に影響されている人
z:以前流行に影響されてが、もう影響されていない
t:時間
\delta x = -\alpha * x[t] * y[t] * \delta t
\delta y = (\alpha * x[t] * y[t] – \beta * y[t]) * \delta t
\delta z = \beta * y[t] * \delta t
# initialization
vogue<-function(alpha, beta, inits, max_time){
dt=0.01
max_timeR = max_time - 1
x=c(inits[1], rep(0, max_timeR))
y=c(inits[2], rep(0, max_timeR))
z=c(inits[3], rep(0, max_timeR))
# main loop
for (t in 1:max_timeR) {
x[(t+1)] = x[t] - alpha*x[t]*y[t]*dt
y[(t+1)] = y[t] + (alpha*x[t]*y[t] - beta*y[t])*dt
z[(t+1)] = z[t] + beta*y[t]*dt
}
return(data.frame(x,y,z))
}
vogue.plot<-function(dat) {
plot(dat$x,type="l",lwd=4,col="blue", main="Typical Time Series of \"Vogue\" ",
xlab="time", ylab="Number of People",cex.lab=1.3,ylim=c(0,dat$x[1]))
lines(dat$y,type="l",lwd=4,col="red")
lines(dat$z,type="l",lwd=4,col="green")
legend("right", c("affected","not affected", "no longer affected"),
col=c("red","blue","green"),lty=rep(1,3),lwd=4)
}
# running functions
res<-vogue(0.0001, 0.1, c(10000, 100, 0), 5000)
vogue.plot(res)
RL Sutton & Barto Ch.4
#######################################
# ch4.1 policy evaluation
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
# defining Reward & trans. prob.
R=-1;P=0.25;
# iterative policy evaluation
delta=1; gamma=1; tol=1e-8; V=rep(0,15);
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]));
}
}
# result
> matrix(c(0,V),nrow=4)
[,1] [,2] [,3] [,4]
[1,] 0 -14 -20 -22
[2,] -14 -18 -20 -20
[3,] -20 -20 -18 -14
[4,] -22 -20 -14 0
#####################################
# ch4.3 Policy Improvement
# easier one
#####################################
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1;P=0.25;V=rep(0,15);
delta=1; gamma=1; tol=1e-10;
bestP=sample(1:4,14,replace=T)
stable=F;counter=0;
while (stable==F){
counter=counter+1
# iterative policy evaluation
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]))
}
}
# policy improvement
stable=F
for (i_state in 1:14) {
b=bestP[i_state]
bestP[i_state]=which.max(V[trM[i_state,]])
ifelse((bestP[i_state]==b),stable<-T,stable<-F)
}
}
# with softmax
R=-1;V=rep(0,15);
bestP=sample(1:4,14,replace=T)
stable=F;counter=0
while (stable==F){
counter=counter+1
Vmat=matrix(V[trM],ncol=4)
Ppolicy=t(apply(Vmat,1,function(x) exp(10*x)/sum(exp(10*x))))
# iterative policy evaluation
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(Ppolicy[i_state]*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]))
}
}
# policy improvement
stable=F
for (i_state in 1:14) {
b=bestP[i_state]
bestP[i_state]=which.max(V[trM[i_state,]])
ifelse((bestP[i_state]==b),stable<-T,stable<-F)
}
}
#####################################
# ch4.4 Value Iteration
#####################################
# example 4.3
V=c(rep(0,100),1);V.hist=c()
p=c(0.4,0.6);
gamma=1;delta=1; tol=1e-20
max.a=rep(0,101)
while (delta>tol) {
delta=0;
for (i_state in 1:99) {
v=V[i_state+1]
temp=matrix(0,nrow=1,ncol=i_state)
for (i_action in 1:i_state) {
temp[i_action]=sum(p*(gamma*c(V[(min(i_state+i_action,100)+1)],
V[(max(i_state-i_action,0)+1)])))
}
V[i_state+1]=max(temp)
max.a[i_state+1]=which.max(round(temp,8))
delta=max(delta,abs(v-V[i_state+1]))
}
V.hist=rbind(V.hist,V)
}
# plotting results
par(mfrow=c(1,2))
plot(V.hist[1,],type='l',lwd=2,xlab="Capital",ylab="Value Estimates",col='red')
lines(V.hist[2,],lwd=2,col='blue')
lines(V.hist[3,],lwd=2,col='green')
lines(V.hist[32,],lwd=2,col='black')
legend("topleft",c("sweep 1","sweep 2","sweep 3", "sweep 32"),
col=c("red","blue","green","black"),lwd=2)
barplot(max.a,xlab="Capital",ylab="Final Policy",col="white")
RL Sutton & Barto Ch.3
chapter 3 - example 3.8 & 3.12
bug fixed on 2015.12.18
#####################################
# Ch3.7 Value function
#####################################
# example 3.8: Gridworld
# initializing value vector
V=rep(0,25);
# defining probability matrix
P=matrix(1/4,nrow=25,ncol=4) #
# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18
# defining reward matrix
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5
# calculating state-values iteratively
# fixed number of iterations
nRep=1000; gamma=0.9;
for (i_rep in 1:nRep) {
V.old=V
for (i_state in 1:25) {
V[i_state]=sum(P[i_state,]*(R[i_state,]+gamma*V.old[trM[i_state,]]))
}
}
# result
> matrix(V,nrow=5)[5:1,]
[,1] [,2] [,3] [,4] [,5]
[1,] 3.30899634 8.7892919 4.4276192 5.3223676 1.4921788
[2,] 1.52158807 2.9923179 2.2501400 1.9075717 0.5474027
[3,] 0.05082249 0.7381706 0.6731133 0.3581862 -0.4031411
[4,] -0.97359230 -0.4354954 -0.3548823 -0.5856051 -1.1830751
[5,] -1.85770055 -1.3452313 -1.2292673 -1.4229181 -1.9751790
#####################################
# ch3.8 optimal value function
#####################################
# example 3.12
V.star=V; # V calculated as in exp3.8
iter=0;maxItr=1000;delta=1;tol=1e-5;
while(iter < maxItr & delta > tol) {
delta=0;iter=iter+1
V.star.old=V.star
for (i_state in 1:25) {
v=V.star[i_state]
V.star[i_state]=max(R[i_state,]+gamma*V.star.old[trM[i_state,]])
delta=max(delta,abs(v-V.star[i_state]))
}
}
# result - ex3.12
> matrix(V.star,nrow=5)[5:1,]
[,1] [,2] [,3] [,4] [,5]
[1,] 21.97746 24.41940 21.97746 19.41941 17.47747
[2,] 19.77971 21.97746 19.77971 17.80174 16.02157
[3,] 17.80173 19.77971 17.80174 16.02157 14.41941
[4,] 16.02156 17.80173 16.02156 14.41940 12.97746
[5,] 14.41940 16.02156 14.41940 12.97746 11.67972
RL Sutton & Barto Ch2
epGreedy = function(nTrial,nRep,epsilon) {
ret.hist=matrix(0,nrow=nTrial,ncol=nRep)
opt.hist=ret.hist
for (i_rep in 1:nRep){
Q.true=rnorm(10);Q.est=rep(0,10);
Q.cum=rep(0,10);act.count=rep(0,10);
opt.ID=which.max(Q.true)
for (i_trial in 1:nTrial) {
if (runif(1) < epsilon) {
action=sample(1:10,1)
} else {
action=which.max(Q.est)
}
ret.hist[i_trial,i_rep]=rnorm(1)+Q.true[action]
opt.hist[i_trial,i_rep]=action==opt.ID
act.count[action]=act.count[action]+1
Q.cum[action]=Q.cum[action]+ret.hist[i_trial,i_rep]
Q.est[action]=Q.cum[action]/act.count[action]
}
}
return(data.frame(opt=rowMeans(opt.hist),ret=rowMeans(ret.hist)))
}
type1=epGreedy(1000,2000,0.0)
type2=epGreedy(1000,2000,0.01)
type3=epGreedy(1000,2000,0.1)
par(mfrow=c(2,1))
plot(type3$ret,type='l',xlab="Play",ylab="average reward")
lines(type2$ret,type='l',col='red')
lines(type1$ret,type='l',col='green')
legend("bottomright",c("epsilon=0.00","epsilon=0.01","epsilon=0.10"),
col=c("black","red","green"),lty=c(1,1,1))
plot(type3$opt,type='l',xlab="Play",ylab="% optimal action")
lines(type2$opt,type='l',col='red')
lines(type1$opt,type='l',col='green')
legend("bottomright",c("epsilon=0.00","epsilon=0.01","epsilon=0.10"),
col=c("black","red","green"),lty=c(1,1,1))
#####################################
# Generalised Version - ch2.5 & 2.7
#####################################
epGreedyG = function(nTrial,nRep,epsilon,LR,constantLR="F",initialQ) {
# generalized version of epsilon-greedy
ret.hist=matrix(0,nrow=nTrial,ncol=nRep)
opt.hist=ret.hist
for (i_rep in 1:nRep){
Q.true=rnorm(10);Q.est=rep(initialQ,10);act.count=rep(0,10);
opt.ID=which.max(Q.true)
for (i_trial in 1:nTrial) {
if (runif(1) < epsilon) {
action=sample(1:10,1)
} else {
action=which.max(Q.est)
}
ret.hist[i_trial,i_rep]=rnorm(1)+Q.true[action]
opt.hist[i_trial,i_rep]=action==opt.ID
act.count[action]=act.count[action]+1
if (constantLR=="F"){LR=1/act.count[action]}
Q.est[action]=Q.est[action]+LR*(ret.hist[i_trial,i_rep]-Q.est[action])
}
}
return(data.frame(opt=rowMeans(opt.hist),ret=rowMeans(ret.hist)))
}

#################################
# ch2.4 softmax
#################################
RLsoftmax = function(nTrial,nRep,temp) {
ret.hist=matrix(0,nrow=nTrial,ncol=nRep)
opt.hist=ret.hist
for (i_rep in 1:nRep){
Q.true=rnorm(10);Q.est=rep(0,10);
Q.cum=rep(0,10);act.count=rep(0,10);
opt.ID=which.max(Q.true)
t=temp
for (i_trial in 1:nTrial) {
action=sample(1:10,1,prob=exp(Q.est/t)/sum(exp(Q.est/t)))
ret.hist[i_trial,i_rep]=rnorm(1)+Q.true[action]
opt.hist[i_trial,i_rep]=action==opt.ID
act.count[action]=act.count[action]+1
Q.cum[action]=Q.cum[action]+ret.hist[i_trial,i_rep]
Q.est[action]=Q.cum[action]/act.count[action]
t=max(0.001,0.995*t)
}
}
return(data.frame(opt=rowMeans(opt.hist),ret=rowMeans(ret.hist)))
}
#################################
# ch2.8 reinforcement comparison
#################################
reinfComp = function(nTrial,nRep,alpha,beta) {
ret.hist=matrix(0,nrow=nTrial,ncol=nRep)
opt.hist=ret.hist
for (i_rep in 1:nRep){
Q.true=rnorm(10);p=rep(0,10);r.ave=0;
opt.ID=which.max(Q.true)
for (i_trial in 1:nTrial) {
action=sample(1:10,1,prob=exp(p)/sum(exp(p)))
ret.hist[i_trial,i_rep]=rnorm(1)+Q.true[action]
opt.hist[i_trial,i_rep]=action==opt.ID
p[action]=p[action]+beta*(ret.hist[i_trial,i_rep]-r.ave)
r.ave=r.ave+alpha*(ret.hist[i_trial,i_rep]-r.ave)
}
}
return(data.frame(opt=rowMeans(opt.hist),ret=rowMeans(ret.hist)))
}
#################################
# ch2.9 pursuit method
#################################
pursuit= function(nTrial,nRep,beta) {
ret.hist=matrix(0,nrow=nTrial,ncol=nRep)
opt.hist=ret.hist
for (i_rep in 1:nRep){
Q.true=rnorm(10);p=rep(1/10,10);act.count=rep(0,10);Q.est=rep(0,10)
opt.ID=which.max(Q.true)
for (i_trial in 1:nTrial) {
actTemp=which.max(Q.est)
p[actTemp]=p[actTemp]+beta*(1-p[actTemp])
nonA=(1:10)[-actTemp]
p[nonA]=p[nonA]+beta*(-p[nonA])
action=sample(1:10,1,prob=p)
ret.hist[i_trial,i_rep]=rnorm(1)+Q.true[action]
opt.hist[i_trial,i_rep]=action==opt.ID
act.count[action]=act.count[action]+1
Q.est[action]=Q.est[action]+1/act.count[action]*(ret.hist[i_trial,i_rep]-Q.est[action])
}
}
return(data.frame(opt=rowMeans(opt.hist),ret=rowMeans(ret.hist)))
}
認知情報解析 GA/ES preview
ES_recomb<-function(G,child) {
nParent=nrow(G$b);nChild=nrow(child$b);nVar=ncol(G$b)
for (i_child in 1:nChild) {
parentID=sample(1:nParent,2)
coID=sample(c(0,1),nVar,replace=T)
child$b[i_child,]=G$b[parentID[1],]
child$b[i_child,which(coID==1)]=G$b[parentID[2],which(coID==1)]
child$sigma[i_child,]=0.5*(G$sigma[parentID[1],]+G$sigma[parentID[2],])
}
return(child)
}
ES_mutate<-function(child,tau){
nChild=nrow(child$b);nVar=ncol(child$b)
child$sigma<-child$sigma*exp(matrix(rnorm(nChild*nVar)*tau,nrow=nChild))
child$b=child$b+child$sigma*matrix(rnorm(nChild*nVar),nrow=nChild,ncol=nVar)
return(child)
}
ES_er<-function(b,x,y){
yhat<-x%*%b
return(sum((y-yhat)^2))
}
x=matrix(rnorm(4*50,mean=10,sd=2),ncol=4);x=cbind(rep(1,50),x)
y=x%*%c(10,2,5,-3,-5)+rnorm(50,mean=0,sd=2);
G=list();child=list();
nGen=1000;nParent=30;nChild=60;tau=1;nVar=5
G$b=matrix(rnorm(nVar*nParent,mean=0,sd=1),ncol=nVar)
G$sigma=matrix(runif(nVar*nParent),ncol=nVar)+0.5
child$b=matrix(0,nrow=nChild,ncol=nVar)
child$sigma=matrix(0,nrow=nChild,ncol=nVar)
optHist=matrix(0,nrow=nGen,ncol=1)
for (i_gen in 1:nGen) {
child<-ES_recomb(G,child)
child<-ES_mutate(child,tau)
fitG=apply(G$b,1,ES_er,x,y);fitC=apply(child$b,1,ES_er,x,y)
fitT=c(fitG,fitC);fitMin=sort(fitT,index.return=T)
tempB=rbind(G$b,child$b);tempS=rbind(G$sigma,child$sigma)
G$b=tempB[fitMin$ix[1:nParent],]
G$sigma=tempS[fitMin$ix[1:nParent],]
optHist[i_gen]=fitMin$x[1]
}
> head(G$b)
[,1] [,2] [,3] [,4] [,5]
[1,] 8.597338 2.118279 5.081328 -2.982018 -5.062286
[2,] 8.597347 2.118279 5.081328 -2.982018 -5.062286
[3,] 8.597336 2.118280 5.081328 -2.982018 -5.062286
[4,] 8.597333 2.118280 5.081328 -2.982018 -5.062286
[5,] 8.597343 2.118280 5.081328 -2.982018 -5.062286
[6,] 8.597336 2.118280 5.081328 -2.982018 -5.062287
> summary(lm(y~x[,2:5]))
Call:
lm(formula = y ~ x[, 2:5])
Residuals:
Min 1Q Median 3Q Max
-4.1866 -1.7043 0.3254 1.3890 4.1851
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.5973 3.9641 2.169 0.0354 *
x[, 2:5]1 2.1183 0.2235 9.476 2.73e-12 ***
x[, 2:5]2 5.0813 0.2077 24.464 < 2e-16 ***
x[, 2:5]3 -2.9820 0.1606 -18.566 < 2e-16 ***
x[, 2:5]4 -5.0623 0.2120 -23.874 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.342 on 45 degrees of freedom
Multiple R-squared: 0.9737, Adjusted R-squared: 0.9713
F-statistic: 415.7 on 4 and 45 DF, p-value: < 2.2e-16
Traveling Sales Man Problem
# route inversion
TSP_inv<-function(route,len) {
len_route=length(route)
invP=sample(1:len_route,1)
route[invP:min(len_route,(invP+len-1))]
=rev(route[invP:min(len_route,(invP+len-1))])
return(route)
}
> TSP_inv(1:10,4)
[1] 1 2 3 4 5 9 8 7 6 10
# route switching
TSP_switch<-function(route){
len_route=length(route)
switchP=sample(1:len_route,2)
route[switchP]=route[rev(switchP)]
return(route)
}
> TSP_switch(1:10)
[1] 1 2 3 4 5 6 7 10 9 8
# route translation
TSP_trans<-function(route){
len_route=length(route)
transP=sample(1:len_route,2);
tempR=route[-transP[1]]
if (transP[2]==1){
tempR=c(route[transP[1]],tempR)
} else if (transP[2]==len_route) {
tempR=c(tempR,route[transP[1]])
} else {
tempR=c(tempR[1:(transP[2]-1)],route[transP[1]],tempR[(transP[2]):
(len_route-1)])
}
return(tempR)
}
> TSP_trans(1:10)
[1] 1 3 4 5 6 7 8 9 2 10
# initialize cities' locations
FUN_initTSP<-function(n_city=10) {
return(matrix(runif(n_city*2,1,100),nrow=n_city,ncol=2))
}
> print(loc<-FUN_initTSP(10))
[,1] [,2]
[1,] 36.78996 40.33464
[2,] 91.67296 97.33156
[3,] 87.15730 58.82665
[4,] 56.19289 44.84425
[5,] 46.06971 43.22932
[6,] 50.91508 30.14634
[7,] 84.18521 56.17124
[8,] 77.68784 43.77393
[9,] 42.48220 11.74252
[10,] 12.99097 21.11037
# calc. total distance
FUN_costTSP<-function(route,cities) {
route=c(route,route[1]);n_cities=nrow(cities);totDist=0;
for (i_cities in 1:n_cities) {
totDist=totDist+dist(cities[route[i_cities:(i_cities+1)],])
}
return(totDist)
}
> FUN_costTSP(1:10,loc)
1
2 341.4417
# TSP demo
TSP_demo<-function(n_city=20, maxItr=1000) {
loc=FUN_initTSP(n_city);route=1:n_city
## param. for simulated annealing - change values if necessary
C=1;eta=0.99;TEMP=50;
##
optDist=FUN_costTSP(route,loc)
optHist=matrix(0,nrow=maxItr,ncol=(length(route)+1))
optHist[1,]=c(route,optDist)
for (i_loop in 2:maxItr) {
rand.op=sample(c('inv','sw','trans'),1,prob=c(0.75,0.125,0.125))
if (rand.op=='inv') {
new.route=TSP_inv(route,sample(2:n_city,1))
} else if (rand.op=='sw') {
new.route=TSP_switch(route)
} else { new.route=TSP_trans(route)}
new.dist=FUN_costTSP(new.route,loc)
delta=new.dist-optDist
prob=1/(1+exp(delta/(C*TEMP)))
if (runif(1) < prob) {
route=new.route;optDist=new.dist;
}
optHist[i_loop,]=c(route,optDist);
TEMP=TEMP*eta
}
par(mfrow=c(1,2))
plot(rbind(loc,loc[1,]),type='o',pch=20,cex=2.5, col='red',
xlab='location @X',ylab='location @Y',main='Initial route')
plot(loc[optHist[1000,c(1:n_city,1)],],type='o',pch=20, col='magenta',
cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
return(optHist)
}
# 古いのバージョン
# initializing cities
FUN_initTSP<-function(n_city=10) {
return(matrix(runif(n_city*2,1,100),nrow=n_city,ncol=2))
}
# calculating total (Euclidean) distance
FUN_costTSP<-function(route,cities) {
route=c(route,route[1]);n_cities=nrow(cities);totDist=0;
for (i_cities in 1:n_cities) {
totDist=totDist+dist(cities[route[i_cities:(i_cities+1)],])
}
return(totDist)
}
# route inversion
TSP_inv<-function(route,len) {
len_route=length(route)
invP=sample(1:len_route,1)
route[invP:min(len_route,(invP+len-1))]=rev(route[invP:min(len_route,(invP+len-1))])
return(route)
}
# route switching
TSP_switch<-function(route){
len_route=length(route)
switchP=sample(1:len_route,2)
route[switchP]=route[rev(switchP)]
return(route)
}
# route translation / insertion
TSP_trans<-function(route){
len_route=length(route)
transP=sample(1:len_route,2);
tempR=route[-transP[1]]
if (transP[2]==1){
tempR=c(route[transP[1]],tempR)
} else if (transP[2]==len_route) {
tempR=c(tempR,route[transP[1]])
} else {
tempR=c(tempR[1:(transP[2]-1)],route[transP[1]],tempR[(transP[2]):(len_route-1)])
}
return(tempR)
}
## main TSP script ###
n_city=20;loc=FUN_initTSP(n_city);route=1:n_city
maxItr=5000;C=1;eta=0.99;TEMP=20;
optDist=FUN_costTSP(route,loc)
optHist=matrix(0,nrow=maxItr,ncol=(length(route)+1))
optHist[1,]=c(route,optDist)
for (i_loop in 2:maxItr) {
rand.op=sample(c('inv','sw','trans'),1,prob=c(0.75,0.125,0.125))
if (rand.op=='inv') {
new.route=TSP_inv(route,sample(2:n_city,1))
} else if (rand.op=='sw') {
new.route=TSP_switch(route)
} else { new.route=TSP_trans(route)}
new.dist=FUN_costTSP(new.route,loc)
delta=new.dist-optDist
prob=1/(1+exp(delta/(C*TEMP)))
if (runif(1) < prob) {
route=new.route;optDist=new.dist;
}
optHist[i_loop,]=c(route,optDist);
TEMP=TEMP*eta
}
par(mfrow=c(1,2))
plot(rbind(loc,loc[1,]),type='b',pch=20,cex=2,
xlab='location @X',ylab='location @Y',main='Initial route')
plot(loc[optHist[1000,c(1:n_city,1)],],type='b',pch=20,
cex=2,xlab='location @X',ylab='location @Y',main='Optimized route')
## DEMO function ###
TSP_demo<-function(n_city=20, maxItr=1000) {
loc=FUN_initTSP(n_city);route=1:n_city
## param. for simulated annealing - change values if necessary
C=1;eta=0.99;TEMP=20;
##
optDist=FUN_costTSP(route,loc)
optHist=matrix(0,nrow=maxItr,ncol=(length(route)+1))
optHist[1,]=c(route,optDist)
for (i_loop in 2:maxItr) {
rand.op=sample(c('inv','sw','trans'),1,prob=c(0.75,0.125,0.125))
if (rand.op=='inv') {
new.route=TSP_inv(route,sample(2:n_city,1))
} else if (rand.op=='sw') {
new.route=TSP_switch(route)
} else { new.route=TSP_trans(route)}
new.dist=FUN_costTSP(new.route,loc)
delta=new.dist-optDist
prob=1/(1+exp(delta/(C*TEMP)))
if (runif(1) < prob) {
route=new.route;optDist=new.dist;
}
optHist[i_loop,]=c(route,optDist);
TEMP=TEMP*eta
}
par(mfrow=c(1,2))
plot(rbind(loc,loc[1,]),type='o',pch=20,cex=2.5, col='red',
xlab='location @X',ylab='location @Y',main='Initial route')
plot(loc[optHist[1000,c(1:n_city,1)],],type='o',pch=20, col='magenta',
cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
return(optHist)
}
最適化問題
最適化問題
# 勾配法
tol=0.0001;grad=100;
x=10;hist.x=x;lambda=0.1;
while(grad>tol) {
grad=(2*x+2)
x=x-lambda*grad
hist.x=c(hist.x,x)
}
par(mfrow=c(1,2));
xs=seq(-10,10,0.1)
plot(xs,xs^2+2*xs+1)
plot(hist.x)
# naive stochastic optimization
sx=10;hist.sx=sx
for (i_loop in 1:100){
sx.temp=sx+rnorm(1);
if ((sx.temp^2+2*sx.temp+1)<(sx^2+2*sx+1)){
sx=sx.temp
}
hist.sx=c(hist.sx,sx)
}
# simulated annealing
x=10;maxIt=1000;
c=1;s=1;temp=1;eta=0.99;
hist.x=matrix(0,nrow=maxIt);
for (i_loop in 1:maxIt) {
x.new=x+rnorm(1,mean=0,sd=temp*s)
E.old=x^2+2*x+1;
E.new=x.new^2+2*x.new+1;
Paccept=1/(1+exp((E.new-E.old)/(c*temp)))
if (Paccept>runif(1)){x=x.new}
temp=temp*eta;
hist.x[i_loop]=x;
}
数理社会学:なぜ差別しなくても外国人居住区ができるのか
社会を<モデル>でみる:数理社会学への招待
29章:なぜ差別しなくても外国人居住区ができるのか
○と♯の2つのグループが存在し、以下の条件で他の場所へ移動する。
○: 近隣に2人以下○の場合、
♯: 近隣の1/3以上が♯でない場合、
# 1 epochの内、数回移動する可能性のある場合
FUNmigration<-function(field_size=6, Nsharp=10, Ncircle=10) {
Nempty=(field_size^2-Nsharp-Ncircle)
society<-matrix(sample(c(rep(0,Nempty),rep(1,Ncircle),rep(2,Nsharp))),ncol=field_size)
# plotting initial config.
par(mfrow=c(1,2))
idx1<-which(society==1,arr.ind=T);idx2<-which(society==2,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,field_size+0.5),
ylim=c(0.5,field_size+0.5),main="Initial Arrangement",
xlab="location @ x", ylab="location @ y")
text(idx1[,1],idx1[,2],"o",cex=4,col="red");text(idx2[,1],idx2[,2],"#",cex=4,col="blue")
# main
moved=1;counter=0
while (moved>0&counter<1000) {
counter=counter+1;moved=0
for (i_row in 1:field_size) {
for (i_col in 1:field_size) {
# checking sharps
if (society[i_row,i_col]==2) {
neR_IDX=max((i_row-1),1):min((i_row+1),field_size);
neC_IDX=max((i_col-1),1):min((i_col+1),field_size);
n_sharp=sum(society[neR_IDX,neC_IDX]==2)-1;
n_circle=sum(society[neR_IDX,neC_IDX]==1);
if (n_sharp+n_circle==0 | n_sharp/(n_sharp+n_circle) < (1/3)) {
moved=moved+1;
loc_mov=sample(which(society==0),1)
society[i_row,i_col]=0
society[loc_mov]=2
}
}
# checking circles
if (society[i_row,i_col]==1) {
neR_IDX=max((i_row-1),1):min((i_row+1),field_size);
neC_IDX=max((i_col-1),1):min((i_col+1),field_size);
n_circle=sum(society[neR_IDX,neC_IDX]==1)-1;
if (n_circle < 3) {
moved=moved+1;
loc_mov=sample(which(society==0),1)
society[i_row,i_col]=0
society[loc_mov]=1
}
}
}
}
}
# plotting final config.
idx1<-which(society==1,arr.ind=T)
idx2<-which(society==2,arr.ind=T)
plot(idx1[,1],idx1[,2],type="n",xlim=c(0.5,field_size+0.5),ylim=c(0.5,field_size+0.5),
main="Arragement After Migration",,xlab="location @ x", ylab="location @ y")
text(idx1[,1],idx1[,2],"o",cex=4,col="red")
text(idx2[,1],idx2[,2],"#",cex=4,col="blue")
}
# 1 epochの内、1度のみ移動する場合
FUNmigration <- function(soc.size = 6, n.circle = 10, n.sharp = 10) {
r.sample = sample(soc.size ^ 2)
society = matrix(0, soc.size, soc.size)
society[r.sample[1:n.circle]] = 1
society[r.sample[(n.circle + 1):(n.circle + n.sharp)]] = 2
# plotting initial config.
par(mfrow = c(1, 2))
idx1 <- which(society == 1, arr.ind = T)
idx2 <- which(society == 2, arr.ind = T)
plot(idx1[, 1], idx1[, 2], type = "n", xlim = c(0.5, soc.size + 0.5),
ylim = c(0.5, soc.size + 0.5), main = "Initial Arrangement",
xlab = "location @ x", ylab = "location @ y")
text(idx1[, 1], idx1[, 2], "o", cex = 3, col = "red")
text(idx2[, 1], idx2[, 2], "#", cex = 3, col = "blue")
move = 1
while (move != 0 ) {
# circles
move = 0
c.pos = which(society == 1, arr.ind = T)
for (i.c in 1:n.circle) {
r.idx = c(max(1, c.pos[i.c, 1] - 1), min(soc.size, c.pos[i.c, 1] + 1))
c.idx = c(max(1, c.pos[i.c, 2] - 1), min(soc.size, c.pos[i.c, 2] + 1))
neighber = society[r.idx[1]:r.idx[2], c.idx[1]:c.idx[2]]
neighber.c = sum(neighber == 1) - 1
if (neighber.c < 3) {
move = 1
move.idx = which(society == 0)
society[sample(move.idx, 1)] = 1
society[c.pos[i.c, 1], c.pos[i.c, 2]] = 0
}
}
# sharps
s.pos = which(society == 2, arr.ind = T)
for (i.s in 1:n.sharp) {
r.idx = c(max(1, s.pos[i.s, 1] - 1), min(soc.size, s.pos[i.s, 1] + 1))
c.idx = c(max(1, s.pos[i.s, 2] - 1), min(soc.size, s.pos[i.s, 2] + 1))
neighbor = society[r.idx[1]:r.idx[2], c.idx[1]:c.idx[2]]
neighbor.s = sum(neighbor == 2) - 1
neighbor.all = sum(neighbor != 0) - 1
prop.s = max(0, neighbor.s / neighbor.all, na.rm = T)
if (prop.s < 1 / 3) {
move = 1
move.idx = which(society == 0)
society[sample(move.idx, 1)] = 2
society[s.pos[i.s, 1], s.pos[i.s, 2]] = 0
}
}
}
idx1 <- which(society == 1, arr.ind = T)
idx2 <- which(society == 2, arr.ind = T)
plot(idx1[, 1], idx1[, 2], type = "n",
xlim = c(0.5, soc.size + 0.5), ylim = c(0.5, soc.size + 0.5),
main = "Arragement After Migration", xlab = "location @ x", ylab = "location @ y")
text(idx1[, 1], idx1[, 2], "o", cex = 3, col = "red")
text(idx2[, 1], idx2[, 2], "#", cex = 3, col = "blue")
}




