north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
r=-1;P=rep(0.25,4);V = rep(0,14)max.iter = 10000;
state.count=rep(0,15)
for (i.iter in 1:max.iter){
state = sample(1:14,1)
state.seq = state
while(state!=15){
action = sample(1:4,1,prob = P)
state.seq = c(state.seq,trM[state,action])
state = trM[state,action]
}
uniq.seq = unique(state.seq)
for (i.uniq in 1:(length(uniq.seq)-1)){
first.visit = which(state.seq == uniq.seq[i.uniq])[1]
V[uniq.seq[i.uniq]] = V[uniq.seq[i.uniq]] + r*(length(state.seq)-first.visit-1)
}
state.count[uniq.seq] = state.count[uniq.seq] + 1
}
V = matrix(c(0,V/state.count[1:14],0),nrow=4)
Related