# 認知情報解析 課題2 (Cognitive Information Analysis, Assignment 2)

# Initial action-value table: 25 rows (states) by 4 columns (actions), with
# every entry set to 0.25. P starts as the same 25x4 table; the script
# reassigns P to a scalar further down before P is read.
P <- matrix(0.25, 25, 4)
Q <- P

# Deterministic transition table for a 5x5 grid whose 25 states are numbered
# column by column: north is +1, south is -1, east is +5 and west is -5.
# A move that would leave the grid keeps the agent in its current state.
north <- pmin(1:25 + 1, rep(seq(5, 25, by = 5), each = 5))  # capped at the last state of each column
south <- pmax(1:25 - 1, rep(seq(1, 21, by = 5), each = 5))  # floored at the first state of each column
east  <- ifelse(1:25 <= 20, 1:25 + 5L, 1:25)                # states 21-25 have no eastern neighbour
west  <- ifelse(1:25 >= 6, 1:25 - 5L, 1:25)                 # states 1-5 have no western neighbour

# trM[s, a] is the successor of state s under action a
# (columns: north, east, south, west).
trM <- cbind(north, east, south, west)

# Two special states ignore the chosen action: every action taken in state 10
# leads to state 6, and every action taken in state 20 leads to state 18.
# The length-2 value is recycled down each column of the 2x4 sub-matrix.
trM[c(10, 20), ] <- c(6, 18)

# Reward table R[s, a]: 0 by default, -1 for any move that leaves the state
# unchanged (i.e. bumping into the edge of the grid) ...
R <- matrix(0, 25, 4)
R[trM == row(trM)] <- -1
# ... and +10 / +5 for every action taken in the special states 10 / 20.
R[c(10, 20), ] <- c(10, 5)

# Iterative evaluation of the action values Q (25 states x 4 actions).
#
# Each sweep applies, for every state s and action a,
#   Q[s, a] = R[s, a] + gamma * P * sum over a' of Q.old[s', a'],
# where s' = trM[s, a] is the deterministic successor state, gamma is the
# discount factor, and P = 0.25 gives each of the 4 next-state actions equal
# weight. Reads Q, R and trM, which must already be defined; overwrites Q
# (and P).
#
# nRep  : number of full sweeps over all state-action pairs
# gamma : discount factor applied to the successor state's value
# P     : weight given to each action in the successor state
nRep=1000; gamma=0.9; P = 0.25
for (i_rep in seq_len(nRep)) {   # seq_len() so that nRep = 0 runs zero sweeps
  # Snapshot the previous sweep so every update in this sweep reads the same
  # values (synchronous update).
  Q.old = Q
  for (i_state in 1:25) {
    for (i_act in 1:4){
      next_state = trM[i_state, i_act]
      # The trailing comma matters: Q.old[next_state, ] is the whole row of
      # action values for the successor state. Q.old[next_state] would be a
      # linear index returning only the first column, so sum() would cover a
      # single action instead of all four.
      Q[i_state, i_act]=R[i_state, i_act]+gamma * P * sum(Q.old[next_state, ])
    }
  }
}