June | 2019 | CourseLog

# State values for the 25 states, all starting at zero.
V <- numeric(25)

# Action probabilities: every state picks each of its 4 actions with p = 1/4.
P <- matrix(0.25, nrow = 25, ncol = 4)

# Deterministic successor state for each action.
# North is state + 1, south is state - 1, east is state + 5, west is
# state - 5; a move that would leave the grid keeps the state unchanged.
north <- ifelse(1:25 %% 5 == 0, 1:25, 1:25 + 1)
east <- ifelse(1:25 > 20L, 1:25, 1:25 + 5L)
west <- ifelse(1:25 <= 5L, 1:25, 1:25 - 5L)
south <- ifelse(1:25 %% 5 == 1, 1:25, 1:25 - 1)
trM <- cbind(north = north, east = east, south = south, west = west)
# Special states: every action from state 10 leads to state 6, and every
# action from state 20 leads to state 18 (values recycle row by row here).
trM[c(10, 20), ] <- c(6, 18)

# Rewards per (state, action): -1 when the action leaves the state unchanged,
# +10 for any action in state 10, +5 for any action in state 20, 0 otherwise.
R <- matrix(0, nrow = 25, ncol = 4)
R[trM == row(trM)] <- -1
R[c(10, 20), ] <- c(10, 5)

# Policy iteration on the 25-state gridworld.
# Expects V (state values), R (reward per state/action) and trM (successor
# state per state/action) to be defined already. On exit, bestP holds one
# action index (a column of trM/R) per state and V the values of that policy.
gamma <- 0.9   # discount factor
tol <- 1e-10   # evaluation stops once the largest change in a sweep is <= tol
delta <- 1
bestP <- sample(1:4, 25, replace = TRUE)  # random initial policy
stable <- FALSE
counter <- 0   # number of evaluation + improvement rounds performed
while (!stable) {
  counter <- counter + 1

  # Evaluate the CURRENT policy: P puts probability 1 on the action stored
  # in bestP for each state, so the update below follows that action only.
  P <- matrix(0, nrow = 25, ncol = 4)
  P[cbind(1:25, bestP)] <- 1

  # iterative policy evaluation
  # delta must be reset every round; otherwise the loop below is skipped on
  # every round after the first because delta is already <= tol.
  delta <- 1
  while (delta > tol) {
    V.old <- V
    for (i_state in 1:25) {
      V[i_state] <- sum(
        P[i_state, ] * (R[i_state, ] + gamma * V.old[trM[i_state, ]])
      )
    }
    delta <- max(abs(V - V.old))
  }

  # policy improvement
  # Start from "stable" and clear the flag as soon as ANY state changes its
  # action, so termination does not depend on the last state alone.
  stable <- TRUE
  for (i_state in 1:25) {
    b <- bestP[i_state]
    # Action values include the immediate reward and the discount.
    q <- R[i_state, ] + gamma * V[trM[i_state, ]]
    # Take the first action within a small tolerance of the best one, so
    # numerical noise in V cannot make tied actions flip back and forth.
    bestP[i_state] <- which(q >= max(q) - 100 * tol)[1]
    if (bestP[i_state] != b) {
      stable <- FALSE
    }
  }
}

# Show the policy as a 5x5 grid. States fill the matrix column by column and
# the rows are then reversed, so state 1 ends up bottom-left and state 5
# top-left.
matrix(bestP, nrow = 5)[5:1, ]

# Same grid with the action codes 1..4 spelled as N, E, S, W.
bestP.mat <- matrix(c("N", "E", "S", "W")[bestP], nrow = 5)[5:1, ]
print(bestP.mat)

# Binomial distribution: probability of 0..11 successes in 11 trials, p = 0.5.
x.temp <- 0:11
m <- dbinom(x.temp, 11, prob = 0.5)
names(m) <- paste(0:11)
# Three colour groups (3 + 6 + 3 bars); all three are currently red.
barplot(m, col = c(rep("red", 3), rep("red", 6), rep("red", 3)))

# One-sample t statistic: sample mean of ssize compared against 24.
ssize <- c(24, 25, 26, 23.5, 25, 27, 24, 22, 27.5, 28)
ssize.mean <- mean(ssize)
ssize.var <- var(ssize)
N <- 10
t.value <- (ssize.mean - 24) / (sqrt(ssize.var / N))
# t density with df = 9, with the observed statistic marked on both sides.
x.temp <- seq(-4, 4, 0.01)
y.temp <- dt(x.temp, df = 9)
plot(x.temp, y.temp, type = 'l', lwd = 3, ylab = "Density", xlab = 't-value')
abline(v = t.value, col = 'red', lwd = 3, lty = 2)
abline(v = -t.value, col = 'red', lwd = 3, lty = 2)

# Paired t statistic on the differences A - B, and its two-sided p-value.
A <- c(12, 19, 10, 10, 14, 18, 15, 11, 16)
B <- c(15, 20, 16, 14, 17, 16, 12, 12, 19)
d <- A - B
tValue <- mean(d) / sqrt(var(d) / length(d))
(1 - pt(abs(tValue), df = 8)) * 2
# t density with df = 8; the marker lines use tValue, the paired statistic
# computed just above (not t.value from the one-sample section).
x.temp <- seq(-4, 4, 0.01)
y.temp <- dt(x.temp, df = 8)
plot(x.temp, y.temp, type = 'l', lwd = 3, ylab = "Density", xlab = 't-value')
abline(v = tValue, col = 'red', lwd = 3, lty = 2)
abline(v = -tValue, col = 'red', lwd = 3, lty = 2)

# Two-sample t statistic for X1 vs X2 (8 observations each) and its
# two-sided p-value with 14 degrees of freedom.
X1 <- c(78, 70, 66, 76, 78, 76, 88, 76)
X2 <- c(76, 72, 60, 72, 70, 72, 84, 70)
t.value <- (mean(X1) - mean(X2)) / sqrt((var(X1) + var(X2)) / 8)
2 * (1 - pt(abs(t.value), 14))

CourseLog

Dept Cognitive & Info Science

Monthly Archives: June 2019

認知情報解析学演習a 課題03

2019年度　データ解析基礎論a T検定