# Policy iteration on a 5 x 5 gridworld with two special "jump" states.
#
# States are numbered 1..25 column by column: state + 1 is the cell to the
# north and state + 5 is the cell to the east.  Actions are indexed
# 1 = north, 2 = east, 3 = south, 4 = west (the column order of trM).

V <- rep(0, 25)

# policy matrix: P[s, a] is the probability of taking action a in state s.
# The starting policy is uniformly random.
P <- matrix(1 / 4, nrow = 25, ncol = 4)

# deterministic transition matrix: trM[s, a] is the state reached from s by a.
# A move that would leave the grid keeps the agent in the same state.
north <- c(2:25, 25)
north[c(5, 10, 15, 20, 25)] <- c(5, 10, 15, 20, 25)
east <- c(6:25, 21:25)
west <- c(1:5, 1:20)
south <- c(1, 1:24)
south[c(1, 6, 11, 16, 21)] <- c(1, 6, 11, 16, 21)
trM <- cbind(north, east, south, west)
trM[10, ] <- 6   # every action taken in state 10 leads to state 6
trM[20, ] <- 18  # every action taken in state 20 leads to state 18

# reward matrix: -1 when the move leaves the state unchanged (hitting a wall),
# +10 / +5 for any action taken in state 10 / 20, and 0 otherwise.
R <- matrix(0, nrow = 25, ncol = 4)
R[trM == row(trM)] <- -1
R[10, ] <- 10
R[20, ] <- 5

gamma <- 0.9
tol <- 1e-10
# Action values closer than tie.tol to the best one are treated as ties and
# resolved by taking the first such action.  tie.tol is kept well above the
# residual error of the iterative evaluation (on the order of
# tol * gamma / (1 - gamma)) so that floating-point noise cannot make the
# greedy policy flip between equally good actions from one pass to the next.
tie.tol <- 100 * tol
max.iter <- 1000  # safety cap on the number of evaluation/improvement passes

bestP <- integer(25)  # greedy action per state; 0 = nothing chosen yet
stable <- FALSE
counter <- 0

while (!stable && counter < max.iter) {
  counter <- counter + 1

  # iterative policy evaluation of the CURRENT policy P.
  # delta must be reset on every pass; otherwise the evaluation would be
  # skipped from the second pass on.
  delta <- Inf
  while (delta > tol) {
    V.old <- V
    # synchronous sweep: expected one-step return under P for every state
    V <- rowSums(P * (R + gamma * V.old[as.vector(trM)]))
    delta <- max(abs(V - V.old))
  }

  # policy improvement: act greedily with respect to the one-step look-ahead
  # value (immediate reward + discounted value of the successor state).
  Q <- R + gamma * V[as.vector(trM)]
  bestP.old <- bestP
  bestP <- apply(Q, 1, function(q) which(q >= max(q) - tie.tol)[1])

  # the policy is stable only if NO state changed its greedy action
  stable <- all(bestP == bestP.old)

  # make the improved (deterministic) policy the one evaluated next
  P <- matrix(0, nrow = 25, ncol = 4)
  P[cbind(seq_len(25), bestP)] <- 1
}
if (!stable) {
  warning("policy iteration stopped after max.iter passes without a stable policy",
          call. = FALSE)
}

# Show the policy as a 5 x 5 grid.  Each column of the matrix holds one grid
# column; rows are reversed so that north is at the top of the printout.
apply(matrix(bestP, nrow = 5), 2, rev)
bestP.mat <- apply(matrix(c("N", "E", "S", "W")[bestP], nrow = 5), 2, rev)
print(bestP.mat)
Monthly Archives: June 2019
2019年度 データ解析基礎論a T検定
# --- Binomial distribution: 11 trials, success probability 0.5 ---
x.temp <- 0:11
m <- dbinom(x.temp, 11, prob = 0.5)
names(m) <- paste(0:11)
# Bars are coloured in three groups (first 3, middle 6, last 3); all three
# groups are currently "red" -- presumably so the groups can be recoloured.
barplot(m, col = c(rep("red", 3), rep("red", 6), rep("red", 3)))

# --- One-sample t-test: is the mean of ssize different from 24? ---
ssize <- c(24, 25, 26, 23.5, 25, 27, 24, 22, 27.5, 28)
ssize.mean <- mean(ssize)
ssize.var <- var(ssize)
N <- length(ssize)
t.value <- (ssize.mean - 24) / (sqrt(ssize.var / N))

# t density with N - 1 degrees of freedom; dashed lines mark +/- t.value
x.temp <- seq(-4, 4, 0.01)
y.temp <- dt(x.temp, df = N - 1)
plot(x.temp, y.temp, type = "l", lwd = 3, ylab = "Density", xlab = "t-value")
abline(v = t.value, col = "red", lwd = 3, lty = 2)
abline(v = -t.value, col = "red", lwd = 3, lty = 2)

# --- Paired t-test on the differences A - B ---
A <- c(12, 19, 10, 10, 14, 18, 15, 11, 16)
B <- c(15, 20, 16, 14, 17, 16, 12, 12, 19)
d <- A - B
tValue <- mean(d) / sqrt(var(d) / length(d))
# two-sided p-value
(1 - pt(abs(tValue), df = length(d) - 1)) * 2

# t density with length(d) - 1 degrees of freedom.  The dashed lines mark the
# paired statistic tValue computed above (not the one-sample t.value).
x.temp <- seq(-4, 4, 0.01)
y.temp <- dt(x.temp, df = length(d) - 1)
plot(x.temp, y.temp, type = "l", lwd = 3, ylab = "Density", xlab = "t-value")
abline(v = tValue, col = "red", lwd = 3, lty = 2)
abline(v = -tValue, col = "red", lwd = 3, lty = 2)

# --- Two-sample t-test with pooled variance ---
# The standard error sqrt((var(X1) + var(X2)) / n) is the pooled-variance
# formula for two groups of the SAME size n; the test has 2 * n - 2 degrees
# of freedom.
X1 <- c(78, 70, 66, 76, 78, 76, 88, 76)
X2 <- c(76, 72, 60, 72, 70, 72, 84, 70)
stopifnot(length(X1) == length(X2))
t.value <- (mean(X1) - mean(X2)) / sqrt((var(X1) + var(X2)) / length(X1))
# two-sided p-value
2 * (1 - pt(abs(t.value), 2 * length(X1) - 2))