# Policy iteration on a 5x5 gridworld.
#
# States are numbered 1..25 column by column: within a column, state i + 1 lies
# north of state i, and state i + 5 lies east of state i.  Actions are indexed
# 1 = north, 2 = east, 3 = south, 4 = west.  A move that would leave the grid
# keeps the agent in place and costs -1.  Every action in state 10 jumps to
# state 6 with reward 10, and every action in state 20 jumps to state 18 with
# reward 5.
#
# The loop alternates (a) iterative evaluation of the current deterministic
# policy `bestP` and (b) greedy improvement on the one-step look-ahead values,
# until the policy no longer changes.  On exit `V` holds the state values of
# the final policy and `bestP` its action index per state.

V <- rep(0, 25)
# policy matrix: P[s, a] is the probability of taking action a in state s;
# it is rebuilt from bestP before every evaluation sweep
P <- matrix(0, nrow = 25, ncol = 4)
# defining deterministic transition matrix
north <- c(2:25, 25)
north[c(5, 10, 15, 20, 25)] <- c(5, 10, 15, 20, 25)  # top row: blocked
east <- c(6:25, 21:25)                               # last column: blocked
west <- c(1:5, 1:20)                                 # first column: blocked
south <- c(1, 1:24)
south[c(1, 6, 11, 16, 21)] <- c(1, 6, 11, 16, 21)    # bottom row: blocked
trM <- cbind(north, east, south, west)
trM[10, ] <- 6
trM[20, ] <- 18
# defining reward matrix
R <- matrix(0, nrow = 25, ncol = 4)
R[trM == row(trM)] <- -1  # moves that leave the state unchanged hit a wall
R[10, ] <- 10
R[20, ] <- 5
delta <- 1
gamma <- 0.9
tol <- 1e-10
# Actions whose look-ahead value is within tie.tol of the maximum count as
# tied; the lowest-index one is chosen.  This keeps evaluation round-off
# (bounded by roughly tol / (1 - gamma)) from flipping the policy back and
# forth between equally good actions forever.
tie.tol <- 1e-8
bestP <- sample(1:4, 25, replace = TRUE)
stable <- FALSE
counter <- 0
while (!stable) {
  counter <- counter + 1
  # iterative policy evaluation of the current policy
  P[] <- 0
  P[cbind(1:25, bestP)] <- 1
  delta <- 1  # must be reset, otherwise evaluation is skipped after pass 1
  while (delta > tol) {
    V.old <- V
    # V.old[trM] has length 100 in column-major order, so it lines up with R
    V <- rowSums(P * (R + gamma * V.old[trM]))
    delta <- max(abs(V - V.old))
  }
  # policy improvement: greedy on reward + discounted successor value
  Q <- R + gamma * V[trM]
  newP <- apply(Q, 1, function(q) which(q >= max(q) - tie.tol)[1])
  # the policy is stable only if no state changed its action
  stable <- all(newP == bestP)
  bestP <- newP
}
# show the policy laid out like the grid (top row of the matrix = north edge)
apply(matrix(bestP, nrow = 5), 2, rev)
bestP.mat <- apply(matrix(c("N", "E", "S", "W")[bestP], nrow = 5), 2, rev)
print(bestP.mat)
# Monthly Archives: June 2019
# 2019年度 データ解析基礎論a T検定 (AY2019, Fundamentals of Data Analysis A: t-tests)
# Probability mass function of Binomial(size = 11, prob = 0.5) as a bar chart,
# with one bar per outcome 0..11 labelled by its count.
x.temp <- 0:11
m <- setNames(dbinom(x.temp, size = 11, prob = 0.5), as.character(x.temp))
# all twelve bars (lower tail, middle and upper tail) are drawn in red
barplot(m, col = rep("red", 12))
# One-sample t-test of the sample `ssize` against a hypothesised mean of 24,
# followed by a plot of the reference t density with the observed statistic
# (and its mirror image) marked by dashed red lines.
ssize <- c(24, 25, 26, 23.5, 25, 27, 24, 22, 27.5, 28)
ssize.mean <- mean(ssize)
ssize.var <- var(ssize)
N <- length(ssize)
# t = (sample mean - hypothesised mean) / standard error of the mean
t.value <- (ssize.mean - 24) / (sqrt(ssize.var / N))
x.temp <- seq(-4, 4, 0.01)
y.temp <- dt(x.temp, df = N - 1)
plot(x.temp, y.temp, type = 'l', lwd = 3, ylab = "Density", xlab = 't-value')
abline(v = t.value, col = 'red', lwd = 3, lty = 2)
abline(v = -t.value, col = 'red', lwd = 3, lty = 2)
# Paired t-test on the within-pair differences A - B: prints the two-sided
# p-value, then plots the reference t density with the observed statistic
# (and its mirror image) marked by dashed red lines.
A <- c(12, 19, 10, 10, 14, 18, 15, 11, 16)
B <- c(15, 20, 16, 14, 17, 16, 12, 12, 19)
d <- A - B
tValue <- mean(d) / sqrt(var(d) / length(d))
(1 - pt(abs(tValue), df = length(d) - 1)) * 2
x.temp <- seq(-4, 4, 0.01)
y.temp <- dt(x.temp, df = length(d) - 1)
plot(x.temp, y.temp, type = 'l', lwd = 3, ylab = "Density", xlab = 't-value')
# mark this test's own statistic (tValue), not a t.value left over from an
# earlier analysis
abline(v = tValue, col = 'red', lwd = 3, lty = 2)
abline(v = -tValue, col = 'red', lwd = 3, lty = 2)
# Two-sample t-test for two groups of equal size, using the pooled-variance
# standard error sqrt((var(X1) + var(X2)) / n) and 2n - 2 degrees of freedom.
# The final expression is the two-sided p-value.
X1 <- c(78, 70, 66, 76, 78, 76, 88, 76)
X2 <- c(76, 72, 60, 72, 70, 72, 84, 70)
mean.diff <- mean(X1) - mean(X2)
var.sum <- var(X1) + var(X2)
t.value <- mean.diff / sqrt(var.sum / length(X1))
2 * (1 - pt(abs(t.value), 2 * length(X1) - 2))