Category Archives: Miscellaneous
Cognitive Information Analysis Exercise: Residential Segregation Problem
# Schelling-style segregation model on a 10 x 10 grid
n.circle = 20; n.sharp = 20; size = 10
loc  = sample(1:size^2, n.circle + n.sharp)   # random initial locations
type = c(rep(1, n.circle), rep(2, n.sharp))   # circle = 1; sharp = 2
people = cbind(type, loc)
state.mat = matrix(0, size, size)
state.mat[people[, 2]] = people[, 1]
p.move  = 0.1   # prob. that a circle moves spontaneously (alternative: 1/(2*n.circle))
p.other = 0.1   # (not used below)
c.count = 3     # a sharp moves if >= c.count circles are in its 3x3 neighborhood
idx = cbind(rep(1:size, size), sort(rep(1:size, size)))  # cell number -> (x, y)
term = F
while (term == F) {
  term = T
  rand.order = sample(1:nrow(people))
  for (i.p in 1:nrow(people)) {
    if (people[rand.order[i.p], 1] == 1) {   # circle: moves at random
      if (runif(1) < p.move) {
        empty = (1:size^2)[-people[, 2]]     # currently unoccupied cells
        people[rand.order[i.p], 2] = sample(empty, 1)
        state.mat = matrix(0, size, size)
        state.mat[people[, 2]] = people[, 1]
        term = F
      }
    } else {                                 # sharp: moves if too many circles nearby
      x.min = max(idx[people[rand.order[i.p], 2], 1] - 1, 1)
      x.max = min(idx[people[rand.order[i.p], 2], 1] + 1, size)
      y.min = max(idx[people[rand.order[i.p], 2], 2] - 1, 1)
      y.max = min(idx[people[rand.order[i.p], 2], 2] + 1, size)
      circle.in = sum(state.mat[x.min:x.max, y.min:y.max] == 1)
      if (circle.in >= c.count) {
        empty = (1:size^2)[-people[, 2]]
        people[rand.order[i.p], 2] = sample(empty, 1)
        state.mat = matrix(0, size, size)
        state.mat[people[, 2]] = people[, 1]
        term = F
        # print('moved')
      }
    }
  }
  # after the moves, check whether any sharp is still unhappy
  for (i.p in 1:nrow(people)) {
    if (people[i.p, 1] == 2) {
      x.min = max(idx[people[i.p, 2], 1] - 1, 1)
      x.max = min(idx[people[i.p, 2], 1] + 1, size)
      y.min = max(idx[people[i.p, 2], 2] - 1, 1)
      y.max = min(idx[people[i.p, 2], 2] + 1, size)
      circle.in = sum(state.mat[x.min:x.max, y.min:y.max] == 1)
      # print(circle.in)  # debug output
      if (circle.in >= c.count) {
        term = F
        break
      }
    }
  }
}
# plot the final configuration
plot(0, 0, type = 'n', xlim = c(0, 11), ylim = c(0, 11))
lab = c("0", "#")
text(idx[people[, 2], 1], idx[people[, 2], 2], lab[people[, 1]], cex = 3)
ab = seq(0.5, 10.5, 1)
for (i in 1:11) {
  abline(h = ab[i], col = 'red')
  abline(v = ab[i], col = 'red')
}
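As a quick post-hoc check that the loop really terminated in a stable configuration, the neighborhood count can be recomputed for every sharp after the fact. This is a minimal sketch, assuming people, idx, state.mat, size, and c.count are still in the workspace from the code above:

# recount, for every '#' agent, the circles in its 3x3 neighborhood
n.unhappy = 0
for (i.p in 1:nrow(people)) {
  if (people[i.p, 1] == 2) {
    x.min = max(idx[people[i.p, 2], 1] - 1, 1)
    x.max = min(idx[people[i.p, 2], 1] + 1, size)
    y.min = max(idx[people[i.p, 2], 2] - 1, 1)
    y.max = min(idx[people[i.p, 2], 2] + 1, size)
    if (sum(state.mat[x.min:x.max, y.min:y.max] == 1) >= c.count) {
      n.unhappy = n.unhappy + 1
    }
  }
}
n.unhappy  # should be 0 once the while loop above has terminated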
Basic Practicum B02
# convert a non-negative integer to its binary representation,
# zero-padded on the left to 'digits' bits
dec2bin <- function(num, digits = 8) {
  bin = c()
  if (num == 0) {
    bin = 0
  } else {
    while (num != 0) {
      rem = num %% 2      # current lowest bit
      num = num %/% 2     # shift right
      bin = c(rem, bin)   # prepend, so the MSB ends up first
    }
  }
  if (length(bin) < digits) {   # left-pad with zeros
    res = matrix(0, nrow = 1, ncol = digits)
    res[(digits - length(bin) + 1):digits] = bin
  } else {
    res = bin
  }
  return(res)
}
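A few sanity checks for dec2bin, assuming the definition above; bin2dec is a hypothetical helper added here only to verify the round trip:

dec2bin(0)     # 0 0 0 0 0 0 0 0
dec2bin(10)    # 0 0 0 0 1 0 1 0
dec2bin(255)   # 1 1 1 1 1 1 1 1

# hypothetical inverse, for checking only
bin2dec <- function(bin) {
  sum(bin * 2^((length(bin) - 1):0))
}
bin2dec(dec2bin(10))  # should return 10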
Cognitive Information Analysis Exercise W13
# policy evaluation on the 4x4 gridworld
# deterministic transition matrix; state 15 is the (absorbing) terminal state
north = c(1:3, 15, 1:10)
east  = 2:15;        east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west  = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)
### synchronous evaluation of the equiprobable policy, fixed number of sweeps
nRep = 1000; gamma = 1; P = 0.25
R = -1
V = rep(0, 15)
for (i_rep in 1:nRep) {
  V.old = V
  for (i_state in 1:14) {
    V[i_state] = sum(P * (R + gamma * V.old[trM[i_state, ]]))
  }
}
matrix(c(0, V), 4, 4)

# ch4.1 policy evaluation
########################################
# example 4.1 - bug fixed on 2017.03.21
# defining deterministic trans. matrix
north = c(1:3, 15, 1:10)
east  = 2:15;        east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west  = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)
# defining reward & trans. prob.
R = -1; P = 0.25
# in-place iterative policy evaluation, run to convergence
delta = 1; gamma = 1; tol = 1e-8
V = rep(0, 15)
while (delta > tol) {
  delta = 0
  for (i_state in 1:14) {
    v = V[i_state]
    V[i_state] = sum(P * (R + gamma * V[trM[i_state, ]]))
    delta = max(delta, abs(v - V[i_state]))
  }
}
# result
> matrix(c(0, V), nrow = 4)
     [,1] [,2] [,3] [,4]
[1,]    0  -14  -20  -22
[2,]  -14  -18  -20  -20
[3,]  -20  -20  -18  -14
[4,]  -22  -20  -14    0

#####################################
# ch4.3 Policy Improvement
# easier one
#####################################
north = c(1:3, 15, 1:10)
east  = 2:15;        east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west  = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)
R = -1; V = rep(0, 15)
gamma = 1; tol = 1e-5
bestP = sample(1:4, 14, replace = T)   # random initial policy
stable = F; counter = 0
while (stable == F) {
  counter = counter + 1                # number of improvement sweeps
  # softmax over successor values defines a (soft) greedy policy
  Vmat = matrix(V[trM], ncol = 4)
  Ppolicy = t(apply(Vmat, 1, function(x) exp(10 * x) / sum(exp(10 * x))))
  # iterative policy evaluation (delta must be reset on every sweep)
  delta = 1
  while (delta > tol) {
    delta = 0
    for (i_state in 1:14) {
      v = V[i_state]
      V[i_state] = sum(Ppolicy[i_state, ] * (R + gamma * V[trM[i_state, ]]))
      delta = max(delta, abs(v - V[i_state]))
    }
  }
  # policy improvement: stop only when no state changes its greedy action
  stable = T
  for (i_state in 1:14) {
    b = bestP[i_state]
    bestP[i_state] = which.max(V[trM[i_state, ]])
    if (bestP[i_state] != b) { stable = F }
  }
}

#####################################
# ch4.4 Value Iteration
#####################################
# example 4.3: gambler's problem, p(win) = 0.4
V = c(rep(0, 100), 1); V.hist = c()   # V[101] = 1 encodes the reward for reaching 100
p = c(0.4, 0.6)                       # prob. of winning / losing a bet
gamma = 1; delta = 1; tol = 1e-20
max.a = rep(0, 101)
while (delta > tol) {
  delta = 0
  for (i_state in 1:99) {             # current capital
    v = V[i_state + 1]
    temp = matrix(0, nrow = 1, ncol = i_state)
    for (i_action in 1:i_state) {     # stake
      temp[i_action] = sum(p * (gamma * c(V[(min(i_state + i_action, 100) + 1)],
                                          V[(max(i_state - i_action, 0) + 1)])))
    }
    V[i_state + 1] = max(temp)
    max.a[i_state + 1] = which.max(round(temp, 8))
    delta = max(delta, abs(v - V[i_state + 1]))
  }
  V.hist = rbind(V.hist, V)
}
# plotting results
par(mfrow = c(1, 2))
plot(V.hist[1, ], type = 'l', lwd = 2, xlab = "Capital", ylab = "Value Estimates", col = 'red')
lines(V.hist[2, ], lwd = 2, col = 'blue')
lines(V.hist[3, ], lwd = 2, col = 'green')
lines(V.hist[32, ], lwd = 2, col = 'black')
legend("topleft", c("sweep 1", "sweep 2", "sweep 3", "sweep 32"),
       col = c("red", "blue", "green", "black"), lwd = 2)
barplot(max.a, xlab = "Capital", ylab = "Final Policy", col = "white")
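As a cross-check on the iterative policy evaluation in example 4.1: for a fixed equiprobable policy the Bellman equation is linear, so the state values can also be obtained in closed form with solve(). A minimal sketch, assuming trM, P, R, and gamma are still defined as above:

# build the state-to-state transition matrix under the random policy
Ppi = matrix(0, 15, 15)
for (i_state in 1:14) {
  for (a in 1:4) {
    Ppi[i_state, trM[i_state, a]] = Ppi[i_state, trM[i_state, a]] + P
  }
}
Rvec = c(rep(R, 14), 0)                 # no reward from the terminal state
# solve (I - gamma * Ppi) V = Rvec
Vexact = solve(diag(15) - gamma * Ppi, Rvec)
round(matrix(c(0, Vexact), nrow = 4))   # should match the iterative result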
Basic Practicum MB02
# forward / backward pass for a multiplication node in a computational graph
multi.forwd <- function(x, y) {
  return(x * y)
}
multi.bckwd <- function(x, y, dout) {
  # the gradients swap the inputs: d/dx (x*y) = y, d/dy (x*y) = x
  dx = dout * y
  dy = dout * x
  return(list(dx = dx, dy = dy))
}

# example: total price of 2 apples at 100 yen each, with 10% tax
apple = 100; n.apple = 2; tax = 1.1
apple.pre.tax  = multi.forwd(apple, n.apple)
apple.post.tax = multi.forwd(apple.pre.tax, tax)
# backward pass, starting from d(price)/d(price) = 1
dprice = 1
d.apple.post.tax = multi.bckwd(apple.pre.tax, tax, dprice)
d.apple   = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dy
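A centered finite-difference check is a simple way to validate the backward pass. A minimal sketch, assuming the objects defined above (f and eps are introduced here only for the check):

# numerical gradient of the total price with respect to 'apple'
f = function(a) multi.forwd(multi.forwd(a, n.apple), tax)
eps = 1e-6
num.grad = (f(apple + eps) - f(apple - eps)) / (2 * eps)
num.grad   # ~ 2.2
d.apple    # analytic gradient from the backward pass; should agree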
Introduction to the Humanities (color experiment, tentative)
Disclaimer
The code in this courselog is presented mainly as examples in lectures, seminars, and practicums for undergraduates and master's-level students, and as a rule it is written to be intuitively easy to follow. Some examples are quite inefficient, and some were written quickly. Also, since I normally use MATLAB, some odd habits may have crept in. Please keep this in mind when using or referring to these examples.
Materials related to the graduation thesis: [2015PDF] [word template] [latex template] [cover page] [how to write a report] [citations and references]
For A Brief Guide to R (a guide to R commands, with examples), see here.