# 広域システム特別講義II 確率的最適化法

```naive.stochOptim <- function(obj.func, x, n.iter, width){
opt.value <- do.call(obj.func, list(x))
opt.hist = matrix(0, nrow = n.iter, ncol = 5)
opt.hist[1,] = c(x, x, opt.value, opt.value, 1)
for (i.iter in 2:n.iter){
accpet = 0
temp.x <-  x + rnorm(1, mean = 0, sd = width)
temp.value <- do.call(obj.func, list(temp.x))
if (temp.value <  opt.value){
x = temp.x
opt.value = temp.value
accept = 1
}
opt.hist[i.iter, ] = c(x, temp.x, opt.value, temp.value, 1)
}
return(data.frame(opt.hist))
}

set.seed(50)
n.iter =500
fun01<-function(x){x^2+2*x}
res <- naive.stochOptim(fun01,3,n.iter,1)

# vizualizing results
par(mfrow=c(2,1))
# upper panel
plot(res\$X2,res\$X4,col='red',pch=20,cex=2,xlab = "x", ylab='f(x)')
legend("topleft",c("accepted","tested"),pch=c(20,20),col=c("black","red"))
x.seq = seq(min(res\$X2),max(res\$X2),length.out = n.iter)
lines(x.seq,fun01(x.seq))
points(res\$X1,res\$X3,col='black',pch=20)
# lower panel
plot(res\$X3,1:n.iter,type='n',pch=20,xlim=c(min(res\$X2),max(res\$X2)),xlab='x',ylab="Trial")
for (i.iter in 2:n.iter){
lines(c(res\$X1[(i.iter-1)],res\$X2[i.iter]),c((i.iter-1),i.iter),type='o',pch=20,col='red',cex=1.5)
}
points(res\$X1,1:n.iter,type='o',pch=20)
legend("topright",c("accepted","tested"),pch=c(20,20),col=c("black","red"))

SimAneal01<-function(func, initXY, maxItr=1000, C=1, eta=0.99, width=10) {
x=initXY
opt.value = do.call(func,list(x))
n.var = length(x)
opt.hist=matrix(0, nrow=maxItr, ncol=5)
opt.hist[1,]=c(x,x,opt.value,opt.value,0)
for (i_loop in 2:maxItr) {
accept = 0
temp.x = x + rnorm(n.var, mean = 0, sd=width)
temp.value= do.call(func,list(temp.x))
delta=temp.value-opt.value;
prob=1/(1+exp(delta/(C*width)))
if (runif(1) < prob) {
x = temp.x;
opt.value = temp.value;
accept = 1
}
opt.hist[i_loop,]=c(x, temp.x, opt.value, temp.value, accept);
width=width*eta
}
return(data.frame(opt.hist))
}

set.seed(48)
n.iter = 500
res <- SimAneal01(fun01, 3, n.iter, 1, 0.985, 1)
#vizualizing results
par(mfrow=c(2,1))
# upper panel
plot(res\$X2,res\$X4,col='red',pch=20,cex=2,xlab = "x", ylab='f(x)')
legend("topleft",c("accepted","tested"),pch=c(20,20),col=c("black","red"))
x.seq = seq(min(res\$X2),max(res\$X2),length.out = n.iter)
lines(x.seq,fun01(x.seq))
points(res\$X1,res\$X3,col='black',pch=20)
# lower panel
plot(res\$X3,1:n.iter,type='n',pch=20,xlim=c(min(res\$X2),max(res\$X2)),xlab='x',ylab="Trial")
for (i.iter in 2:n.iter){
lines(c(res\$X1[(i.iter-1)],res\$X2[i.iter]),c((i.iter-1),i.iter),type='o',pch=20,col='red',cex=1.5)
}
points(res\$X1,1:n.iter,type='o',pch=20)
legend("topright",c("accepted","tested"),pch=c(20,20),col=c("black","red"))

### TSP
TSP_inv<-function(route,len) {
len_route=length(route)
invP=sample(1:len_route,1)
route[invP:min(len_route,(invP+len-1))]=rev(route[invP:min(len_route,(invP+len-1))])
return(route)
}

TSP_switch<-function(route){
len_route=length(route)
switchP=sample(1:len_route,2)
route[switchP]=route[rev(switchP)]
return(route)
}

TSP_trans<-function(route){
len_route=length(route)
transP=sample(1:len_route,2);
tempR=route[-transP[1]]
if (transP[2]==1){
tempR=c(route[transP[1]],tempR)
} else if (transP[2]==len_route) {
tempR=c(tempR,route[transP[1]])
} else {
tempR=c(tempR[1:(transP[2]-1)],route[transP[1]],tempR[(transP[2]):
(len_route-1)])
}
return(tempR)
}

FUN_initTSP<-function(n_city=10) {
return(matrix(runif(n_city*2,1,100),nrow=n_city,ncol=2))
}

FUN_costTSP<-function(route,cities) {
route=c(route,route[1]);n_cities=nrow(cities);totDist=0;
for (i_cities in 1:n_cities) {
totDist=totDist+dist(cities[route[i_cities:(i_cities+1)],])
}
return(totDist)
}

TSP_demo<-function(n_city=20, maxItr=1000 , C = 1, eta = 0.99, TEMP = 50) {
loc=FUN_initTSP(n_city);route=1:n_city
optDist=FUN_costTSP(route,loc)
optHist=matrix(0,nrow=maxItr,ncol=(length(route)+1))
optHist[1,]=c(route,optDist)
for (i_loop in 2:maxItr) {
rand.op=sample(c('inv','sw','trans'),1,prob=c(0.75,0.125,0.125))
if (rand.op=='inv') {
new.route=TSP_inv(route,sample(2:n_city,1))
} else if (rand.op=='sw') {
new.route=TSP_switch(route)
} else { new.route=TSP_trans(route)}
new.dist=FUN_costTSP(new.route,loc)
delta=new.dist-optDist
prob=1/(1+exp(delta/(C*TEMP)))
if (runif(1) < prob) {
route=new.route;optDist=new.dist;
}
optHist[i_loop,]=c(route,optDist);
TEMP=TEMP*eta
}
par(mfrow=c(1,2))
plot(rbind(loc,loc[1,]),type='o',pch=20,cex=2.5, col='red',
xlab='location @X',ylab='location @Y',main='Initial route')
plot(loc[optHist[1000,c(1:n_city,1)],],type='o',pch=20, col='magenta',
cex=2.5,xlab='location @X',ylab='location @Y',main='Optimized route')
return(optHist)
}

TSP_demo()

### GA / ES
# genetic algorithm - identify the best linear model
GA_recomb<-function(G) {
nPop=nrow(G);
nVar=ncol(G);
child = G;
G.permuted = G[sample(1:nPop),]
recomb.idx=which(matrix(sample(0:1,nPop*nVar,replace=TRUE),nrow=nPop)==1)
child[recomb.idx] = G.permuted[recomb.idx]
return(child)
}

GA_mutate = function(child, p){
n.pop = nrow(child)
n.var = ncol(child)
mut.mat= matrix((runif(n.pop*n.var) < p), nrow = n.pop)
child = abs(child-mut.mat)
return(child)
}

GA_survive<-function(G, child, fitG, fitC){
nPop=nrow(G);
fitT=c(fitG,fitC);
fitMax=sort(fitT,index.return=T,decreasing = T)
tempX=rbind(G,child);
G=tempX[fitMax\$ix[1:nPop],]
return(G)
}

GA<-function(G, nGen,p,X,Y){
optHist=matrix(0,nrow=nGen,ncol=1)
G.hist = matrix(0,nrow=nGen,ncol = ncol(G))
nPop = nrow(G)
nVar = ncol(G)
fitG = rep(0,nPop)
fitC = fitG
for (i.pop in 1:nPop){
dat.temp = X[,which(G[i.pop,]==1)]
}
optHist[1]=max(c(fitG))
G.hist[1,] = G[which.max(fitG),]
for (i_gen in 2:nGen) {
child<-GA_recomb(G)
child<-GA_mutate(child,p)
# assuming same numbers of set of genes
for (i.pop in 1:nPop){
dat.temp = X[,which(G[i.pop,]==1)]
if (sum(child[i.pop,])==0){
child[i.pop,sample(1:ncol(child.all),sample(1:nVar,1))]=1
}
dat.temp = X[,which(child[i.pop,]==1)]
}
G<-GA_survive(G, child, fitG, fitC)
optHist[i_gen]=max(c(fitG,fitC))
G.hist[i_gen, ] = G[1,]
}
return(list(G = G, optHist = optHist, G.hist = G.hist))
}

set.seed(19)
nVar = 10
nData = 50
X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
y=X%*%c(-2,0,2,0,2,0,-3,0,-2,0)+rnorm(nData,mean=0,sd=8);
summary(lm(y~X[,seq(1,9,2)]))
summary(lm(y~X))
G = matrix(sample(0:1,300,replace = T),nrow=30)
res<-GA(G,100,0.1,X,y)

C = combn(3,2)
temp.lab =toString(paste("X",C[ , i.comb], sep = ""))
gsub(",", "*", temp.lab)

n.comb = ncol(C)
for (i.comb in 1: n.comb){
temp.label = toString(paste("X",C[ , i.comb], sep = ""))
var.labels = c(var.labels, gsub(",", "*", temp.label) )
}

mk.labels <- function(C){
var.labels = c()
n.comb = ncol(C)
for (i.comb in 1: n.comb){
temp.label = toString(paste("X",C[ , i.comb], sep = ""))
var.labels = c(var.labels, gsub(",", "*", temp.label) )
}
return(var.labels)
}
var.labels = paste("X", 1:n.var, sep = "")

# interaction terms
for (i.interaction  in 2:max.interaction ){
combination = combn(n.var, i.interaction)
var.labels = c(var.labels, mk.labels(combination))
}
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[c(1,1,1,1,1,0,0) == 1])))
X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
y=X%*%c(1,1,-2)+rnorm(nData,mean=0,sd=3);
dat = data.frame(y=y,X1=X[,1],X2=X[,2],X3=X[,3])

model.lm = lm(model.def, dat)

## full implementation
# function to make model labels for paritcular set of combn
mk.labels <- function(C){
var.labels = c()
n.comb = ncol(C)
for (i.comb in 1: n.comb){
temp.label = toString(paste("X",C[ , i.comb], sep = ""))
#  options for model specification
#  Opt 1: models that include all lower-degree interaction term
#  Opt 2: models that do NOT include any lower-degree interaction term
#
# Opt1
#var.labels = c(var.labels, gsub(",", ":", temp.label) )
#
# Opt2
var.labels = c(var.labels, gsub(",", ":", temp.label) )
}
return(var.labels)
}

# function to make all model labels given number of variable
# and maximum degrees of interactions
mk.labels.all <- function(n.var, max.interaction){
var.labels = paste("X", 1:n.var, sep = "")
for (i.interaction  in 2:max.interaction ){
combination = combn(n.var, i.interaction)
var.labels = c(var.labels, mk.labels(combination))
}
return(var.labels)
}

# crearting model label
# assuming 10 variables & 5-degree interaction terms
var.labels<-mk.labels.all(5,5)

# generating data set
set.seed(30)
nData = 1000; n.var = 5
X=matrix(rnorm(n.var*nData,mean=10,sd=5),ncol=n.var);
Y=X[,seq(1,5)]%*%c(1,1,-2,-1,1) +
apply(X[,c(1,3)],1,prod)*0.1 +
apply(X[,c(2,4)],1,prod)*-0.1 +
apply(X[,c(1,3,5)],1,prod)*0.01 +
apply(X[,c(2,4,5)],1,prod)*0.01+
rnorm(nData,mean=0,sd=10);

dat = data.frame(Y,X)
colnames(dat) <- c('Y', paste("X",1:n.var,sep=""))
# checking fit of the "TRUE" model
summary(lm(Y~X1*X3+X2*X4+X1:X3:X5++X2:X4:X5+X5, dat))
AIC(lm(Y~X1*X3+X2*X4+X1:X3:X5++X2:X4:X5+X5, dat))

# initializing parent population
nPop = 50
G = matrix(sample(0:1,nPop*length(var.labels),replace=TRUE,prob = c(0.8, 0.2)),nrow=nPop)

# recombination
GA_recomb<-function(G) {
nPop=nrow(G);
nVar=ncol(G);
child = G;
G.permuted = G[sample(1:nPop),]
recomb.idx=which(matrix(sample(0:1,nPop*nVar,replace=T),nrow=nPop)==1)
child[recomb.idx] = G.permuted[recomb.idx]
return(child)
}

# mutation
GA_mutate = function(child, p){
n.pop = nrow(child)
n.var = ncol(child)
mut.mat= matrix((runif(n.pop*n.var) < p), nrow = n.pop)
child = abs(child-mut.mat)
return(child)
}

# surviver selection - works with either min or max prob.
GA_survive<-function(G, child, fitG, fitC, mnmx = "min"){
nPop=nrow(G);
fitT=c(fitG,fitC);
if (mnmx == "max"){
fitMax=sort(fitT,index.return=TRUE,decreasing = TRUE)
} else {
fitMax=sort(fitT,index.return=TRUE)
}
tempX=rbind(G,child);
G=tempX[fitMax\$ix[1:nPop],]
return(G)
}

# main function
GA<-function(G, nGen,p,dat){
optHist=matrix(0,nrow=nGen,ncol=1)
G.hist = matrix(0,nrow=nGen,ncol = ncol(G))
nPop = nrow(G)
nVar = ncol(G)
fitG = rep(0,nPop)
fitC = fitG
for (i.pop in 1:nPop){
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[G[i.pop,] == 1])))
fitG[i.pop] = AIC(lm(model.def,dat))
}
optHist[1]=max(c(fitG))
G.hist[1,] = G[which.max(fitG),]

# to display progress
prog.prop = round(seq(0,nGen,length.out=10))
prog.report = paste(seq(0,100,10),"% done\n",sep="")
for (i_gen in 2:nGen) {
if (any(i_gen == prog.prop)){
cat(prog.report[which(i_gen==prog.prop)])
}
child <- GA_recomb(G)
child <- GA_mutate(child,p)
# assuming same numbers of set of genes
for (i.pop in 1:nPop){
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[G[i.pop,] == 1])))
fitG[i.pop] = AIC(lm(model.def,dat))
if (sum(child[i.pop,],na.rm=TRUE)==0){
child[i.pop,sample(1:ncol(child.all),sample(1:nVar,1))]=1
}
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[child[i.pop,] == 1])))
fitC[i.pop] = AIC(lm(model.def,dat))
}

G <- GA_survive(G, child, fitG, fitC, "min")
optHist[i_gen]=min(c(fitG,fitC))
G.hist[i_gen, ] = G[1,]
}
return(list(G = G, optHist = optHist, G.hist = G.hist))
}

# running simulation
res = GA(G,1000,0.15, dat)
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[res\$G[1,] == 1])))
summary(lm(model.def,dat))
AIC(lm(model.def,dat))
plot(res\$optHist,type='l',xlab="generation",ylab="AIC")
var.labels[which(res\$G[1,]==1)]

### end extensive GA

# Evolutionary Strategy - estimating coefficient
ES_recomb<-function(G) {
nParent=nrow(G\$x);nVar=ncol(G\$x)
child = G;
for (i_child in 1:nParent) {
parentID=sample(1:nParent,2)
coID=sample(c(0,1),nVar,replace=T)
child\$x[i_child,]=G\$x[parentID[1],]
child\$x[i_child,which(coID==1)]=G\$x[parentID[2],which(coID==1)]
child\$sigma[i_child,]=0.5*(G\$sigma[parentID[1],]+G\$sigma[parentID[2],])
}
return(child)
}

ES_mutate<-function(child,tau){
nChild=nrow(child\$x);nVar=ncol(child\$x)
child\$sigma<-child\$sigma*exp(matrix(rnorm(nChild*nVar)*tau,nrow=nChild))
child\$x=child\$x+child\$sigma*matrix(rnorm(nChild*nVar),nrow=nChild,ncol=nVar)
return(child)
}

ES_survive<-function(G, child, fitG, fitC){
nParent=nrow(G\$x);
fitT=c(fitG,fitC);
fitMin=sort(fitT,index.return=T)
tempX=rbind(G\$x,child\$x);
tempS=rbind(G\$sigma,child\$sigma)
G\$x=tempX[fitMin\$ix[1:nParent],]
G\$sigma=tempS[fitMin\$ix[1:nParent],]
return(G)
}

ES<-function(G,func, nGen, tau,X,y){
optHist=matrix(0,nrow=nGen,ncol=1)
G.hist = matrix(0,nrow=nGen,ncol = ncol(G\$x))
fitG = fitG = apply(G\$x,1,func,X,y)
optHist[1] = min(fitG)
G.hist[1,] = G\$x[which.min(fitG),]
child = G;
for (i_gen in 2:nGen) {
child<-ES_recomb(G)
child<-ES_mutate(child,tau)
fitG = apply(G\$x,1,func,X,y)
fitC = apply(child\$x,1,func,X,y)
G<-ES_survive(G, child, fitG, fitC)
optHist[i_gen]=min(c(fitG,fitC))
G.hist[i_gen, ] = G\$x[1,]
}
return(list(G = G, optHist = optHist, G.hist = G.hist))
}

set.seed(20); nData = 100
X=matrix(rnorm(9*nData,mean=10,sd=2),ncol=9);X=cbind(rep(1,nData),X)
y=X%*%c(10,2,5,-3,-5,0,0,0,0,0)+rnorm(nData,mean=0,sd=2);
fun04<-function(b,X,y){
yhat<-X%*%b
return(sum((y-yhat)^2))
}
G = list()
G\$x = matrix(rnorm(10*30), ncol=10)
G\$sigma = matrix(runif(10*30,0.5,1.5), ncol=10)

res=ES(G, fun04, 1000, 1,X,y)

# PSO - optim. 2-variable function
PSO<-function(G, wP, wG, func, maxIter){
swarm.hist = array(0, c(nrow(G), ncol(G), maxIter))
swarm.hist[,,1]=G
p.b.hist = apply(G,1,func)
global.best.v = min(p.b.hist)
p.Best = G
g.Best = matrix(G[which.min(p.b.hist),],nrow=nrow(G),ncol=ncol(G),byrow=T)
v = matrix(0,nrow = nrow(G),ncol = ncol(G))
for (i.iter in 2:maxIter){
v = v + wP*runif(1)*(p.Best - G) + wG*runif(1)*(g.Best - G)
G = G + v
fitG = apply(G,1,func)
if (min(fitG) < global.best.v){
g.Best = matrix(G[which.min(fitG),],nrow=nrow(G),ncol=ncol(G),byrow=T)
global.best.v = min(fitG)
}
idx = which(fitG < p.b.hist)
p.Best[idx,] = G[idx,]
p.b.hist= fitG
swarm.hist[,,i.iter]=G
}
return(swarm.hist)
}

# running PSO
par(mfrow=c(1,1))
fun03<-function(x) {x[1]^4-16*x[1]^2-5*x[1]+x[2]^4-16*x[2]^2-5*x[2]}
x<-seq(-5, 5,length.out=100);y<-x;z<-outer(x,y,funZ)
contour(x,y,z,nlevels=30,drawlabel=F)
nSwarm = 100; nVar = 2;
G=matrix(rnorm(nVar*nSwarm,mean=0,sd=0.1),ncol=nVar)
res<-PSO(G,0.1,0.1,fun03,500)
lines(res[1,1,],res[1,2,],type='o',col='red')
```