# genetic algorithm - identify the best linear model
GA_recomb<-function(G) {
nPop=nrow(G);
nVar=ncol(G);
child = G;
G.permuted = G[sample(1:nPop),]
recomb.idx=which(matrix(sample(0:1,nPop*nVar,replace=T),nrow=nPop)==1)
child[recomb.idx] = G.permuted[recomb.idx]
return(child)
}
GA_mutate = function(child, p){
n.pop = nrow(child)
n.var = ncol(child)
mut.mat= matrix((runif(n.pop*n.var) < p), nrow = n.pop)
child = abs(child-mut.mat)
return(child)
}
GA_survive<-function(G, child, fitG, fitC){
nPop=nrow(G);
fitT=c(fitG,fitC);
fitMax=sort(fitT,index.return=TRUE,decreasing = TRUE)
tempX=rbind(G,child);
G=tempX[fitMax$ix[1:nPop],]
return(G)
}
GA<-function(G, nGen,p,X,Y){
optHist=matrix(0,nrow=nGen,ncol=1)
G.hist = matrix(0,nrow=nGen,ncol = ncol(G))
nPop = nrow(G)
nVar = ncol(G)
fitG = rep(0,nPop)
fitC = fitG
for (i.pop in 1:nPop){
dat.temp = X[,which(G[i.pop,]==1)]
fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
}
optHist[1]=max(c(fitG))
G.hist[1,] = G[which.max(fitG),]
for (i_gen in 2:nGen) {
child<-GA_recomb(G)
child<-GA_mutate(child,p)
# assuming same numbers of set of genes
for (i.pop in 1:nPop){
dat.temp = X[,which(G[i.pop,]==1)]
fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
if (sum(child[i.pop,])==0){
child[i.pop,sample(1:ncol(child.all),sample(1:nVar,1))]=1
}
dat.temp = X[,which(child[i.pop,]==1)]
fitC[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
}
G<-GA_survive(G, child, fitG, fitC)
optHist[i_gen]=max(c(fitG,fitC))
G.hist[i_gen, ] = G[1,]
}
return(list(G = G, optHist = optHist, G.hist = G.hist))
}
set.seed(19)
nVar = 10
nData = 50
X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
y=X%*%c(-2,0,2,0,2,0,-3,0,-2,0)+rnorm(nData,mean=0,sd=8);
summary(lm(y~X[,seq(1,9,2)]))
summary(lm(y~X))
G = matrix(sample(0:1,300,replace = T),nrow=30)
res<-GA(G,100,0.1,X,y)
head(res$G)
# GA - challenging task
C = combn(3,2)
temp.lab =toString(paste("X",C[ , i.comb], sep = ""))
gsub(",", "*", temp.lab)
n.comb = ncol(C)
var.labels = c()
for (i.comb in 1: n.comb){
temp.label = toString(paste("X",C[ , i.comb], sep = ""))
var.labels = c(var.labels, gsub(",", "*", temp.label) )
}
mk.labels <- function(C){
var.labels = c()
n.comb = ncol(C)
for (i.comb in 1: n.comb){
temp.label = toString(paste("X",C[ , i.comb], sep = ""))
var.labels = c(var.labels, gsub(",", "*", temp.label) )
}
return(var.labels)
}
var.labels = paste("X", 1:n.var, sep = "")
# interaction terms
for (i.interaction in 2:max.interaction ){
combination = combn(n.var, i.interaction)
var.labels = c(var.labels, mk.labels(combination))
}
model.def = paste("Y ~", gsub(",", "+", toString(var.labels[c(1,1,1,1,1,0,0) == 1])))
X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
y=X%*%c(1,1,-2)+rnorm(nData,mean=0,sd=3);
dat = data.frame(y=y,X1=X[,1],X2=X[,2],X3=X[,3])
model.lm = lm(model.def, dat)
Related