認知情報解析演習 08

  # genetic algorithm - identify the best linear model
  GA_recomb<-function(G) {
    nPop=nrow(G);
    nVar=ncol(G);
    child = G;
    G.permuted = G[sample(1:nPop),]
    recomb.idx=which(matrix(sample(0:1,nPop*nVar,replace=T),nrow=nPop)==1)
    child[recomb.idx] = G.permuted[recomb.idx]
    return(child)
  }
  
  GA_mutate = function(child, p){
    n.pop = nrow(child)
    n.var = ncol(child)
    mut.mat= matrix((runif(n.pop*n.var) < p), nrow = n.pop)
    child = abs(child-mut.mat)
    return(child)
  }
  
  GA_survive<-function(G, child, fitG, fitC){
    nPop=nrow(G);
    fitT=c(fitG,fitC);
    fitMax=sort(fitT,index.return=TRUE,decreasing = TRUE)
    tempX=rbind(G,child);
    G=tempX[fitMax$ix[1:nPop],]
    return(G)
  }
  
  GA<-function(G, nGen,p,X,Y){
    optHist=matrix(0,nrow=nGen,ncol=1)
    G.hist = matrix(0,nrow=nGen,ncol = ncol(G))
    nPop = nrow(G)
    nVar = ncol(G)
    fitG = rep(0,nPop)
    fitC = fitG
    for (i.pop in 1:nPop){
      dat.temp = X[,which(G[i.pop,]==1)]
      fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
    }
    optHist[1]=max(c(fitG))
    G.hist[1,] = G[which.max(fitG),]
    for (i_gen in 2:nGen) {
      child<-GA_recomb(G)
      child<-GA_mutate(child,p)
      # assuming same numbers of set of genes
      for (i.pop in 1:nPop){
        dat.temp = X[,which(G[i.pop,]==1)]
        fitG[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
        if (sum(child[i.pop,])==0){
          child[i.pop,sample(1:ncol(child.all),sample(1:nVar,1))]=1
        }
        dat.temp = X[,which(child[i.pop,]==1)]
        fitC[i.pop] = summary(lm(Y~dat.temp))$adj.r.squared
      }
      G<-GA_survive(G, child, fitG, fitC)
      optHist[i_gen]=max(c(fitG,fitC))
      G.hist[i_gen, ] = G[1,]
    }
    return(list(G = G, optHist = optHist, G.hist = G.hist))
  }
  
  set.seed(19)
  nVar = 10
  nData = 50
  X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
  y=X%*%c(-2,0,2,0,2,0,-3,0,-2,0)+rnorm(nData,mean=0,sd=8);
  summary(lm(y~X[,seq(1,9,2)]))
  summary(lm(y~X))
  G = matrix(sample(0:1,300,replace = T),nrow=30)
  res<-GA(G,100,0.1,X,y)
  head(res$G)
  
  # GA - challenging task
  C = combn(3,2)
  temp.lab =toString(paste("X",C[ , i.comb], sep = ""))
  gsub(",", "*", temp.lab) 
  
  n.comb = ncol(C)
  var.labels = c()
  for (i.comb in 1: n.comb){
    temp.label = toString(paste("X",C[ , i.comb], sep = ""))
    var.labels = c(var.labels, gsub(",", "*", temp.label) )
  }
  
  mk.labels <- function(C){
    var.labels = c()
    n.comb = ncol(C)
    for (i.comb in 1: n.comb){
      temp.label = toString(paste("X",C[ , i.comb], sep = ""))
      var.labels = c(var.labels, gsub(",", "*", temp.label) )
    }
    return(var.labels)
  }
  var.labels = paste("X", 1:n.var, sep = "")
  
  # interaction terms
  for (i.interaction  in 2:max.interaction ){
    combination = combn(n.var, i.interaction)
    var.labels = c(var.labels, mk.labels(combination))
  }
  model.def = paste("Y ~", gsub(",", "+", toString(var.labels[c(1,1,1,1,1,0,0) == 1]))) 
  X=matrix(rnorm(nVar*nData,mean=10,sd=5),ncol=nVar);
  y=X%*%c(1,1,-2)+rnorm(nData,mean=0,sd=3);
  dat = data.frame(y=y,X1=X[,1],X2=X[,2],X3=X[,3])
  
  model.lm = lm(model.def, dat)