データ解析基礎論B W10

install.packages("nnet")
library(nnet)

# regression
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
for (i.iter in 1:10){
set.seed(5)
print(i.iter)
x = dat[,1:3]
y = dat[,4]
dat.nnet = nnet(x,y, size = 150, linout= TRUE,maxit = 1000)
nnet.pred <-predict(dat.nnet,dat)
print(c(cor(nnet.pred,dat$sales),cor(nnet.pred,dat$sales)^2))
plot(dat.nnet$fitted.values, dat$sales,pch=20,col='black')
points(predict(dat.lm), dat$sales,pch=20,col='red')
abline(h=max(dat$sales),col='green')
abline(v=max(dat$sales),col='green')
abline(h=min(dat$sales),col='green')
abline(v=min(dat$sales),col='green')
abline(h=mean(dat$sales),col='green')
abline(v=mean(dat$sales),col='green')


n.data = nrow(dat);n.sample = n.data*0.6; n.rep = 1000 
trainNN.cor = rep(0,n.rep); trainLM.cor = rep(0,n.rep)
testNN.cor = rep(0,n.rep); testLM.cor = rep(0,n.rep)
for (i.rep in 1:n.rep){  
  randperm = sample(1:n.data)  
  train.idx = randperm[1:n.sample]  
  test.idx = randperm[(n.sample+1):n.data]  
  dat.nnet <- nnet(sales~.,size = 10, linout=T,decay= 0.1, maxit=1000, data = dat[train.idx,])  
  dat.lm <-lm(sales~.,data=dat[train.idx, ])  
  trainNN.cor[i.rep] <- cor(predict(dat.nnet,dat[train.idx, ]), dat[train.idx,]$sales)  
  trainLM.cor[i.rep] <- cor(predict(dat.lm,dat[train.idx, ]), dat[train.idx,]$sales)  
  testNN.cor[i.rep] <- cor(predict(dat.nnet,dat[test.idx, ]), dat[test.idx,]$sales)  
  testLM.cor[i.rep] <- cor(predict(dat.lm,dat[test.idx, ]), dat[test.idx,]$sales)
}
print(c(mean(trainNN.cor,na.rm=T),mean(testNN.cor,na.rm=T)))
print(c(mean(trainLM.cor,na.rm=T),mean(testLM.cor,na.rm=T)))

# logistic regression
dat<-read.csv("http://www.matsuka.info/data_folder/tdkDA01.csv")
dat.nnet<-nnet(class~.,dat,size=30,maxit=1000,decay=0.05)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$class)

dat.glm<-glm(class~., family="binomial",dat)
glm.pred<-predict(dat.glm, dat, type="response")>0.5
table(glm.pred,dat$class)
dat.glm<-glm(class~social*coop.*dilig.*enterp., family="binomial",dat)


dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.nnet<-nnet(survival~., dat, size=30, maxit=1000, decay=0.01)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$survival)

Ns = summary(dat$survival)
(Ns[1]/Ns[2])^-1

wts = rep(1,nrow(dat))
wts[which(dat$survival=="no")]=45
dat.nnet<-nnet(survival~., weights=wts, dat, size=30, maxit=1000, decay=0.01)
dat.pred<-predict(dat.nnet,dat,type="class")
table(dat.pred,dat$survival)

# discriminant analysis
dat<-read.csv("http://www.matsuka.info/data_folder/tdkDA02.csv")
class.id<-class.ind(dat$class)
x = dat[,1:6]
dat.nnet<-nnet(x,class.id,size=30, maxit=1000, decay=0.01, softmax=TRUE)
max.id = apply(dat.nnet$fitted.values,1,which.max)
table(max.id,dat$class)


dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.nnet<-nnet(dat,dat,size=2, maxit=1000, decay=0.01, linout=TRUE)