データ解析基礎論B GLM

library(tidyverse)
dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/logisticReg01.txt ")
ggplot(dat) +
  geom_point(aes(x = study, y = pass), size =3) +
  xlab("Hours studied") +
  ylab("Passing")


dat.lm <- lm(pass~study,dat)
ggplot(dat,aes(x = study, y = pass)) +
  geom_point(size =3) +
  xlab("Hours studied") +
  ylab("Passing") +
  geom_abline(slope = dat.lm$coefficients[2], intercept = dat.lm$coefficients[1]+1, color = "red",size = 2)
par(mfrow=c(2,2))
plot(dat.lm)

real.p = data.frame( real.p = table(dat$pass, dat$study)[2,] / colSums(table(dat$pass, dat$study)), x = 0:30)
ggplot(real.p,aes(x = x, y = real.p)) +
  geom_point(size =3) +
  xlab("Hours studied") +
  ylab("Passing (actual probability)") 
  

dat.lr <- glm(pass~study,family=binomial, data=dat)
summary(dat.lr)
coef = coefficients(dat.lr)
temp.x = seq(0,30,0.1)
pred.pass.p = data.frame(pred.p = 1/(1+exp(-(coef[1]+coef[2]*temp.x))), x = temp.x)
ggplot(dat, aes(x = study,y = pass)) +
  geom_point(size=3) +
  geom_line(aes(x =x, y= pred.p + 1), data = pred.pass.p, color = 'red',size = 1)+
  xlab("Hours studied") +
  ylab("Passing")
  
##
pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*c(10:15))))
odds=pred.pass.p/(1-pred.pass.p)
exp(coef[2])
odds[2:6]/odds[1:5]


dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dat.lr<-glm(gender~shoesize,family=binomial,data=dat)
anova(dat.lr, test ="Chisq")
dat.lr0<-glm(gender~1,family="binomial",data=dat)
dat.lrS<-glm(gender~shoesize,family=binomial,data=dat)
dat.lrh<-glm(gender~h,family="binomial",data=dat)


M=matrix(c(52,48,8,42),nrow=2)
rownames(M)<-c("present", "absent")
colnames(M)<-c("smoker",'non-smoker')
dat<-as.data.frame((as.table(M)))
colnames(dat)<-c("cancer","smoking","freq")
dat=dat[rep(1:nrow(dat),dat$freq),1:2]
rownames(dat)<-c()

dat.glm<-glm(cancer~smoking,family=binomial,data=dat)


dat<-read.csv("http://www.matsuka.info/data_folder/cda7-16.csv")
dat.glm<-glm(survival~age, family=binomial,data=dat)
dat.glm2<-glm(survival~Ncigarettes,family=binomial,data=dat)
dat.glm3<-glm(survival~NdaysGESTATION,family=binomial,data=dat)
dat.glmAllAdd=glm(survival~age+Ncigarettes+NdaysGESTATION,family=binomial,data=dat)
dat.glmAllMult=glm(survival~age*Ncigarettes*NdaysGESTATION,family=binomial,data=dat)
library(MASS)
stepAIC(dat.glmAllMult)


dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/poissonReg01.txt ")
ggplot(dat) +
  geom_histogram(aes(eye.count), fill='red') +
  xlab("Number of times looking at eyes")

ggplot(dat, aes(x=attr,  y = eye.count)) +
  geom_point(size =2) +
  ylab("Number of times looking at eyes")+
  xlab("Attractiveness") +
  geom_abline(slope = dat.lm$coefficients[2], intercept = dat.lm$coefficients[1], color = "red",size = 2)

dat.lm <- lm(eye.count~attr,data = dat)
dat.pr<-glm(eye.count~attr,family = "poisson",data=dat)
cf = coefficients(dat.pr)
x.temp <- seq(0,10,0.1)
pred.c = data.frame(x=x.temp, pred = exp(cf[1]+cf[2]*x.temp))
ggplot(dat, aes(x=attr,  y = eye.count)) +
  geom_point(size =2) +
  ylab("Number of times looking at eyes")+
  xlab("Attractiveness") +
  geom_abline(slope = dat.lm$coefficients[2], intercept = dat.lm$coefficients[1], color = "red",size = 2)+
  geom_line(aes(x = x.temp, y= pred),data = pred.c, color="blue", size=2)