# データ解析基礎論B GLM

```library(tidyverse)
ggplot(dat) +
geom_point(aes(x = study, y = pass), size =3) +
xlab("Hours studied") +
ylab("Passing")

dat.lm <- lm(pass~study,dat)
ggplot(dat,aes(x = study, y = pass)) +
geom_point(size =3) +
xlab("Hours studied") +
ylab("Passing") +
geom_abline(slope = dat.lm\$coefficients[2], intercept = dat.lm\$coefficients[1]+1, color = "red",size = 2)
par(mfrow=c(2,2))
plot(dat.lm)

real.p = data.frame( real.p = table(dat\$pass, dat\$study)[2,] / colSums(table(dat\$pass, dat\$study)), x = 0:30)
ggplot(real.p,aes(x = x, y = real.p)) +
geom_point(size =3) +
xlab("Hours studied") +
ylab("Passing (actual probability)")

dat.lr <- glm(pass~study,family=binomial, data=dat)
summary(dat.lr)
coef = coefficients(dat.lr)
temp.x = seq(0,30,0.1)
pred.pass.p = data.frame(pred.p = 1/(1+exp(-(coef[1]+coef[2]*temp.x))), x = temp.x)
ggplot(dat, aes(x = study,y = pass)) +
geom_point(size=3) +
geom_line(aes(x =x, y= pred.p + 1), data = pred.pass.p, color = 'red',size = 1)+
xlab("Hours studied") +
ylab("Passing")

##
pred.pass.p = 1/(1+exp(-(coef[1]+coef[2]*c(10:15))))
odds=pred.pass.p/(1-pred.pass.p)
exp(coef[2])
odds[2:6]/odds[1:5]

dat.lr<-glm(gender~shoesize,family=binomial,data=dat)
anova(dat.lr, test ="Chisq")
dat.lr0<-glm(gender~1,family="binomial",data=dat)
dat.lrS<-glm(gender~shoesize,family=binomial,data=dat)
dat.lrh<-glm(gender~h,family="binomial",data=dat)

M=matrix(c(52,48,8,42),nrow=2)
rownames(M)<-c("present", "absent")
colnames(M)<-c("smoker",'non-smoker')
dat<-as.data.frame((as.table(M)))
colnames(dat)<-c("cancer","smoking","freq")
dat=dat[rep(1:nrow(dat),dat\$freq),1:2]
rownames(dat)<-c()

dat.glm<-glm(cancer~smoking,family=binomial,data=dat)

dat.glm<-glm(survival~age, family=binomial,data=dat)
dat.glm2<-glm(survival~Ncigarettes,family=binomial,data=dat)
dat.glm3<-glm(survival~NdaysGESTATION,family=binomial,data=dat)
dat.glmAllMult=glm(survival~age*Ncigarettes*NdaysGESTATION,family=binomial,data=dat)
library(MASS)
stepAIC(dat.glmAllMult)

ggplot(dat) +
geom_histogram(aes(eye.count), fill='red') +
xlab("Number of times looking at eyes")

ggplot(dat, aes(x=attr,  y = eye.count)) +
geom_point(size =2) +
ylab("Number of times looking at eyes")+
xlab("Attractiveness") +
geom_abline(slope = dat.lm\$coefficients[2], intercept = dat.lm\$coefficients[1], color = "red",size = 2)

dat.lm <- lm(eye.count~attr,data = dat)
dat.pr<-glm(eye.count~attr,family = "poisson",data=dat)
cf = coefficients(dat.pr)
x.temp <- seq(0,10,0.1)
pred.c = data.frame(x=x.temp, pred = exp(cf[1]+cf[2]*x.temp))
ggplot(dat, aes(x=attr,  y = eye.count)) +
geom_point(size =2) +
ylab("Number of times looking at eyes")+
xlab("Attractiveness") +
geom_abline(slope = dat.lm\$coefficients[2], intercept = dat.lm\$coefficients[1], color = "red",size = 2)+
geom_line(aes(x = x.temp, y= pred),data = pred.c, color="blue", size=2)
```