2019 データ解析基礎論A 回帰分析2

# two sample t-test
dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
boxplot(dat$ani, dat$otouto, col = c("skyblue", "orange"),ylab = "Score",
        axnt ="n")
axis(1, at = c(1,2),labels = c("Ani", "Otouto"))
t.test(dat$ani, dat$otouto, var.equal=T)
dat2<-data.frame(score=c(dat$ani,dat$otouto),
                 order=c(rep("ani",10),rep("otouto",10)))

plot(dat2$score~as.numeric(dat2$order), pch=20, xlab="order",
     ylab="score", xlim=c(0.5,2.5), cex=2, xaxt="n")
axis(1, at = c(1,2),labels = c("Ani", "Otouto"))
dat2.lm<-lm(score~order,data=dat2)
abline(dat2.lm,col='red',lwd=3)

# one sample t-test
dat.D = dat$ani - dat$otouto
boxplot(dat.D,col="skyblue",ylab="Difference")
t.test(dat.D)

plot(dat.D~rep(1,10), pch=20, xlab="", ylab="Difference", cex=3, xaxt="n")
dat.D.lm<-lm(dat.D~1)
abline(dat.D.lm,col='red',lwd=3)

# LM with 3 or more categories
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv")
dat2<-data.frame(result=dat$result, method=dat$method,
                 c1=c(rep(0,8),rep(1,8),rep(0,16)),
                 c2=c(rep(0,16),rep(1,8),rep(0,8)),
                 c3=c(rep(0,24),rep(1,8)))

dat2.lm<-lm(result~c1+c2+c3,data=dat2)
dat.lm <- lm(result~method, dat)

dat3<-data.frame(result=dat$result, 
                 c1=c(rep(-3,8), rep(1,24)), 
                 c2=c(rep(0,8),rep(-2,8),rep(1,16)),
                 c3=c(rep(0,16),rep(-1,8),rep(1,8)))

# trend analysis
dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt")
plot(dat$result~dat$duration,data=dat[dat$method=="method.X",])
result<-dat$result[dat$method=="method.X"]
CL=c(rep(-3,5),rep(-1,5),rep(1,5),rep(3,5))
CQ=c(rep(-1,5),rep(1,5),rep(1,5),rep(-1,5))
CC=c(rep(-3,5),rep(1,5),rep(-1,5),rep(3,5))
trend.lm<-lm(result~CL+CQ+CC)
trend.lm2 <- lm(result~duration, contrasts=list(duration = "contr.poly"),data=dat.x)
summary(trend.lm2)

# trend analysis - numeric predictor
set.seed(15)
x = runif(50,0,30)
y = x^0.5 + rnorm(50,9,0.2)
dat.xy = data.frame(y=y,x=x, xsq = x^2)
plot(x,y, xlab = "Hours Studies", ylab="score",
     pch=20, cex=2, col="orange")
summary(lm(y~x+xsq, data=dat.xy))

# regression diagnostic
dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") 
dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat)))
plot(dat.lm01,which=1)
plot(dat.lm01,which=2)

norm.vars=rnorm(300)
qqnorm(norm.vars)
qqline(norm.vars,col='red',lwd=2) 

unif.vars=runif(300)
qqnorm(unif.vars)
qqline(unif.vars,col='green',lwd=2)

plot(sort(unif.vars),sort(unif.vars))
plot(sort(norm.vars),sort(unif.vars))

par(mfrow=c(2,2))
plot(dat.lm01)

dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv")
plot(dat)
dat.lm<-lm(sales~., data=dat) 
install.packages("DAAG")
library(DAAG)
vif(dat.lm)
lm.price<-lm(price~material+design+dump, data=dat)
p.rsq = summary(lm.price)$r.squared
vif.p = 1/(1-p.rsq)