source("http://www.matsuka.info/univ/course_folder/cutil.R") dat<-read.csv("http://www.matsuka.info/data_folder/dktb316.txt") dat.aov<-aov(words~method+subj+Error(subj:method),data=dat) dat.aov2<-aov(words~method+Error(subj+subj:method),data=dat) dat.aov.BTW <-aov(words~method,data=dat) summary(dat.aov) RB.qtukey(dat.aov,dat, 0.05) dat<-read.csv("http://www.matsuka.info/data_folder/dktb3211.txt") interaction.plot(dat$duration, # x軸 dat$method, # まとめる変数 dat$result, # y軸 pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration", lwd=3, type='b', cex=2, trace.label="Method") dat.aov <- aov(result~method*duration+Error(s+s:duration),dat) TSME<-SPF.tsme(dat.aov,dat,"result") dat<-read.csv("http://www.matsuka.info/data_folder/dktb3218.txt") dat.aov<-aov(result~method+duration+method:duration + Error(s+method:s+duration:s+method:duration:s), data=dat) TSME<-RBF.tsme(dat.aov, dat, "result")
Category Archives: データ解析
2019年度 データ解析基礎論a 分散分析2
source("http://www.matsuka.info/univ/course_folder/cutil.R") adj.alpha(5,0.05) # anova dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv") dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)]) dat.aov<-aov(result~method, data=dat) summary(dat.aov) # multiple comparison # do not use these command # use simpler one - "cu.bonF1F" dat.means<-tapply(dat$result,dat$method,mean) new.alpha = 1-(1-0.05)^(1/5) cont=c(-3,1,1,1) bunshi=sum(cont*dat.means) bunbo=sqrt(5.29*(sum((cont^2)/8))) t.value=bunshi/bunbo 2*(1-pt(t.value,28)) # cu.bonF1F new.alpha<-adj.alpha(5,0.05) cu.bonF1F(dat.aov,dat,c(-3,1,1,1),new.alpha) cu.bonF1F(dat.aov,dat,c(-1,1,0,0),new.alpha) cu.bonF1F(dat.aov,dat,c(-1,0,1,0),new.alpha) cu.bonF1F(dat.aov,dat,c(-1,0,0,1),new.alpha) cu.bonF1F(dat.aov,dat,c(0,-2,1,1),new.alpha) # cu.scheffe1F new.f<-adj.f(4,3,28,0.05) cu.scheffe1F(dat.aov,dat,c(-3,1,1,1)) # 2 way ANOVA dat <- read.csv("http://peach.l.chiba-u.ac.jp/course_folder/datW03R.txt") interaction.plot(dat$gender, dat$affil, dat$shoesize, pch=c(20,20), col=c("skyblue","orange"), xlab="gender", ylab="shoesize", lwd=3,type='b',cex=2, trace.label="Affiliation") dat.aov=aov(shoesize~gender*affil, data=dat) dat.aov.sum=summary(dat.aov) # testing simple main effect # do not use these command # use simpler one - CRF.tsme means<-tapply(dat$shoesize, list(dat$gender,dat$affil), mean) SS_gen_CS<- 5*(means[2,1]^2 + means[1,1]^2 -0.5*sum(means[,1])^2) # SS_gender CS SS_gen_PS<- 5*(means[2,2]^2 + means[1,2]^2 -0.5*sum(means[,2])^2) # SS_gender PS dat.aov.sum=summary(dat.aov) # ANOVA table MSe=dat.aov.sum[[1]][4,3] # MSE from ANOVA table or MSe=0.62 dfE=dat.aov.sum[[1]][4,1] # DF for error or dfE=16 dfG=1 # DF for gender F_gen_CS=(SS_gen_CS/dfG)/MSe # F-value for gender effect given CS F_gen_PS=(SS_gen_PS/dfG)/MSe # F-value for gender effect given PS P_gen_CS=1-pf(F_gen_CS,1,dfE) # p-value for gender effect given CS P_gen_PS=1-pf(F_gen_PS,1,dfE) SS_affil_F<- 5*(means[1,1]^2+means[1,2]^2-0.5*sum(means[1,])^2) #SS_affil | F SS_affil_M<- 5*(means[2,1]^2+means[2,2]^2-0.5*sum(means[2,])^2) #SS_affil | M dfA=1 # DF for affil F_affil_F=SS_affil_F/dfA/MSe # F-value for affiliation effect | F F_affil_M=SS_affil_M/dfA/MSe # F-value for affiliation effect | M P_affil_F=1-pf(F_affil_F,1,dfE) # p-value for affiliation effect | F P_affil_M=1-pf(F_affil_M,1,dfE) # p-value for affiliation effect | M # testint simple main effect w/ CRF.tsme tsme = CRF.tsme(dat.aov, dat) # another 2-way ANOVA dat <-read.csv("http://www.matsuka.info/data_folder/dktb321.txt") interaction.plot(dat$duration,dat$method,dat$result, pch=c(20,20), col=c("skyblue","orange"), ylab="score", xlab="Duration", lwd=3,type='b',cex=2,trace.label="Method") mod1=aov(result~method+duration,data=dat) mod1.sum=print(summary(mod1)) mod2=aov(result~method*duration,data=dat) mod2.sum=print(summary(mod2)) CRF.tsme(mod2, dat) # plotting dat<-read.csv("http://www.matsuka.info/univ/course_folder/datW03R.txt",header=T) means<-tapply(dat$shoesize,list(dat$gender, dat$affil),mean) Ns<-tapply(dat$shoesize,list(dat$gender, dat$affil),length) sds<-tapply(dat$shoesize,list(dat$gender, dat$affil),sd) sems<-sds/sqrt(Ns) plot(c(0,1),means[,1],type='o',col='skyblue', xlim=c(-0.1,1.1), lwd=2, cex=2, pch=20, ylim=c(min(means)*0.975, max(means)*1.025), xlab="gender", ylab="shoesize", xaxt="n") axis(1,c(0,1),c("female","male")) lines(c(0,1),means[,2],type='o',col='orange',lwd=2,cex=2,pch=20) lines(c(0,0),c(means[1,1]-sems[1,1],means[1,1]+sems[1,1]),col="skyblue",lwd=2) lines(c(0,0),c(means[1,2]-sems[1,2],means[1,2]+sems[1,2]),col="orange",lwd=2) lines(c(1,1),c(means[2,1]-sems[2,1],means[2,1]+sems[2,1]),col="skyblue",lwd=2) lines(c(1,1),c(means[2,2]-sems[2,2],means[2,2]+sems[2,2]),col="orange",lwd=2) legend("topleft",c("CogSci","PsySci"),col=c("skyblue","orange"),lwd=2)
2019 データ解析基礎論A 分散分析1
f=c(24.1,23.9,24.4,24.4,23.5) m=c(25.6,26.1,25.8,25.9,26) # ANOVA w/o aov G.mean=mean(c(f,m)) ss.total=sum((c(f,m)-G.mean)^2) ss.tr=sum((mean(f)-G.mean)^2)*5+sum((mean(m)-G.mean)^2)*5 ss.error=sum((f-mean(f))^2)+sum((m-mean(m))^2) ss.tr+ss.error df.tr = 2 - 1 df.error = (4-1)*2 ms.tr = ss.tr /df.tr ms.error = ss.error /df.error 1-pf(f.value,1,8) # ANOVA w/ aov dat<-data.frame(ssize=c(f,m),gender=c(rep("f",5),rep("m",5))) dat.aov<-aov(ssize ~ gender, data=dat) summary(dat.aov) dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt") dat.aov<-aov(shoesize~club, dat) # TukeyHSD w/o TukeyHSD command qT<-qtukey(0.95, 3, 67) HSD<-qT*sqrt((2.243*(1/23+1/24))/2) means<-tapply(dat$shoesize,dat$club,mean) abs(outer(means,means,"-")) abs(outer(means,means,"-"))>HSD # TukeyHSD TukeyHSD(dat.aov) dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv") dat.aov<-aov(result~method, data=dat) summary(dat.aov) source("http://www.matsuka.info/univ/course_folder/cutil.R") adj.alpha(5,0.05) # multiple t-test dat.means<-tapply(dat$result,dat$method,mean) new.alpha = 1-(1-0.05)^(1/5) cont=c(-3,1,1,1) bunshi=sum(cont*dat.means) bunbo=sqrt(5.29*(sum((cont^2)/8))) t.value=bunshi/bunbo 2*(1-pt(t.value,28)) > new.alpha # bonferroni w/ cUTIL cu.bonF1F(dat.aov,dat,c(-3,1,1,1),new.alpha) cu.bonF1F(dat.aov,dat,c(-1,1,0,0),new.alpha) cu.bonF1F(dat.aov,dat,c(-1,0,1,0),new.alpha) cu.bonF1F(dat.aov,dat,c(-1,0,0,1),new.alpha) cu.bonF1F(dat.aov,dat,c(0,-2,1,1),new.alpha) # scheffe w/ cUTIL new.f<-adj.f(4,3,28,0.05) cu.scheffe1F(dat.aov,dat,c(-3,1,1,1)) cu.scheffe1F(dat.aov,dat,c(-1,0,1,0)) cu.scheffe1F(dat.aov,dat,c(-1,0,0,1))
データ解析基礎論 回帰分析3
dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv") dat.lm<-lm(ani~otouto, dat) plot(hatvalues(dat.lm)) dat.lm$residuals x = dat$otouto mean.x = mean(x) h = 1/length(x)+(x-mean.x)^2/sum((x - mean.x)^2) hatvalues(dat.lm) e.std = e/(slm$sigma*sqrt(1-h)) rstandard(dat.lm) (e^2*h)/(2*slm$sigma^2*(1-h)^2) cooks.distance(dat.lm) dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv") plot(dat) dat.lm<-lm(sales~., data=dat) install.packages("DAAG") library(DAAG) vif(dat.lm) lm.price<-lm(price~material+design+dump, data=dat) p.rsq = summary(lm.price)$r.squared vif.p = 1/(1-p.rsq) summary(dat.lm) dat<-read.table("http://www.matsuka.info/data_folder/tdkPATH01.txt",header=TRUE) plot(dat,pch=20,cex=2) dat.lm1<-lm(absence~interest,dat) dat.lm2<-lm(study~interest+knowledge,dat) dat.lm3<-lm(grade~knowledge+study+absence,dat)
2019 データ解析基礎論A 回帰分析2
# two sample t-test dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv") boxplot(dat$ani, dat$otouto, col = c("skyblue", "orange"),ylab = "Score", axnt ="n") axis(1, at = c(1,2),labels = c("Ani", "Otouto")) t.test(dat$ani, dat$otouto, var.equal=T) dat2<-data.frame(score=c(dat$ani,dat$otouto), order=c(rep("ani",10),rep("otouto",10))) plot(dat2$score~as.numeric(dat2$order), pch=20, xlab="order", ylab="score", xlim=c(0.5,2.5), cex=2, xaxt="n") axis(1, at = c(1,2),labels = c("Ani", "Otouto")) dat2.lm<-lm(score~order,data=dat2) abline(dat2.lm,col='red',lwd=3) # one sample t-test dat.D = dat$ani - dat$otouto boxplot(dat.D,col="skyblue",ylab="Difference") t.test(dat.D) plot(dat.D~rep(1,10), pch=20, xlab="", ylab="Difference", cex=3, xaxt="n") dat.D.lm<-lm(dat.D~1) abline(dat.D.lm,col='red',lwd=3) # LM with 3 or more categories dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv") dat2<-data.frame(result=dat$result, method=dat$method, c1=c(rep(0,8),rep(1,8),rep(0,16)), c2=c(rep(0,16),rep(1,8),rep(0,8)), c3=c(rep(0,24),rep(1,8))) dat2.lm<-lm(result~c1+c2+c3,data=dat2) dat.lm <- lm(result~method, dat) dat3<-data.frame(result=dat$result, c1=c(rep(-3,8), rep(1,24)), c2=c(rep(0,8),rep(-2,8),rep(1,16)), c3=c(rep(0,16),rep(-1,8),rep(1,8))) # trend analysis dat<-read.csv("http://www.matsuka.info/data_folder/dktb321.txt") plot(dat$result~dat$duration,data=dat[dat$method=="method.X",]) result<-dat$result[dat$method=="method.X"] CL=c(rep(-3,5),rep(-1,5),rep(1,5),rep(3,5)) CQ=c(rep(-1,5),rep(1,5),rep(1,5),rep(-1,5)) CC=c(rep(-3,5),rep(1,5),rep(-1,5),rep(3,5)) trend.lm<-lm(result~CL+CQ+CC) trend.lm2 <- lm(result~duration, contrasts=list(duration = "contr.poly"),data=dat.x) summary(trend.lm2) # trend analysis - numeric predictor set.seed(15) x = runif(50,0,30) y = x^0.5 + rnorm(50,9,0.2) dat.xy = data.frame(y=y,x=x, xsq = x^2) plot(x,y, xlab = "Hours Studies", ylab="score", pch=20, cex=2, col="orange") summary(lm(y~x+xsq, data=dat.xy)) # regression diagnostic dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat))) plot(dat.lm01,which=1) plot(dat.lm01,which=2) norm.vars=rnorm(300) qqnorm(norm.vars) qqline(norm.vars,col='red',lwd=2) unif.vars=runif(300) qqnorm(unif.vars) qqline(unif.vars,col='green',lwd=2) plot(sort(unif.vars),sort(unif.vars)) plot(sort(norm.vars),sort(unif.vars)) par(mfrow=c(2,2)) plot(dat.lm01) dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg02.csv") plot(dat) dat.lm<-lm(sales~., data=dat) install.packages("DAAG") library(DAAG) vif(dat.lm) lm.price<-lm(price~material+design+dump, data=dat) p.rsq = summary(lm.price)$r.squared vif.p = 1/(1-p.rsq)
2019データ解析基礎論a 回帰分析
dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv") plot(dat$otouto,dat$ani,pch=20, cex =3, xlab = "score of younger brother", ylab = "score of elder brother") dat.lm <- lm(ani~otouto, data=dat) summary(dat.lm) abline(dat.lm, col = 'red',lwd = 2.5) dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") dat.reg1<-lm(sales~material,data=dat) dat.reg2<-lm(sales~price,data=dat) dat.reg3<-lm(sales~design,data=dat) dat.regALL<-lm(sales~material+price+design,data=dat) dat.regALL<-lm(sales~.,data=dat) dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv") t.test(dat$ani, dat$otouto, var.equal=T) dat2<-data.frame(score=c(dat$ani,dat$otouto),order=c(rep("ani",10),rep("otouto",10))) plot(dat2$score~as.numeric(dat2$order), pch=20, xlab="order", ylab="score", xlim=c(0.5,2.5), cex=2,xaxt="n") axis(1,c(1,2),c("ani","otouto")) dat2.lm<-lm(score~order,data=dat2) abline(dat2.lm,col='red',lwd=3) dat.D = dat$ani - dat$otouto boxplot(dat.D,col="skyblue",ylab="Difference") t.test(dat.D) plot(dat.D~rep(1,10),pch=20,xlab="",ylab="Difference",cex=3) dat.D.lm<-lm(dat.D~1) abline(dat.D.lm,col='red',lwd=3)
2019年度 データ解析基礎論a T検定
x.temp = 0:11 m = dbinom(x.temp, 11, prob=0.5) names(m) <- paste(0:11) barplot(m, col = c(rep("red",3),rep("red",6),rep("red",3))) ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)ssize.mean = mean(ssize) ssize.var = var(ssize) N = 10 t.value=(ssize.mean-24)/(sqrt(ssize.var/N)) x.temp = seq(-4,4,0.01) y.temp = dt(x.temp,df = 9) plot(x.temp,y.temp, type='l',lwd=3,ylab = "Density", xlab = 't-value') abline(v = t.value, col='red',lwd=3,lty=2) abline(v = -t.value, col='red',lwd=3,lty=2) A=c(12,19,10,10,14,18,15,11,16) B=c(15,20,16,14,17,16,12,12,19) d=A-B tValue<-mean(d)/sqrt(var(d)/length(d)) (1-pt(abs(tValue), df=8))*2 x.temp = seq(-4,4,0.01) y.temp = dt(x.temp,df = 8) plot(x.temp,y.temp, type='l',lwd=3,ylab = "Density", xlab = 't-value') abline(v = t.value, col='red',lwd=3,lty=2) abline(v = -t.value, col='red',lwd=3,lty=2) X1=c(78,70,66,76,78,76,88,76) X2=c(76,72,60,72,70,72,84,70) t.value=(mean(X1)-mean(X2))/sqrt((var(X1)+var(X2))/8) 2*(1-pt(abs(t.value),14))
2019年度 データ解析基礎論B W06
x.temp = 0:9 mass= matrix(dbinom(x.temp,9,0.5),nrow=1) colnames(mass) <- paste(0:9) barplot(mass) sum(dbinom(7:9,9,0.5)) 2*(0.5-pnorm(750,800,40)) pnorm(700,800,40) qnorm(c(0.025, 0.975),800,40) qnorm(0.99,800,40) zA=(165-150)/(sqrt(15^2/10)) 1-pnorm(zA) (1-pnorm(zA))*2 dat<-read.csv("http://www.matsuka.info/data_folder/datWA01.txt") mean.M <-mean(dat$h[dat$gender=="M"]) sigma = 10 n.M = length(dat$h[dat$gender=="M"]) z.value=(mean.M-171)/(sqrt(sigma)) ssize = c(24,25,26,23.5,25,27,24,22,27.5,28) ssize.mean = mean(ssize) ssize.var = var(ssize) N = 10 t.value=(ssize.mean-24)/(sqrt(ssize.var/N)) (1-pt(abs(t.value),df=9))*2 t.test(ssize, mu=24)
2019 データ解析基礎論A DAA04
dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt") plot(dat$shoesize, dat$h, main="Relationship b/w shoesize and height", xlab = "shoesize", ylab="height", pch=19, col="red") txt = paste("r =", round(cor(dat$shoesize,dat$h), 4)) text(22, 175, txt, cex = 1.5) abline(h = mean(dat$h), col='blue'); abline(v = mean(dat$shoesize), col='darkgreen'); text(x = 21, y = mean(dat$h)+3, paste("mean height =", round(mean(dat$h),2)), col="blue",adj = 0) text(x = mean(dat$shoesize)+0.2, y = 145, paste("mean shoesize =", round(mean(dat$shoesize),2)), col="darkgreen",adj = 0) abline(lm(dat$h~dat$shoesize), lty=2, lwd=2) plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h, main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height', cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190)) plot(dat[dat$gender=='M',]$shoesize, dat[dat$gender=='M',]$h, main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height', cex.lab=1.5, pch=15, col='green', xlim=c(20,29), ylim=c(140,190)) plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h, main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height', cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190)) points(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h, pch = 15, col = 'green') legend("topleft", c('Female','Male'), pch =c(19,15), col = c('blue','green'), cex = 1.5) dat<-read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv") plot(dat, pch=20, col='blue') dat.pca<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") plot(dat.pca, pch = rownames(dat.pca), cex = 1.7, col = "blue") dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt",header=T) summary(dat) dat.meter = dat[,1:2]*0.01 dat.h.ext5 = dat$h+5 hANDshoe = dat$h+dat$shoesize dat.h.meter.ext5 = dat$h*0.01+0.05
2019データ解析基礎論A DAA03可視化2
v1 = seq(-3,3,0.1) v2 = v1^2 plot(v1, v2, col = "blue", type = "o", lty = 2, pch = 19, cex.lab = 1.5, lwd = 3, main = "Y=X*X", xlab = "X", ylab="X*X", xlim=c(-3.5,3.5), ylim=c(-0.5, 10)) # histogram dat<- read.csv("http://www.matsuka.info/data_folder/datWA01.txt") hist(dat$h) hist(dat$h, breaks = 20, main = "Histogram of Height", xlab = "Height", col = 'blue', xlim = c(140, 190)) dens<-density(dat$h); hist(dat$h, main = "Histogram of Height", xlab = "Height", xlim = c(140,190), probability = T) lines(dens, lwd = 2, col = "red", lty=2) plot(v1, v2, col = "blue", type = "l", pch = 19, cex.lab = 1.5, lwd = 3, xlab = "X", ylab="f(X)", xlim=c(-3.5,3.5), ylim=c(-0.5, 10)) lines(v1, v1^3, col='red',lwd = 3) legend("bottomright", c("x^2","x^3"), col=c('blue','red'), lwd=2) legend(-3,0.5, c("x^2","x^3"), col=c('blue','red'), lwd=2) boxplot(dat$h,main="Boxplot of Height", ylab="Height", col='cyan', ylim=c(140,190)) boxplot(dat$h,main="Boxplot of Height", xlab="Height", col="orange", horizontal=T) boxplot(dat$h ~ dat$gender, main="Distribution of Height by Gender", ylab="Gender", xlab="Height", col=c('blue','cyan'), ylim=c(140,190), horizontal=T) dat<-read.table("http://www.matsuka.info/data_folder/aov01.txt") boxplot(dat$h ~ dat$gender + dat$affil, main="Distribution of Height by Gender and Affiliation", ylab="Gender x Affiliation", xlab="Height", col=c('blue','cyan','red','magenta'), ylim=c(140,190),horizontal=T) interaction.plot(dat$gender, dat$affil, dat$h, pch=c(20,20), col=c("skyblue","orange"), xlab="gender", ylab="height", lwd=3,type='b',cex=2, trace.label="Affiliation") par(mfrow=c(1,2)) hist(dat[dat$gender=="F",]$h, main="Dist. of Height for Female Participants", xlab="Height", xlim=c(140,190), probability=T) dens.F = density(dat[dat$gender=='F',]$h) lines(dens.F, col='blue',lwd=2) #2つ目のhistogram hist(dat[dat$gender=="M",]$h, main="Dist. of Height for Male Participants", xlab="Height", xlim=c(140,190), probability=T,ylim=c(0,0.08)) dens.M = density(dat[dat$gender=='M',]$h) lines(dens.M, col='green', lwd=2) par(mfrow=c(1,1)) plot(dens.F,col='blue',lwd=2, ylab='density', xlim=c(140,190), main="Dist. of Height by gender",xlab='Height') lines(dens.M,col='green',lwd=2) legend("topleft", c('Female','Male'), col=c('blue','green'), cex=1.5,lwd=2) text(157.5, 0.04, 'Female', col='blue', cex=2) text(170, 0.04,'Male', col='green', cex=2) plot(dat$shoesize, dat$h, main="Relationship b/w shoesize and height", xlab = "shoesize", ylab="height", pch=19, col="red") txt = paste("r =", round(cor(dat$shoesize,dat$h), 3)) text(22, 175, txt, cex = 1.5) plot(dat[dat$gender=='F',]$shoesize, dat[dat$gender=='F',]$h, main="Relationship b/w shoesize and height", xlab='shoesize', ylab='height', cex.lab=1.5, pch=19, col='blue', xlim=c(20,29), ylim=c(140,190)) lines(dat[dat$gender=='M',]$shoesize,dat[dat$gender=='M',]$h, type = 'p', pch = 15, col = 'green') legend("topleft", c('Female','Male'), pch =c(19,15), col = c('blue','green'), cex = 1.5)