dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt") dat.pca<-princomp(dat) summary(dat.pca) biplot(dat.pca) dat.pca$loadings[,1] scoreP1S1 = dat.pca$loadings[1,1]*(dat[1,1]-mean(dat$writing))+ dat.pca$loadings[2,1]*(dat[1,2]-mean(dat$thesis))+ dat.pca$loadings[3,1]*(dat[1,3]-mean(dat$interview)) dat<-read.csv("http://www.matsuka.info/data_folder/tdkCFA.csv") dat.pca<-princomp(dat) dat<-read.csv("http://peach.l.chiba-u.ac.jp/course_folder/AMSA_track_mens.csv",row.names=1) plot(dat,pch=20) dat.pca<-princomp(dat) screeplot(dat.pca,type="lines") dist = c(100,200,400,800,1500,5000,10000,42195) log.dist=log(dist) plot(log.dist, dat.pca$loadings[,1],pch=20,col='red', cex=2,ylab="PC loadings (1st)",xlab="Log(distance)") plot(log.dist, dat.pca$loadings[,2],pch=20,col='red', cex=5,ylab="PC loadings (2nd)",xlab="Log(distance)")
広域システム特別講義II RL1b
library(tidyverse)
library(gridExtra)

# epsilon-greedy action selection on a 10-armed bandit
epGreedy = function(nTrial, nRep, epsilon) {
  rew.hist = opt.hist = matrix(0, nrow = nTrial, ncol = nRep)
  for (i_rep in 1:nRep) {
    Q.true = rnorm(10); opt.ID = which.max(Q.true)
    Q.est = Q.cum = act.count = rep(0, 10)
    for (i_trial in 1:nTrial) {
      if (runif(1) < epsilon) {
        action = sample(1:10, 1)         # explore
      } else {
        action = which.max(Q.est)        # exploit
      }
      rew.hist[i_trial, i_rep] = rnorm(1) + Q.true[action]
      opt.hist[i_trial, i_rep] = action == opt.ID
      act.count[action] = act.count[action] + 1
      Q.cum[action] = Q.cum[action] + rew.hist[i_trial, i_rep]
      Q.est[action] = Q.cum[action] / act.count[action]
    }
  }
  return(list(opt = opt.hist, rew = rew.hist))
}

n.Trial = 1000; n.Rep = 2000
type1 = epGreedy(n.Trial, n.Rep, 0.0)
type2 = epGreedy(n.Trial, n.Rep, 0.01)
type3 = epGreedy(n.Trial, n.Rep, 0.1)
res.all = tibble(trial = rep(1:n.Trial, n.Rep * 3),
                 rew = c(as.vector(type1$rew), as.vector(type2$rew), as.vector(type3$rew)),
                 opt = c(as.vector(type1$opt), as.vector(type2$opt), as.vector(type3$opt)),
                 condition = c(rep("0.00", n.Trial * n.Rep),
                               rep("0.01", n.Trial * n.Rep),
                               rep("0.10", n.Trial * n.Rep)))
p1 <- ggplot(res.all) +
  geom_smooth(aes(x = trial, y = rew, color = condition)) +
  ylab("Average Reward")
p2 <- ggplot(res.all) +
  geom_smooth(aes(x = trial, y = opt, color = condition)) +
  ylab("Prop. Optimal Action")
grid.arrange(p1, p2, nrow = 2)

# softmax action selection
softmax = function(nTrial, nRep, tau) {
  rew.hist = opt.hist = matrix(0, nrow = nTrial, ncol = nRep)
  for (i_rep in 1:nRep) {
    Q.true = rnorm(10); opt.ID = which.max(Q.true)
    Q.est = Q.cum = act.count = rep(0, 10)
    for (i_trial in 1:nTrial) {
      p = exp(Q.est / tau) / sum(exp(Q.est / tau))   # softmax choice probabilities
      action = sample(1:10, 1, prob = p)
      rew.hist[i_trial, i_rep] = rnorm(1) + Q.true[action]
      opt.hist[i_trial, i_rep] = action == opt.ID
      act.count[action] = act.count[action] + 1
      Q.cum[action] = Q.cum[action] + rew.hist[i_trial, i_rep]
      Q.est[action] = Q.cum[action] / act.count[action]
    }
  }
  return(list(opt = opt.hist, rew = rew.hist))
}

n.Trial = 1000; n.Rep = 1000
eG00 = epGreedy(n.Trial, n.Rep, 0.0)
eG01 = epGreedy(n.Trial, n.Rep, 0.1)
sm   = softmax(n.Trial, n.Rep, 0.1)
res.all = tibble(trial = rep(1:n.Trial, n.Rep * 3),
                 rew = c(as.vector(eG00$rew), as.vector(eG01$rew), as.vector(sm$rew)),
                 opt = c(as.vector(eG00$opt), as.vector(eG01$opt), as.vector(sm$opt)),
                 condition = c(rep("epsilon = 0.0", n.Trial * n.Rep),
                               rep("epsilon = 0.1", n.Trial * n.Rep),
                               rep("softmax", n.Trial * n.Rep)))
p1 <- ggplot(res.all) +
  geom_smooth(aes(x = trial, y = rew, color = condition)) +
  ylab("Average Reward")
p2 <- ggplot(res.all) +
  geom_smooth(aes(x = trial, y = opt, color = condition)) +
  ylab("Prop. Optimal Action")
grid.arrange(p1, p2, nrow = 2)

# RL example: 5x5 gridworld, iterative policy evaluation
V = rep(0, 25)
# action probabilities under the equiprobable random policy
P = matrix(1/4, nrow = 25, ncol = 4)
# deterministic transitions (stay in place when moving off the grid)
north = c(2:25, 25); north[c(5, 10, 15, 20, 25)] = c(5, 10, 15, 20, 25)
east  = c(6:25, 21:25)
west  = c(1:5, 1:20)
south = c(1, 1:24); south[c(1, 6, 11, 16, 21)] = c(1, 6, 11, 16, 21)
trM = cbind(north, east, south, west)
trM[10, ] = 6
trM[20, ] = 18
# rewards: -1 for bumping into a wall, +10 and +5 for the two special states
R = matrix(0, nrow = 25, ncol = 4)
R[which(trM == 1:25)] = -1
R[10, ] = 10
R[20, ] = 5
# calculating state values iteratively (fixed number of sweeps)
nRep = 1000; gamma = 0.9
for (i_rep in 1:nRep) {
  V.old = V
  for (i_state in 1:25) {
    V[i_state] = sum(P[i_state, ] * (R[i_state, ] + gamma * V.old[trM[i_state, ]]))
  }
}
print(matrix(V, nrow = 5)[5:1, ])

# jisshu 1 (exercise 1): gridworld with 14 non-terminal states
north = c(1:3, 15, 1:10)
east  = 2:15; east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west  = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)
# defining reward & transition probability
R = -1; P = 0.25

# policy improvement
north = c(1:3, 15, 1:10)
east  = 2:15; east[c(3, 7, 11)] = c(3, 7, 11)
south = c(5:15, 12:14)
west  = c(15, 1:13); west[c(4, 8, 12)] = c(4, 8, 12)
trM = cbind(north, east, south, west)
R = -1; P = 0.25; V = rep(0, 15)
gamma = 1; tol = 1e-10
bestP = sample(1:4, 14, replace = T)
stable = F; counter = 0
while (stable == F) {
  counter = counter + 1
  # iterative policy evaluation (under the equiprobable policy, P = 0.25)
  delta = 1                     # reset so the evaluation loop runs on every sweep
  while (delta > tol) {
    delta = 0
    for (i_state in 1:14) {
      v = V[i_state]
      V[i_state] = sum(P * (R + gamma * V[trM[i_state, ]]))
      delta = max(delta, abs(v - V[i_state]))
    }
  }
  # policy improvement: act greedily with respect to the current values
  stable = T                    # stays TRUE only if no state changes its action
  for (i_state in 1:14) {
    b = bestP[i_state]
    bestP[i_state] = which.max(V[trM[i_state, ]])
    if (bestP[i_state] != b) stable = F
  }
}
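To inspect the result of the policy improvement, the greedy action in each state can be printed as a compass direction. A minimal sketch, assuming the trM and V objects computed above (state 15 is the terminal state and has no row in trM):

dirs <- c("N", "E", "S", "W")   # same order as the trM columns
greedyP <- apply(trM, 1, function(tr) dirs[which.max(V[tr])])
print(greedyP)                  # greedy action for states 1 through 14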
広域システム特別講義II R prog.2
install.packages("tidyverse") library(tidyverse) random.number <- rnorm(1000) mean(random.number) mean(random.number <- rnorm(1000)) rnorm(1000) %>% mean() # CLT NperSample = 10 SampleSize = 300000 # "traditional" random.number <- runif(NperSample * SampleSize) dat <- matrix(random.number, nrow=NperSample) means <- colMeans(dat) dens <- density(means) hist(means, breaks = 100, probability = T, main = "Distributionf of Means") lines(dens, lwd = 3, col = "orange") runif(NperSample * SampleSize) %>% matrix(nrow=NperSample) %>% colMeans() -> means hist(means, breaks=100,probability = T, main = "Distributionf of Means") means %>% density() %>% lines(,lwd=3,col='orange') histWdensity <- function(dat, nbreaks=30, main.txt){ dens <- density(dat) hist(dat, breaks = nbreaks, probability = T, main = main.txt) lines(dens, lwd = 3, col = "orange") } runif(NperSample * SampleSize) %>% matrix(nrow=NperSample) %>% colMeans() %>% histWdensity(nbreaks = 100, main.txt = "Distributionf of Means") dat <- read.csv("http://www.matsuka.info/data_folder/sampleData2013.txt") dt <- as_tibble(dat) dt.la <- filter(dt, affil == "LA") dt.la2 <- filter(dt, affil == "LA" & grade == "SP") dt.laNot2 <- filter(dt, affil == "LA" & grade != "SP") dt.GB <- select(dt, grade, nbooks) dtWOgender <- select(dt, -gender) dt.arranged <- arrange(dt, affil, grade) dt.weekly <- mutate(dt,nbooksWeek = nbooks / 52) dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/52) >= 1) dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/12) >= 1) dt.BWindex = mutate(dt, nbooksWeek = nbooks / 52, idx = nbooksWeek / (12*7-Hworked)) dt.byGrade <- group_by(dt, grade) summarize(dt.byGrade, ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE)) dt.byGrAf <- group_by(dt, grade, affil) dt.summ <- summarize(dt.byGrAf, ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE), N = n()) dt.summ2 <- dt %>% group_by(grade, affil) %>% summarize(ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE), N = n()) %>% filter(N > 2) %>% arrange(desc(ave.books)) plot(x = dt.summ2$ave.books, y = dt.summ2$ave.Hworked, pch=20, cex = 3,xlab = "Ave. # books read",ylab = "Ave hours worked") dt <- read_csv("http://www.matsuka.info/data_folder/sampleData2013.txt",col_names = TRUE) dt.summ3 <- dt %>% group_by(grade, gender) %>% summarize(ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE)) dt.summ3G <- dt.summ3 %>% gather('ave.books', 'ave.Hworked', key = 'ave.values', value = "BorW") dt.summ3SformG <- spread(dt.summ3G, key = ave.values, value =BorW) dt.sumLA <- dt %>% filter(affil=="LA") %>% group_by(grade) %>% summarize(ave.books = mean(nbooks)) toeic <- tibble( grade = factor(c("SP", "JR")), score = c(800,830), ) new.dt1 <- dt.sumLA %>% inner_join(toeic, by = "grade") dt.sumLA <- add_row(dt.sumLA, grade = c("MS"), ave.books = (13)) toeic2 <- tibble( grade = factor(c("SP", "JR","DR")), score = c(800,830,900), ) new.dt3 <- full_join(dt.sumLA, toeic2) new.dt4 <- left_join(dt.sumLA, toeic2) new.dt5 <- right_join(dt.sumLA, toeic2) # CLT NperSample = 10 SampleSize = 300000 runif(NperSample * SampleSize) %>% matrix(nrow=NperSample) %>% colMeans() %>% tibble(sample.mean = .) 
-> means ggplot(means,aes(x = sample.mean, y = ..density..)) + geom_histogram(bins=200) + geom_density(colour = "orange",size=2) ggplot(means,aes(x = sample.mean, y = ..density..)) + geom_histogram(bins=200) + geom_line(stat = "density", colour = "orange",size=2) runif(NperSample * SampleSize) %>% matrix(nrow=NperSample) %>% colMeans() %>% tibble(sample.mean = .) %>% ggplot(., aes(x = sample.mean, y = ..density..)) + geom_histogram(bins=100,colour = "grey20") + geom_line(stat = "density", colour = "skyblue",size=2) dat <- read.csv("http://www.matsuka.info/data_folder/sampleData2013.txt") dt <- as_tibble(dat) ggplot(dt, aes(x = Hworked, y = nbooks)) + geom_point(size = 3) ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks, color = grade),size = 3) ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks, shape = grade),size = 5) ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks),size = 5) + facet_wrap(~ grade, nrow = 1) ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks)) ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, linetype = grade)) ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks)) + facet_wrap(~ grade, nrow = 4) ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks)) + geom_point(aes(x = Hworked, y = nbooks), size = 4) ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks), colour = "black", se = FALSE) + geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4) ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) + geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4) plot1 <- ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) + geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4) plot1 + xlab("Hours worked") + ylab("Number of books read") plot1 + xlab("Hours worked") + ylab("Number of books read") + theme(axis.title.x = element_text(face = "italic",size = 14, colour = "navy"), axis.title.y = element_text(face = "bold",size = 10, colour = "darkgreen")) ggplot(filter(dt, affil == "LA")) + geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4) dt$grade <- fct_relevel(dt$grade, "FR","SP","JR","SR") group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T)) %>% ggplot() + geom_bar(aes(x = grade, y = ave.books), stat = "identity") ggplot(dt) + geom_bar(aes(x = grade)) dt %>% count(grade) group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T), se = sd(nbooks, na.rm =T)/sqrt(n())) %>% ggplot(aes(x = grade, y = ave.books)) + geom_bar(stat = "identity", fill = "grey70") + geom_errorbar(aes(ymin = ave.books - se, ymax = ave.books +se), width = 0.2) + ylab("Average # books read") ggplot(dt) + geom_boxplot(aes(x = grade, y = nbooks)) ggplot(dt) + geom_boxplot(aes(x = grade, y = nbooks)) + coord_flip() ggplot(dt,aes(x = Hworked, y = nbooks)) + stat_density2d(aes(colour =..level..)) + geom_point() ggplot(dt,aes(x = Hworked, y = nbooks)) + stat_density2d(aes(alpha =..density..), geom="tile",contour=F) + geom_point(alpha =0.4) ggplot(dt) + stat_summary(aes(x = grade, y = nbooks), fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x)) dat <- read.csv("http://www.matsuka.info/data_folder/datWA01.txt") dt <- as_tibble(dat) dt.lm <- lm(h~shoesize, dt) cfs <- coef(dt.lm) ggplot(dt, aes(x = shoesize, y = h)) + geom_point() + geom_abline(intercept = cfs[1], slope = cfs[2], col = "red") + geom_text( x= 22, y =175, aes(label = paste("r^2 =", round(summary(dt.lm)$r.squared,3))))
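As a follow-up to the regression figure above, the same fitted line can be drawn by letting ggplot2 do the fitting, and the figure can be written to disk with ggsave(). A minimal sketch, reusing the dt object from above; the output file name is arbitrary:

p <- ggplot(dt, aes(x = shoesize, y = h)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "red")  # OLS line fitted by ggplot2
p
ggsave("shoesize_height.pdf", plot = p, width = 6, height = 4)  # hypothetical file name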
データ解析基礎論B W03 R graphics
library(tidyverse)

# CLT
NperSample = 10
SampleSize = 300000
runif(NperSample * SampleSize) %>%
  matrix(nrow = NperSample) %>%
  colMeans() %>%
  tibble(sample.mean = .) -> means
ggplot(means, aes(x = sample.mean, y = ..density..)) +
  geom_histogram(bins = 200) +
  geom_density(colour = "orange", size = 2)
ggplot(means, aes(x = sample.mean, y = ..density..)) +
  geom_histogram(bins = 200) +
  geom_line(stat = "density", colour = "orange", size = 2)
runif(NperSample * SampleSize) %>%
  matrix(nrow = NperSample) %>%
  colMeans() %>%
  tibble(sample.mean = .) %>%
  ggplot(., aes(x = sample.mean, y = ..density..)) +
  geom_histogram(bins = 100, colour = "grey20") +
  geom_line(stat = "density", colour = "skyblue", size = 2)

dat <- read.csv("http://www.matsuka.info/data_folder/sampleData2013.txt")
dt <- as_tibble(dat)
ggplot(dt, aes(x = Hworked, y = nbooks)) + geom_point(size = 3)
ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 3)
ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks, shape = grade), size = 5)
ggplot(dt) + geom_point(aes(x = Hworked, y = nbooks), size = 5) +
  facet_wrap(~ grade, nrow = 1)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks))
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, linetype = grade))
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks)) +
  facet_wrap(~ grade, nrow = 4)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks)) +
  geom_point(aes(x = Hworked, y = nbooks), size = 4)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks), colour = "black", se = FALSE) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)
ggplot(dt) + geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

plot1 <- ggplot(dt) +
  geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)
plot1 + xlab("Hours worked") + ylab("Number of books read")
plot1 + xlab("Hours worked") + ylab("Number of books read") +
  theme(axis.title.x = element_text(face = "italic", size = 14, colour = "navy"),
        axis.title.y = element_text(face = "bold", size = 10, colour = "darkgreen"))

ggplot(filter(dt, affil == "LA")) +
  geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

dt$grade <- fct_relevel(dt$grade, "FR", "SP", "JR", "SR")
group_by(dt, grade) %>%
  summarize(ave.books = mean(nbooks, na.rm = T)) %>%
  ggplot() + geom_bar(aes(x = grade, y = ave.books), stat = "identity")
group_by(dt, grade) %>%
  summarize(ave.books = mean(nbooks, na.rm = T),
            se = sd(nbooks, na.rm = T) / sqrt(n())) %>%   # standard error of the mean
  ggplot(aes(x = grade, y = ave.books)) +
  geom_bar(stat = "identity", fill = "grey70") +
  geom_errorbar(aes(ymin = ave.books - se, ymax = ave.books + se), width = 0.2) +
  ylab("Average # books read")

ggplot(dt, aes(x = Hworked, y = nbooks)) +
  stat_density2d(aes(colour = ..level..)) + geom_point()
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  stat_density2d(aes(alpha = ..density..), geom = "tile", contour = F) +
  geom_point(alpha = 0.4)
ggplot(dt) + stat_summary(aes(x = grade, y = nbooks),
                          fun.y = mean,
                          fun.ymin = function(x) mean(x) - sd(x),
                          fun.ymax = function(x) mean(x) + sd(x))
ggplot(dt) + geom_boxplot(aes(x = grade, y = nbooks))
ggplot(dt) + geom_boxplot(aes(x = grade, y = nbooks)) + coord_flip()

dat <- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dt <- as_tibble(dat)
dt.lm <- lm(h ~ shoesize, dt)
cfs <- coef(dt.lm)
ggplot(dt, aes(x = shoesize, y = h)) +
  geom_point() +
  geom_abline(intercept = cfs[1], slope = cfs[2], col = "red") +
  geom_text(x = 22, y = 175,
            aes(label = paste("r^2 =", round(summary(dt.lm)$r.squared, 3))))
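One caveat about the geom_text() call above: because it inherits the full data set, the label is drawn once per row, all at the same position. annotate() places a single label instead. A small sketch, assuming the dt, dt.lm, and cfs objects from above:

ggplot(dt, aes(x = shoesize, y = h)) +
  geom_point() +
  geom_abline(intercept = cfs[1], slope = cfs[2], col = "red") +
  annotate("text", x = 22, y = 175,
           label = paste("r^2 =", round(summary(dt.lm)$r.squared, 3)))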
広域システム特別講義II: the 10.11 lecture
The 10.11 lectures (A and B) are cancelled because of a sudden illness in my family. I apologize for the short notice.
The next lecture and lab session will be on 10.25.
Let's schedule the make-up class next time, based on everyone's availability.
Candidate dates for the make-up class are 11.01 and 12.27, as well as the make-up days designated by the University of Tokyo: 12.25 (afternoon), 01.17 (morning), 01.20, and 01.21.
データ解析基礎論B W02
install.packages("tidyverse")
library(tidyverse)

random.number <- rnorm(1000)
mean(random.number)
mean(random.number <- rnorm(1000))
rnorm(1000) %>% mean()

# CLT
NperSample = 10
SampleSize = 300000

# "traditional" approach
random.number <- runif(NperSample * SampleSize)
dat <- matrix(random.number, nrow = NperSample)
means <- colMeans(dat)
dens <- density(means)
hist(means, breaks = 100, probability = T, main = "Distribution of Means")
lines(dens, lwd = 3, col = "orange")

# the same with pipes
runif(NperSample * SampleSize) %>% matrix(nrow = NperSample) %>% colMeans() -> means
hist(means, breaks = 100, probability = T, main = "Distribution of Means")
means %>% density() %>% lines(lwd = 3, col = 'orange')

histWdensity <- function(dat, nbreaks = 30, main.txt) {
  dens <- density(dat)
  hist(dat, breaks = nbreaks, probability = T, main = main.txt)
  lines(dens, lwd = 3, col = "orange")
}

runif(NperSample * SampleSize) %>%
  matrix(nrow = NperSample) %>%
  colMeans() %>%
  histWdensity(nbreaks = 100, main.txt = "Distribution of Means")

dat <- read.csv("http://www.matsuka.info/data_folder/sampleData2013.txt")
dt <- as_tibble(dat)

dt.la <- filter(dt, affil == "LA")
dt.la2 <- filter(dt, affil == "LA" & grade == "SP")
dt.laNot2 <- filter(dt, affil == "LA" & grade != "SP")
dt.GB <- select(dt, grade, nbooks)
dtWOgender <- select(dt, -gender)
dt.arranged <- arrange(dt, affil, grade)
dt.weekly <- mutate(dt, nbooksWeek = nbooks / 52)
dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/52) >= 1)
dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/12) >= 1)
dt.BWindex = mutate(dt, nbooksWeek = nbooks / 52, idx = nbooksWeek / (12*7 - Hworked))

dt.byGrade <- group_by(dt, grade)
summarize(dt.byGrade, ave.books = mean(nbooks, na.rm = TRUE),
          ave.Hworked = mean(Hworked, na.rm = TRUE))
dt.byGrAf <- group_by(dt, grade, affil)
dt.summ <- summarize(dt.byGrAf, ave.books = mean(nbooks, na.rm = TRUE),
                     ave.Hworked = mean(Hworked, na.rm = TRUE), N = n())
dt.summ2 <- dt %>%
  group_by(grade, affil) %>%
  summarize(ave.books = mean(nbooks, na.rm = TRUE),
            ave.Hworked = mean(Hworked, na.rm = TRUE),
            N = n()) %>%
  filter(N > 2) %>%
  arrange(desc(ave.books))
plot(x = dt.summ2$ave.books, y = dt.summ2$ave.Hworked, pch = 20, cex = 3,
     xlab = "Ave. # books read", ylab = "Ave hours worked")

dt <- read_csv("http://www.matsuka.info/data_folder/sampleData2013.txt", col_names = TRUE)
dt.summ3 <- dt %>%
  group_by(grade, gender) %>%
  summarize(ave.books = mean(nbooks, na.rm = TRUE),
            ave.Hworked = mean(Hworked, na.rm = TRUE))
dt.summ3G <- dt.summ3 %>%
  gather('ave.books', 'ave.Hworked', key = 'ave.values', value = "BorW")
dt.summ3SformG <- spread(dt.summ3G, key = ave.values, value = BorW)

dt.sumLA <- dt %>%
  filter(affil == "LA") %>%
  group_by(grade) %>%
  summarize(ave.books = mean(nbooks))
toeic <- tibble(grade = factor(c("SP", "JR")),
                score = c(800, 830))
new.dt1 <- dt.sumLA %>% inner_join(toeic, by = "grade")
dt.sumLA <- add_row(dt.sumLA, grade = c("MS"), ave.books = (13))
toeic2 <- tibble(grade = factor(c("SP", "JR", "DR")),
                 score = c(800, 830, 900))
new.dt3 <- full_join(dt.sumLA, toeic2)
new.dt4 <- left_join(dt.sumLA, toeic2)
new.dt5 <- right_join(dt.sumLA, toeic2)
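To see concretely how the join variants above differ, one can compare which grades each result keeps. A short sketch, assuming the dt.sumLA and toeic2 objects from above; anti_join() is a further dplyr verb not used in the lecture code:

new.dt3$grade   # full_join : grades from either table
new.dt4$grade   # left_join : only grades present in dt.sumLA
new.dt5$grade   # right_join: only grades present in toeic2
anti_join(dt.sumLA, toeic2, by = "grade")   # rows of dt.sumLA with no match in toeic2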