# データ解析基礎論B W03 R graphics

library(tidyverse)


# CLT (central limit theorem) demo: draw many samples from Uniform(0, 1)
# and look at the distribution of their sample means.
# NOTE: despite the names, NperSample is the number of observations in each
# sample (matrix rows) and SampleSize is the number of samples (matrix
# columns). The names are kept so any other code referring to them still works.
NperSample <- 10
SampleSize <- 300000

# One column per sample, so colMeans() yields one mean per sample.
means <- runif(NperSample * SampleSize) %>%
  matrix(nrow = NperSample) %>%
  colMeans() %>%
  tibble(sample.mean = .)

# Histogram on the density scale with a kernel density estimate on top.
# after_stat(density) replaces the deprecated ..density.. notation and
# linewidth replaces size for line thickness (ggplot2 >= 3.4.0).
ggplot(means, aes(x = sample.mean, y = after_stat(density))) +
  geom_histogram(bins = 200) +
  geom_density(colour = "orange", linewidth = 2)

# Same idea, but the density estimate is drawn as a plain line.
ggplot(means, aes(x = sample.mean, y = after_stat(density))) +
  geom_histogram(bins = 200) +
  geom_line(stat = "density", colour = "orange", linewidth = 2)

# Whole pipeline in one expression: a fresh set of draws is piped straight
# into ggplot() without storing the intermediate tibble.
runif(NperSample * SampleSize) %>%
  matrix(nrow = NperSample) %>%
  colMeans() %>%
  tibble(sample.mean = .) %>%
  ggplot(aes(x = sample.mean, y = after_stat(density))) +
  geom_histogram(bins = 100, colour = "grey20") +
  geom_line(stat = "density", colour = "skyblue", linewidth = 2)


# Download the sampleData2013 file; dat keeps the plain data frame and
# dt is the tibble version used by the plots below.
dat <- read.csv(file = "http://www.matsuka.info/data_folder/sampleData2013.txt")
dt <- dat %>% as_tibble()




# Scatter plots of number of books (nbooks) against hours worked (Hworked).

# Plain scatter plot.
ggplot(dt) +
  geom_point(aes(x = Hworked, y = nbooks), size = 3)

# Grade shown by point colour.
ggplot(dt, aes(x = Hworked, y = nbooks, colour = grade)) +
  geom_point(size = 3)

# Grade shown by point shape.
ggplot(dt, aes(x = Hworked, y = nbooks, shape = grade)) +
  geom_point(size = 5)

# One panel per grade, laid out in a single row.
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  geom_point(size = 5) +
  facet_wrap(vars(grade), nrow = 1)

# Smoothed trend of nbooks against Hworked, in several variations.

# Single smoother over all rows.
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  geom_smooth()

# One smoother per grade, told apart by line type.
ggplot(dt, aes(x = Hworked, y = nbooks, linetype = grade)) +
  geom_smooth()

# One smoother per grade, each in its own panel (four rows).
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  geom_smooth() +
  facet_wrap(vars(grade), nrow = 4)

# Smoother with the raw points drawn on top.
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  geom_smooth() +
  geom_point(size = 4)

# Single black smoother without the confidence band; points coloured by grade.
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  geom_smooth(colour = "black", se = FALSE) +
  geom_point(aes(colour = grade), size = 4)

# Smoother and points both coloured by grade, no confidence band.
ggplot(dt, aes(x = Hworked, y = nbooks, colour = grade)) +
  geom_smooth(se = FALSE) +
  geom_point(size = 4)


# Keep the per-grade smoother + scatter plot in plot1 so labels and theme
# tweaks can be layered on afterwards.
plot1 <- ggplot(dt) +
  geom_smooth(aes(x = Hworked, y = nbooks, colour = grade), se = FALSE) +
  geom_point(aes(x = Hworked, y = nbooks, colour = grade), size = 4)

# Readable axis titles.
plot1 +
  labs(x = "Hours worked", y = "Number of books read")

# Same titles, additionally styling each axis title's font.
plot1 +
  labs(x = "Hours worked", y = "Number of books read") +
  theme(
    axis.title.x = element_text(face = "italic", size = 14, colour = "navy"),
    axis.title.y = element_text(face = "bold", size = 10, colour = "darkgreen")
  )

# Scatter plot restricted to rows whose affil is "LA".
dt %>%
  filter(affil == "LA") %>%
  ggplot() +
  geom_point(aes(x = Hworked, y = nbooks, colour = grade), size = 4)


# Move the grade levels into the order FR, SP, JR, SR so that axes and
# legends follow that order.
dt$grade <- fct_relevel(dt$grade, "FR", "SP", "JR", "SR")

# Bar chart of the mean number of books per grade.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
dt %>%
  group_by(grade) %>%
  summarize(ave.books = mean(nbooks, na.rm = TRUE)) %>%
  ggplot(aes(x = grade, y = ave.books)) +
  geom_col()

# NOTE(review): this plot is identical to the one above; it is kept so the
# script still produces the same sequence of figures, but looks redundant.
dt %>%
  group_by(grade) %>%
  summarize(ave.books = mean(nbooks, na.rm = TRUE)) %>%
  ggplot(aes(x = grade, y = ave.books)) +
  geom_col()

# Mean with standard-error bars.
# The standard error is sd / sqrt(n), not sd / n. n counts only the
# non-missing values, matching the na.rm = TRUE used for mean() and sd().
dt %>%
  group_by(grade) %>%
  summarize(
    ave.books = mean(nbooks, na.rm = TRUE),
    se = sd(nbooks, na.rm = TRUE) / sqrt(sum(!is.na(nbooks)))
  ) %>%
  ggplot(aes(x = grade, y = ave.books)) +
  geom_col(fill = "grey70") +
  geom_errorbar(aes(ymin = ave.books - se, ymax = ave.books + se),
                width = 0.2) +
  ylab("Average # books read")

# 2D kernel density of (Hworked, nbooks) drawn as contour lines coloured by
# density level, with the raw points on top.
# after_stat() replaces the deprecated ..level.. / ..density.. notation.
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  stat_density_2d(aes(colour = after_stat(level))) +
  geom_point()

# Same density shown as tiles whose opacity follows the estimated density
# (contour = FALSE returns the density grid itself rather than contours).
ggplot(dt, aes(x = Hworked, y = nbooks)) +
  stat_density_2d(aes(alpha = after_stat(density)),
                  geom = "tile", contour = FALSE) +
  geom_point(alpha = 0.4)


# Per-grade summary of nbooks: the point is the mean and the range runs from
# mean - sd to mean + sd.
# fun / fun.min / fun.max replace fun.y / fun.ymin / fun.ymax, which were
# deprecated in ggplot2 3.3.0.
ggplot(dt) +
  stat_summary(aes(x = grade, y = nbooks),
               fun = mean,
               fun.min = function(x) mean(x) - sd(x),
               fun.max = function(x) mean(x) + sd(x))

# Box plot of nbooks for each grade.
ggplot(dt, aes(x = grade, y = nbooks)) +
  geom_boxplot()

# Same box plot with the axes swapped so the boxes run horizontally.
ggplot(dt, aes(x = grade, y = nbooks)) +
  geom_boxplot() +
  coord_flip()

# Load the datWA01 file (this replaces the dat / dt objects used above) and
# fit a simple linear regression of h on shoesize.
dat <- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")
dt <- as_tibble(dat)
dt.lm <- lm(h ~ shoesize, data = dt)
cfs <- coef(dt.lm)  # cfs[1] = intercept, cfs[2] = slope for shoesize

# Scatter plot with the fitted line and the model's R-squared as a label.
# annotate() draws the label exactly once; geom_text() with a constant label
# inside aes() would draw it once per data row, stacking copies on top of
# each other.
ggplot(dt, aes(x = shoesize, y = h)) +
  geom_point() +
  geom_abline(intercept = cfs[1], slope = cfs[2], colour = "red") +
  annotate("text", x = 22, y = 175,
           label = paste("r^2  =", round(summary(dt.lm)$r.squared, 3)))