# データ解析基礎論B PCA

```dat<-read.table("http://www.matsuka.info/data_folder/tdkPCA01.txt")
dat.pca<-princomp(dat)
summary(dat.pca)
biplot(dat.pca)

dat.pca<-princomp(dat)

plot(dat,pch=20)
dat.pca<-princomp(dat)
screeplot(dat.pca,type="lines")

dist = c(100,200,400,800,1500,5000,10000,42195)
log.dist=log(dist)

```

# 広域システム特別講義II RL1b

```library(tidyverse)
library(gridExtra)

epGreedy = function(nTrial,nRep,epsilon) {
rew.hist = opt.hist = matrix(0,nrow=nTrial,ncol=nRep)
for (i_rep in 1:nRep){
Q.true=rnorm(10); opt.ID=which.max(Q.true)
Q.est = Q.cum = act.count=rep(0,10);
for (i_trial in 1:nTrial) {
if (runif(1) < epsilon) {
action=sample(1:10,1)
} else {
action=which.max(Q.est)
}
rew.hist[i_trial,i_rep]=rnorm(1)+Q.true[action]
opt.hist[i_trial,i_rep]=action==opt.ID
act.count[action]=act.count[action]+1
Q.cum[action]=Q.cum[action]+rew.hist[i_trial,i_rep]
Q.est[action]=Q.cum[action]/act.count[action]
}
}
return(list(opt = opt.hist, rew = rew.hist))
}
n.Trial = 1000; n.Rep = 2000
type1=epGreedy(n.Trial, n.Rep, 0.0)
type2=epGreedy(n.Trial, n.Rep, 0.01)
type3=epGreedy(n.Trial, n.Rep, 0.1)

res.all = tibble(trial = rep(1:nTrial, n.Rep * 3),
rew = c(as.vector(type1\$rew),as.vector(type2\$rew),as.vector(type3\$rew)),
opt = c(as.vector(type1\$opt),as.vector(type2\$opt),as.vector(type3\$opt)),
condition = c(rep("0.00", nTrial * n.Rep),
rep("0.01", nTrial * n.Rep),
rep("0.10", nTrial * n.Rep)))

p1 <- ggplot(res.all) +
geom_smooth(aes(x = trial, y = rew, color = condition)) +
ylab("Average Reward")
p2 <- ggplot(res.all) +
geom_smooth(aes(x = trial, y = opt, color = condition)) +
ylab("Prop. Optimal Action")
grid.arrange(p1, p2, nrow =2)

softmax = function(nTrial, nRep, tau) {
rew.hist = opt.hist = matrix(0,nrow=nTrial,ncol=nRep)
for (i_rep in 1:nRep){
Q.true=rnorm(10); opt.ID=which.max(Q.true)
Q.est = Q.cum = act.count=rep(0,10);
for (i_trial in 1:nTrial) {
p = exp(Q.est/tau)/sum(exp(Q.est)/tau)
action = sample(1:10, 1, prob = p)
rew.hist[i_trial,i_rep]=rnorm(1)+Q.true[action]
opt.hist[i_trial,i_rep]=action==opt.ID
act.count[action]=act.count[action]+1
Q.cum[action]=Q.cum[action]+rew.hist[i_trial,i_rep]
Q.est[action]=Q.cum[action]/act.count[action]
}
}
return(list(opt = opt.hist, rew = rew.hist))
}

n.Trial = 1000; n.Rep = 1000
eG00=epGreedy(n.Trial, n.Rep, 0.0)
eG01=epGreedy(n.Trial, n.Rep, 0.1)
sm=softmax(n.Trial, n.Rep, 0.1)
res.all = tibble(trial = rep(1:n.Trial, n.Rep * 3),
rew = c(as.vector(eG00\$rew),as.vector(eG01\$rew),as.vector(sm\$rew)),
opt = c(as.vector(eG00\$opt),as.vector(eG01\$opt),as.vector(sm\$opt)),
condition = c(rep("epsilon = 0.0", n.Trial * n.Rep),
rep("epsilon = 0.1", n.Trial * n.Rep),
rep("softmax", n.Trial * n.Rep)))
p1 <- ggplot(res.all) +
geom_smooth(aes(x = trial, y = rew, color = condition)) +
ylab("Average Reward")

p2 <- ggplot(res.all) +
geom_smooth(aes(x = trial, y = opt, color = condition)) +
ylab("Prop. Optimal Action")

grid.arrange(p1, p2, nrow =2)

# RL example
V=rep(0,25);

# defining probability matrix
P=matrix(1/4,nrow=25,ncol=4) #

# defining deterministic transition matrix
north=c(2:25,25)
north[ c(5,10,15,20,25)]=c(5,10,15,20,25)
east=c(6:25,21:25)
west=c(1:5,1:20)
south=c(1,1:24)
south[ c(1,6,11,16,21)]=c(1,6,11,16,21)
trM=cbind(north,east,south,west)
trM[10,]=6
trM[20,]=18

# defining reward matrix
R=matrix(0,nrow=25,ncol=4)
R[which(trM==1:25)]=-1
R[10,]=10
R[20,]=5

# calculating state-values iteratively
# fixed number of iterations
nRep=1000; gamma=0.9;
for (i_rep in 1:nRep) {
V.old=V
for (i_state in 1:25) {
V[i_state]=sum(P[i_state,]*(R[i_state,]+gamma*V.old[trM[i_state,]]))
}
}
print(matrix(V,nrow=5)[5:1,])

# jisshu 1
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)

# defining Reward & trans. prob.
R=-1;P=0.25;

# policy improvement
north=c(1:3,15,1:10)
east=2:15;east[ c(3,7,11)]=c(3,7,11)
south=c(5:15,12:14)
west=c(15,1:13);west[ c(4,8,12)]=c(4,8,12)
trM=cbind(north,east,south,west)
R=-1;P=0.25;V=rep(0,15);
delta=1; gamma=1; tol=1e-10;
bestP=sample(1:4,14,replace=T)
stable=F;counter=0;
while (stable==F){
counter=counter+1
# iterative policy evaluation
while (delta>tol) {
delta=0;
for (i_state in 1:14) {
v=V[i_state]
V[i_state]=sum(P*(R+gamma*V[trM[i_state,]]))
delta=max(delta,abs(v-V[i_state]))
}
}
# policy improvement
stable=F
for (i_state in 1:14) {
b=bestP[i_state]
bestP[i_state]=which.max(V[trM[i_state,]])
ifelse((bestP[i_state]==b),stable<-T,stable<-F)
}
}
```
Posted in UT

# 広域システム特別講義II R prog.2

```install.packages("tidyverse")
library(tidyverse)

random.number <- rnorm(1000)
mean(random.number)
mean(random.number <- rnorm(1000))

rnorm(1000) %>% mean()

# CLT
NperSample = 10
SampleSize = 300000

random.number <- runif(NperSample * SampleSize)
dat <- matrix(random.number, nrow=NperSample)
means <- colMeans(dat)
dens <- density(means)
hist(means, breaks = 100, probability = T, main = "Distributionf of Means")
lines(dens, lwd = 3, col = "orange")

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() -> means
hist(means, breaks=100,probability = T, main = "Distributionf of Means")
means %>% density() %>% lines(,lwd=3,col='orange')

histWdensity <- function(dat, nbreaks=30, main.txt){
dens <- density(dat)
hist(dat, breaks = nbreaks, probability = T, main = main.txt)
lines(dens, lwd = 3, col = "orange")
}

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() %>% histWdensity(nbreaks = 100, main.txt = "Distributionf of Means")

dt <- as_tibble(dat)
dt.la <- filter(dt, affil == "LA")

dt.la2 <- filter(dt, affil == "LA" & grade == "SP")
dt.laNot2 <- filter(dt, affil == "LA" & grade != "SP")

dtWOgender <- select(dt, -gender)

dt.weekly <- mutate(dt,nbooksWeek = nbooks / 52)

dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/52) >= 1)
dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/12) >= 1)

dt.BWindex = mutate(dt, nbooksWeek = nbooks / 52, idx = nbooksWeek / (12*7-Hworked))

summarize(dt.byGrade, ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE))

dt.summ <- summarize(dt.byGrAf, ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE), N = n())

dt.summ2 <- dt %>%
summarize(ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE), N = n()) %>% filter(N > 2) %>% arrange(desc(ave.books))

plot(x = dt.summ2\$ave.books, y = dt.summ2\$ave.Hworked, pch=20, cex = 3,xlab = "Ave. # books read",ylab = "Ave hours worked")

dt.summ3 <- dt %>%
summarize(ave.books = mean(nbooks,na.rm = TRUE), ave.Hworked = mean(Hworked, na.rm = TRUE))

dt.summ3G <- dt.summ3 %>% gather('ave.books', 'ave.Hworked', key = 'ave.values', value = "BorW")

dt.summ3SformG <- spread(dt.summ3G, key = ave.values, value =BorW)

dt.sumLA <- dt %>% filter(affil=="LA") %>% group_by(grade) %>% summarize(ave.books = mean(nbooks))

toeic <- tibble(
score = c(800,830),
)

new.dt1 <- dt.sumLA %>% inner_join(toeic, by = "grade")

toeic2 <- tibble(
score = c(800,830,900),
)
new.dt3 <- full_join(dt.sumLA, toeic2)

new.dt4 <- left_join(dt.sumLA, toeic2)
new.dt5 <- right_join(dt.sumLA, toeic2)

# CLT
NperSample = 10
SampleSize = 300000

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() %>% tibble(sample.mean = .) -> means

ggplot(means,aes(x = sample.mean, y = ..density..)) +
geom_histogram(bins=200) +
geom_density(colour = "orange",size=2)

ggplot(means,aes(x = sample.mean, y = ..density..)) +
geom_histogram(bins=200) +
geom_line(stat = "density", colour = "orange",size=2)

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() %>% tibble(sample.mean = .) %>%
ggplot(., aes(x = sample.mean, y = ..density..)) +
geom_histogram(bins=100,colour = "grey20") +
geom_line(stat = "density", colour = "skyblue",size=2)

dt <- as_tibble(dat)

ggplot(dt, aes(x = Hworked, y = nbooks)) +
geom_point(size = 3)

ggplot(dt) +
geom_point(aes(x = Hworked, y = nbooks, color = grade),size = 3)

ggplot(dt) +
geom_point(aes(x = Hworked, y = nbooks, shape = grade),size = 5)

ggplot(dt) +
geom_point(aes(x = Hworked, y = nbooks),size = 5) +

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks))

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks, linetype = grade))

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks)) +

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks)) +
geom_point(aes(x = Hworked, y = nbooks), size = 4)

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks), colour = "black", se = FALSE) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

plot1 <- ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)
plot1 + xlab("Hours worked") + ylab("Number of books read")

plot1 + xlab("Hours worked") +  ylab("Number of books read") +
theme(axis.title.x = element_text(face = "italic",size = 14, colour = "navy"),
axis.title.y = element_text(face = "bold",size = 10, colour = "darkgreen"))

ggplot(filter(dt, affil == "LA")) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T)) %>%
ggplot() + geom_bar(aes(x = grade, y = ave.books), stat = "identity")

group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T),
se = sd(nbooks, na.rm =T)/sqrt(n())) %>%
ggplot(aes(x = grade, y = ave.books)) +
geom_bar(stat = "identity", fill = "grey70") +
geom_errorbar(aes(ymin = ave.books - se, ymax = ave.books +se), width = 0.2) +

ggplot(dt) +
geom_boxplot(aes(x = grade, y = nbooks))
ggplot(dt) +
geom_boxplot(aes(x = grade, y = nbooks)) +
coord_flip()

ggplot(dt,aes(x = Hworked, y = nbooks)) +
stat_density2d(aes(colour =..level..)) +
geom_point()

ggplot(dt,aes(x = Hworked, y = nbooks)) +
stat_density2d(aes(alpha =..density..), geom="tile",contour=F) +
geom_point(alpha =0.4)

ggplot(dt) +
stat_summary(aes(x = grade, y = nbooks),
fun.y = mean,
fun.ymin = function(x) mean(x) - sd(x),
fun.ymax = function(x) mean(x) + sd(x))

dt <- as_tibble(dat)
dt.lm <- lm(h~shoesize, dt)
cfs <- coef(dt.lm)
ggplot(dt, aes(x = shoesize, y = h)) +
geom_point() +
geom_abline(intercept = cfs[1], slope = cfs[2], col = "red") +
geom_text( x= 22, y =175, aes(label = paste("r^2  =", round(summary(dt.lm)\$r.squared,3))))
```
Posted in UT

# データ解析基礎論B W03 R graphics

```library(tidyverse)

# CLT
NperSample = 10
SampleSize = 300000

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() %>% tibble(sample.mean = .) -> means

ggplot(means,aes(x = sample.mean, y = ..density..)) +
geom_histogram(bins=200) +
geom_density(colour = "orange",size=2)

ggplot(means,aes(x = sample.mean, y = ..density..)) +
geom_histogram(bins=200) +
geom_line(stat = "density", colour = "orange",size=2)

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() %>% tibble(sample.mean = .) %>%
ggplot(., aes(x = sample.mean, y = ..density..)) +
geom_histogram(bins=100,colour = "grey20") +
geom_line(stat = "density", colour = "skyblue",size=2)

dt <- as_tibble(dat)

ggplot(dt, aes(x = Hworked, y = nbooks)) +
geom_point(size = 3)

ggplot(dt) +
geom_point(aes(x = Hworked, y = nbooks, color = grade),size = 3)

ggplot(dt) +
geom_point(aes(x = Hworked, y = nbooks, shape = grade),size = 5)

ggplot(dt) +
geom_point(aes(x = Hworked, y = nbooks),size = 5) +

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks))

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks, linetype = grade))

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks)) +

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks)) +
geom_point(aes(x = Hworked, y = nbooks), size = 4)

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks), colour = "black", se = FALSE) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

plot1 <- ggplot(dt) +
geom_smooth(aes(x = Hworked, y = nbooks, color = grade), se = FALSE) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)
plot1 + xlab("Hours worked") + ylab("Number of books read")

plot1 + xlab("Hours worked") +  ylab("Number of books read") +
theme(axis.title.x = element_text(face = "italic",size = 14, colour = "navy"),
axis.title.y = element_text(face = "bold",size = 10, colour = "darkgreen"))

ggplot(filter(dt, affil == "LA")) +
geom_point(aes(x = Hworked, y = nbooks, color = grade), size = 4)

group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T)) %>%
ggplot() + geom_bar(aes(x = grade, y = ave.books), stat = "identity")

group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T)) %>%
ggplot() + geom_bar(aes(x = grade, y = ave.books), stat = "identity")

group_by(dt, grade) %>% summarize(ave.books = mean(nbooks, na.rm = T),
se = sd(nbooks, na.rm =T)/n()) %>%
ggplot(aes(x = grade, y = ave.books)) +
geom_bar(stat = "identity", fill = "grey70") +
geom_errorbar(aes(ymin = ave.books - se, ymax = ave.books +se), width = 0.2) +

ggplot(dt,aes(x = Hworked, y = nbooks)) +
stat_density2d(aes(colour =..level..)) +
geom_point()

ggplot(dt,aes(x = Hworked, y = nbooks)) +
stat_density2d(aes(alpha =..density..), geom="tile",contour=F) +
geom_point(alpha =0.4)

ggplot(dt) +
stat_summary(aes(x = grade, y = nbooks),
fun.y = mean,
fun.ymin = function(x) mean(x) - sd(x),
fun.ymax = function(x) mean(x) + sd(x))

ggplot(dt) +
geom_boxplot(aes(x = grade, y = nbooks))
ggplot(dt) +
geom_boxplot(aes(x = grade, y = nbooks)) +
coord_flip()

dt <- as_tibble(dat)
dt.lm <- lm(h~shoesize, dt)
cfs <- coef(dt.lm)
ggplot(dt, aes(x = shoesize, y = h)) +
geom_point() +
geom_abline(intercept = cfs[1], slope = cfs[2], col = "red") +
geom_text( x= 22, y =175, aes(label = paste("r^2  =",round(summary(dt.lm)\$r.squared,3))))
```

# 広域システム特別講義II 10.11の講義

10.11ABの講義は家族が急病の為、休講とさせてください。直前の連絡で申し訳ありません。

Posted in UT

# データ解析基礎論B W02

```install.packages("tidyverse")
library(tidyverse)

random.number <- rnorm(1000)
mean(random.number)
mean(random.number <- rnorm(1000))

rnorm(1000) %>% mean()

# CLT
NperSample = 10
SampleSize = 300000

random.number <- runif(NperSample * SampleSize)
dat <- matrix(random.number, nrow=NperSample)
means <- colMeans(dat)
dens <- density(means)
hist(means, breaks = 100, probability = T, main = "Distributionf of Means")
lines(dens, lwd = 3, col = "orange")

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() -> means
hist(means, breaks=100,probability = T, main = "Distributionf of Means")
means %>% density() %>% lines(,lwd=3,col='orange')

histWdensity <- function(dat, nbreaks=30, main.txt){
dens <- density(dat)
hist(dat, breaks = nbreaks, probability = T, main = main.txt)
lines(dens, lwd = 3, col = "orange")
}

runif(NperSample * SampleSize) %>%
matrix(nrow=NperSample) %>%
colMeans() %>%
histWdensity(nbreaks = 100, main.txt = "Distributionf of Means")

dt <- as_tibble(dat)
dt.la <- filter(dt, affil == "LA")

dt.la2 <- filter(dt, affil == "LA" & grade == "SP")
dt.laNot2 <- filter(dt, affil == "LA" & grade != "SP")

dtWOgender <- select(dt, -gender)

dt.weekly <- mutate(dt,nbooksWeek = nbooks / 52)

dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/52) >= 1)
dt.atLeastOneBook <- mutate(dt, atleastOneBook = (nbooks/12) >= 1)

dt.BWindex = mutate(dt, nbooksWeek = nbooks / 52,
idx = nbooksWeek / (12*7-Hworked))

summarize(dt.byGrade, ave.books = mean(nbooks,na.rm = TRUE),
ave.Hworked = mean(Hworked, na.rm = TRUE))

dt.summ <- summarize(dt.byGrAf, ave.books = mean(nbooks,na.rm = TRUE),
ave.Hworked = mean(Hworked, na.rm = TRUE), N = n())

dt.summ2 <- dt %>%
summarize(ave.books = mean(nbooks,na.rm = TRUE),
ave.Hworked = mean(Hworked, na.rm = TRUE),
N = n()) %>% filter(N > 2) %>% arrange(desc(ave.books))

plot(x = dt.summ2\$ave.books, y = dt.summ2\$ave.Hworked, pch=20, cex = 3,
xlab = "Ave. # books read",ylab = "Ave hours worked")

col_names = TRUE)

dt.summ3 <- dt %>%
summarize(ave.books = mean(nbooks,na.rm = TRUE),
ave.Hworked = mean(Hworked, na.rm = TRUE))

dt.summ3G <- dt.summ3 %>% gather('ave.books', 'ave.Hworked',
key = 'ave.values', value = "BorW")

dt.summ3SformG <- spread(dt.summ3G, key = ave.values, value =BorW)

dt.sumLA <- dt %>% filter(affil=="LA") %>% group_by(grade) %>%
summarize(ave.books = mean(nbooks))

toeic <- tibble(
score = c(800,830),
)

new.dt1 <- dt.sumLA %>% inner_join(toeic, by = "grade")