# forward and backward passes for a multiplication node
multi.forwd <- function(x, y){
  return(x*y)
}

multi.bckwd <- function(x, y, dout){
  dx = dout * y
  dy = dout * x
  return(list(dx = dx, dy = dy))
}

# example: price of apples with tax
apple = 100; n.apple = 2; tax = 1.1
apple.pre.tax = multi.forwd(apple, n.apple)
apple.post.tax = multi.forwd(apple.pre.tax, tax)
dprice = 1
d.apple.post.tax = multi.bckwd(apple.pre.tax, tax, dprice)
d.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.post.tax$dx)$dy

# forward and backward passes for an addition node
add.forwd <- function(x, y){
  return(x + y)
}

add.bckwd <- function(x, y, dout){
  dx = dout
  dy = dout
  return(list(dx = dx, dy = dy))
}

# example: apples and oranges with tax
apple = 100; n.apple = 2; tax = 1.1
orange = 150; n.orange = 3
apple.price = multi.forwd(apple, n.apple)
orange.price = multi.forwd(orange, n.orange)
all.price = add.forwd(apple.price, orange.price)
price = multi.forwd(all.price, tax)
dprice = 1
d.all.price = multi.bckwd(all.price, tax, dprice)
d.apple.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dx
d.orange.price = add.bckwd(apple.price, orange.price, d.all.price$dx)$dy
d.apple = multi.bckwd(apple, n.apple, d.apple.price)$dx
d.n.apple = multi.bckwd(apple, n.apple, d.apple.price)$dy
d.orange = multi.bckwd(orange, n.orange, d.orange.price)$dx
d.n.orange = multi.bckwd(orange, n.orange, d.orange.price)$dy

# ReLU layer
relu.forwd <- function(x){
  return(pmax(x, 0))
}

relu.bckwd <- function(x, dout){
  dout[which(x <= 0)] = 0
  return(dout)
}

# sigmoid layer
sigmoid.forwd <- function(x){
  return(1/(1+exp(-x)))
}

sigmoid.bckwd <- function(x, dout){
  y = sigmoid.forwd(x)
  return(dout*(1-y)*y)
}

# affine (fully connected) layer
affine.forwd <- function(x, W, b){
  return(x%*%W + matrix(1, nrow = nrow(x), ncol = 1)%*%b)
}

affine.bckwd <- function(x, W, b, dout){
  dx = dout%*%t(W)
  dW = t(x)%*%dout
  db = colSums(dout)
  return(list(dx = dx, dW = dW, db = db))
}

# softmax-with-loss layer: the forward pass returns the cross-entropy loss
softmax.forwd <- function(x, target){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x%*%matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  delta = 1e-7; R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

softmax.bckwd <- function(x, target, dout = 1){
  max.x = apply(x, 1, max)
  R = nrow(x)
  C = ncol(x)
  x = x - max.x%*%matrix(1, nrow = 1, ncol = C)
  y = exp(x)/rowSums(exp(x))
  return((y - target)/R)
}

# network initialization and feedforward utilities
init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)], sd = 0.1),
                          nrow = n.neurons[i.layer])
    b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer+1)], sd = 0.1), nrow = 1)
  }
  return(list(W = W, b = b))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

relu.func <- function(x){
  y = apply(x, 2, function(x) pmax(x, 0))
  return(y)
}

# numerically stabilized softmax (used by activation below)
softmax <- function(x){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x%*%matrix(1, nrow = 1, ncol = C)
  return(exp(x)/rowSums(exp(x)))
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
  if (actFun == "softmax"){
    return(softmax(A))
  }
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x%*%network$W[[i.layer]] + matrix(1, nrow = batch.size, ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}

cross.entropy = function(y, target){
  delta = 1e-7; R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

loss.network = function(params, x, t, actFun){
  y = feedforward(params, x, actFun)
  return(cross.entropy(y, t))
}

# Please try not to look at the code below until you have finished the assignment.
########################################################################
########################################################################
########################################################################

# IRIS: 2-layer network trained with back-propagation
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0, nrow = nrow(train.x), ncol = 3)
train.y[which(train.y.temp==1), 1] = 1
train.y[which(train.y.temp==2), 2] = 1
train.y[which(train.y.temp==3), 3] = 1

params = init.network(c(4,15,3))
batch_size = 10; n.iter = 5000; lambda = 0.05
n.train = nrow(train.x)
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  # forward pass
  a1 = affine.forwd(x.batch, params$W[[1]], params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
  z2 = softmax.forwd(a2, t.batch)
  # backward pass (the input to the 2nd affine layer is z1, the post-activation output)
  dwSM = softmax.bckwd(a2, t.batch, 1)
  dwA2 = affine.bckwd(z1, params$W[[2]], params$b[[2]], dwSM)
  dwSG = sigmoid.bckwd(a1, dwA2$dx)
  dwA1 = affine.bckwd(x.batch, params$W[[1]], params$b[[1]], dwSG)
  # gradient descent updates
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
  loss[i.iter] = loss.network(params, x.batch, t.batch, c("sigmoid","softmax"))
}
plot(loss, type = 'l', xlab = "trial")

# MNIST: 2-layer network
train <- read.csv('~/courses/CogMod/CMA2017/MNSTtrain.csv', header = TRUE)
test <- read.csv('~/courses/CogMod/CMA2017/MNSTtest.csv', header = TRUE)
train <- data.matrix(train)
test <- data.matrix(test)
train.x <- as.matrix(train[,-1]/255)
train.y.temp <- train[,1]
train.y = matrix(0, nrow = nrow(train.x), ncol = 10)
for (i in 1:nrow(train.x)){
  train.y[i, (train.y.temp[i]+1)] = 1
}

params = init.network(c(784,50,10))
batch_size = 100; n.iter = 5000; lambda = 0.05
n.train = nrow(train.x)
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch, params$W[[1]], params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
  z2 = softmax.forwd(a2, t.batch)
  dwSM = softmax.bckwd(a2, t.batch, 1)
  dwA2 = affine.bckwd(z1, params$W[[2]], params$b[[2]], dwSM)
  dwSG = sigmoid.bckwd(a1, dwA2$dx)
  dwA1 = affine.bckwd(x.batch, params$W[[1]], params$b[[1]], dwSG)
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
  loss[i.iter] = loss.network(params, x.batch, t.batch, c("sigmoid","softmax"))
}
plot(loss, type = 'l', xlab = "trial")
apply(feedforward(params, train.x[1:10,], c("sigmoid","softmax")), 1, which.max)
train.y.temp[1:10] + 1

### MNIST: 3-layer network
params = init.network(c(784,50,50,10))
batch_size = 100; n.iter = 5000; lambda = 0.05
n.train = nrow(train.x)
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  a1 = affine.forwd(x.batch, params$W[[1]], params$b[[1]])
  z1 = sigmoid.forwd(a1)
  a2 = affine.forwd(z1, params$W[[2]], params$b[[2]])
  z2 = sigmoid.forwd(a2)
  a3 = affine.forwd(z2, params$W[[3]], params$b[[3]])
  z3 = softmax.forwd(a3, t.batch)
  dwSM = softmax.bckwd(a3, t.batch, 1)
  dwA3 = affine.bckwd(z2, params$W[[3]], params$b[[3]], dwSM)
  dwSG2 = sigmoid.bckwd(a2, dwA3$dx)
  dwA2 = affine.bckwd(z1, params$W[[2]], params$b[[2]], dwSG2)
  dwSG1 = sigmoid.bckwd(a1, dwA2$dx)
  dwA1 = affine.bckwd(x.batch, params$W[[1]], params$b[[1]], dwSG1)
  params$W[[3]] = params$W[[3]] - lambda*dwA3$dW
  params$b[[3]] = params$b[[3]] - lambda*dwA3$db
  params$W[[2]] = params$W[[2]] - lambda*dwA2$dW
  params$b[[2]] = params$b[[2]] - lambda*dwA2$db
  params$W[[1]] = params$W[[1]] - lambda*dwA1$dW
  params$b[[1]] = params$b[[1]] - lambda*dwA1$db
  loss[i.iter] = loss.network(params, x.batch, t.batch, c("sigmoid","sigmoid","softmax"))
}
plot(loss, type = 'l', xlab = "trial")
pred <- apply(feedforward(params, train.x, c("sigmoid","sigmoid","softmax")), 1, which.max)
table(pred, train.y.temp + 1)
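As a quick sanity check, the confusion table above can be summarized as an overall training accuracy. This is a minimal sketch, assuming pred and train.y.temp from the 3-layer MNIST run are still in the workspace.

# sketch: overall training accuracy of the 3-layer MNIST network
mean(pred == (train.y.temp + 1))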
Monthly Archives: May 2017
Data Analysis Fundamentals A: Week 06
# 2017 week 06
# regression
dat <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
plot(ani~otouto, data = dat, xlab = "Score of Younger Brother",
     ylab = "Score of Elder Brother", pch = 20, cex = 2,
     xlim = c(5,27), ylim = c(5,27))
dat.lm <- lm(ani~otouto, data = dat)
summary(dat.lm)
abline(dat.lm, col = 'red', lwd = 2.5)

# two-sample t-test
boxplot(dat, col = c('skyblue','coral'), ylab = "score")
t.test(dat$ani, dat$otouto, var.equal = T)
dat2 <- data.frame(score = c(dat$ani, dat$otouto),
                   order = c(rep("ani",10), rep("otouto",10)))
plot(dat2$score~as.numeric(dat2$order), pch = 20, xlab = "order",
     ylab = "score", xlim = c(0.5,2.5), cex = 2, xaxt = "n")
axis(1, c(1,2), c("ani","otouto"))
dat2.lm <- lm(score~order, data = dat2)
abline(dat2.lm, col = 'red', lwd = 3)

# one-sample t-test
dat.D = dat$ani - dat$otouto
boxplot(dat.D, col = "skyblue", ylab = "Difference")
t.test(dat.D)
dat.D.lm <- lm(dat.D~1)
plot(dat.D~rep(1,10), pch = 20, xlab = "", ylab = "Difference", cex = 3)
summary(dat.D.lm)

# plotting errors (residuals) around the regression line
plot(ani~otouto, data = dat, xlab = "Score of Younger Brother",
     ylab = "Score of Elder Brother", pch = 20, cex = 2,
     xlim = c(5,27), ylim = c(5,27))
dat.lm <- lm(ani~otouto, data = dat)
abline(h = mean(dat$ani), lty = 2, col = "green", lwd = 3)
abline(dat.lm, col = 'red', lwd = 3)
pred.lm <- predict(dat.lm)
for (i.ani in 1:10){
  lines(rep(dat$otouto[i.ani],2), c(dat$ani[i.ani], pred.lm[i.ani]),
        col = 'blue', lwd = 3)
}

# multiple regression
dat <- read.csv("http://www.matsuka.info/data_folder/tdkReg01.csv")
dat.regALL <- lm(sales~price+design+material, data = dat)

# ANCOVA
dat <- read.csv("http://www.matsuka.info/data_folder/ancova01.csv")
dat$pretest = dat$pretest*0.1
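The two-sample t-test (with equal variances assumed) and the simple regression on the group indicator above are equivalent. The sketch below compares the two t statistics directly; it re-reads the sibling data into a new object (sib, an illustrative name) because dat was overwritten by the ANCOVA data, and it assumes dat2 is still in the workspace.

# sketch: the pooled two-sample t and the regression slope t agree (up to sign)
sib <- read.csv("http://www.matsuka.info/data_folder/hwsk8-17-6.csv")
t.test(sib$ani, sib$otouto, var.equal = TRUE)$statistic       # t from the t-test
summary(lm(score ~ order, data = dat2))$coefficients[2, "t value"]  # t for the group effect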
Cognitive Information Analysis: Ch. 04 Sample Solution
init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)], sd = 0.1),
                          nrow = n.neurons[i.layer])
    b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer+1)], sd = 0.1), nrow = 1)
  }
  return(list(W = W, b = b))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

relu.func <- function(x){
  y = apply(x, 2, function(x) pmax(x, 0))
  return(y)
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
  if (actFun == "softmax"){
    return(softmax(A))
  }
}

softmax <- function(x){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x%*%matrix(1, nrow = 1, ncol = C)
  return(exp(x)/rowSums(exp(x)))
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x%*%network$W[[i.layer]] + matrix(1, nrow = batch.size, ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}

cross.entropy = function(y, target){
  delta = 1e-7; R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

loss.network = function(params, x, t, actFun){
  y = feedforward(params, x, actFun)
  return(cross.entropy(y, t))
}

numerical.grad <- function(func, params, x, t, actFun) {
  # input args
  #   func:   name of objective function
  #   params: list of parameters (W & b)
  #   x:      input
  #   t:      target output
  #   actFun: activation functions
  ##############################################
  h = 1e-4
  n.list = length(params$W)   # number of weight layers
  grad = params
  for (i.list in 1:n.list) {
    R = nrow(params$W[[i.list]])
    C = ncol(params$W[[i.list]])
    grad$W[[i.list]] = matrix(0, R, C)
    grad$b[[i.list]] = matrix(0, nrow = 1, ncol = C)
    for (i.col in 1:C) {
      for (i.row in 1:R) {
        temp.w = params$W[[i.list]][i.row, i.col]
        params$W[[i.list]][i.row, i.col] = temp.w + h
        plusH = do.call(func, list(params, x, t, actFun))
        params$W[[i.list]][i.row, i.col] = temp.w - h
        minusH = do.call(func, list(params, x, t, actFun))
        grad$W[[i.list]][i.row, i.col] = (plusH - minusH) / (2 * h)
        params$W[[i.list]][i.row, i.col] = temp.w
      }
      temp.b = params$b[[i.list]][i.col]
      params$b[[i.list]][i.col] = temp.b + h
      plusH = do.call(func, list(params, x, t, actFun))
      params$b[[i.list]][i.col] = temp.b - h
      minusH = do.call(func, list(params, x, t, actFun))
      grad$b[[i.list]][i.col] = (plusH - minusH) / (2 * h)
      params$b[[i.list]][i.col] = temp.b
    }
  }
  return(grad)
}

train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0, nrow = nrow(train.x), ncol = 3)
train.y[which(train.y.temp==1), 1] = 1
train.y[which(train.y.temp==2), 2] = 1
train.y[which(train.y.temp==3), 3] = 1

n.neurons = c(4,15,3)
params = init.network(n.neurons)
batch_size = 50; n.iter = 2000; lambda = 0.05
n.train = nrow(train.x)
loss = rep(0, n.iter)
n.layer = length(params$W)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  dW = numerical.grad("loss.network", params, x.batch, t.batch, c("sigmoid","softmax"))
  for (i.layer in 1:n.layer){
    params$W[[i.layer]] = params$W[[i.layer]] - lambda*dW$W[[i.layer]]
    params$b[[i.layer]] = params$b[[i.layer]] - lambda*dW$b[[i.layer]]
  }
  loss[i.iter] = loss.network(params, x.batch, t.batch, c("sigmoid","softmax"))
}
plot(loss, type = 'l')
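After training, the fitted network can be checked against the true iris labels. This is a minimal sketch, assuming params, train.x, and train.y.temp from the run above are still in the workspace.

# sketch: training-set accuracy after the numerical-gradient run
pred <- apply(feedforward(params, train.x, c("sigmoid","softmax")), 1, which.max)
mean(pred == train.y.temp)      # proportion correct
table(pred, train.y.temp)       # confusion table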
Data Analysis Fundamentals A: Week 05, t-tests
dat <- read.csv("http://www.matsuka.info/data_folder/datWA01.txt")

# z-test for male height (sigma is treated here as the known population variance)
mean.M <- mean(dat$h[dat$gender=="M"])
sigma = 10
n.M = length(dat$h[dat$gender=="M"])
z.value = (mean.M-171)/(sqrt(sigma/n.M))
(1-pnorm(abs(z.value)))*2

# one-sample t-test on shoe size
ssize = c(24,25,26,23.5,25,27,24,22,27.5,28)
ssize.mean = mean(ssize)
ssize.var = var(ssize)
N = 10
t.value = (ssize.mean-24)/(sqrt(ssize.var/N))
(1-pt(abs(t.value), df = 9))*2

# one-sample t-test for male height
h.mean.M <- mean(dat$h[dat$gender=="M"])
h.var.M <- var(dat$h[dat$gender=="M"])
n.M = length(dat$h[dat$gender=="M"])
t.value = (h.mean.M-171)/(sqrt(h.var.M/n.M))
(1-pt(abs(t.value), df = (n.M-1)))*2

# RCMD
# paired t-test on the difference scores
A = c(12,19,10,10,14,18,15,11,16)
B = c(15,20,16,14,17,16,12,12,19)
d = A-B
tValue <- mean(d)/sqrt(var(d)/length(d))
(1-pt(abs(tValue), df = 8))*2
t.test(d, mu = 0)

# two-sample t-test (equal n, pooled variance)
X1 = c(78,70,66,76,78,76,88,76)
X2 = c(76,72,60,72,70,72,84,70)
t.value = (mean(X1)-mean(X2))/sqrt((var(X1)+var(X2))/8)
2*(1-pt(abs(t.value), 14))
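The built-in t.test should reproduce the hand-computed statistics above. A quick sketch, assuming A, B, X1, and X2 from the code above are still defined:

# sketch: verify the manual t values with t.test
t.test(A, B, paired = TRUE)          # same t as the paired test on d
t.test(X1, X2, var.equal = TRUE)     # same t as the pooled two-sample test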
Cognitive Information Analysis: Ch. 04
# mean squared error and cross-entropy
MSE <- function(target, y){
  return(0.5*sum((target-y)^2))
}
t = rep(0,10)
t[3] = 1
y = c(0.1, 0.05, 0.6, 0, 0.05, 0.1, 0, 0.1, 0, 0)

x = seq(0,1,0.01)
plot(x, -log(x), lwd = 2)

cross.entropy = function(y, target){
  delta = 1e-7; R = nrow(as.matrix(y))
  return(-sum(target*log(y + delta))/R)
}

# numerical differentiation
numerical.diff = function(func, x){
  h = 1e-4
  plusH = do.call(func, list(x+h))
  minusH = do.call(func, list(x-h))
  num.diff = (plusH - minusH)/(2*h)
  return(num.diff)
}

func01 = function(x){
  return(0.01*x^2+0.1*x)
}
x = seq(0,20,0.1)
y = func01(x)
plot(x, y, xlab = "x", ylab = "f(x)", type = "l", lwd = 2)
ND.5 = numerical.diff('func01', 5)
abline(a = func01(5)-ND.5*5, b = ND.5, col = 'red', lwd = 2)
abline(v = 5, lty = 2, col = 'red')
ND.10 = numerical.diff('func01', 10)
abline(a = func01(10)-ND.10*10, b = ND.10, col = 'blue', lwd = 2)
abline(v = 10, lty = 2, col = 'blue')

func02 = function(x0, x1){
  return(x0^2 + x1^2)
}
func02.x0 = function(x0){
  return(x0^2)
}
func02.x1 = function(x1){
  return(x1^2)
}
func02R = function(x){
  return(x[1]^2 + x[2]^2)
}

# numerical gradient
numerical.grad <- function(func, x){
  h = 1e-4
  R = nrow(x)
  C = ncol(x)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.x = x[i.row, i.col]
      x[i.row, i.col] = temp.x + h
      plusH = do.call(func, list(x))
      x[i.row, i.col] = temp.x - h
      minusH = do.call(func, list(x))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      x[i.row, i.col] = temp.x
    }
  }
  return(grad)
}
numerical.grad("func02R", matrix(c(3,4), nrow = 1))
numerical.grad("func02R", matrix(c(0,4), nrow = 1))
numerical.grad("func02R", matrix(c(3,0), nrow = 1))

# visualizing the negative gradient field
require(plot3D)
x = seq(-2,2,0.2)
y = seq(-2,2,0.2)
M = mesh(x,y)
R = nrow(M$x)
C = ncol(M$x)
scaling = 0.05
plot(c(), c(), xlim = c(-2,2), ylim = c(-2,2))
for (i.col in 1:C){
  for (i.row in 1:R){
    ng = numerical.grad("func02R", matrix(c(M$x[i.row,i.col], M$y[i.row,i.col]), nrow = 1))
    arrows(M$x[i.row,i.col], M$y[i.row,i.col],
           (M$x[i.row,i.col]-ng[1]*scaling), (M$y[i.row,i.col]-ng[2]*scaling),
           length = 0.05)
  }
}

# gradient descent
grad.desc <- function(func, init.x, lr, n.iter){
  x = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
  }
  return(x)
}
x.init = matrix(c(-3,4), nrow = 1)
grad.desc("func02R", x.init, 0.1, 100)

x = seq(-4,4,0.2)
y = seq(-4,4,0.2)
M = mesh(x,y)
Z = as.vector(M$x^2) + as.vector(M$y^2)
Z.mesh = matrix(Z, nrow(M$x))
contour(x, y, Z.mesh, drawlabels = F)

# gradient descent, keeping the trajectory
grad.desc2 <- function(func, init.x, lr, n.iter){
  x = init.x
  x.hist = init.x
  for (i.iter in 1:n.iter) {
    grad = numerical.grad(func, x)
    x = x - lr*grad
    x.hist = rbind(x.hist, x)
  }
  return(x.hist)
}
gd = grad.desc2("func02R", x.init, 0.1, 100)
points(gd, col = 'green', pch = 20)

# manual implementation of a simple network
w = matrix(c(0.47355232, 0.85557411, 0.9977393, 0.03563661, 0.84668094, 0.69422093), nrow = 2)
x = matrix(c(0.6, 0.9), nrow = 1)
t = c(0,0,1)

nn.predict <- function(w, x){
  return(x%*%w)
}

# numerically stabilized softmax (same form as softmax.2LN below)
softmax.func <- function(x){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x%*%matrix(1, nrow = 1, ncol = C)
  return(exp(x)/rowSums(exp(x)))
}

loss.func = function(w, x, t){
  pred = nn.predict(w, x)
  y = softmax.func(pred)
  return(cross.entropy(y, t))
}

numerical.gradCE <- function(func, w, x, t){
  # input args
  #   func: name of function
  #   w:    weight
  #   x:    input
  #   t:    target output
  ##############################################
  h = 1e-4
  R = nrow(w)
  C = ncol(w)
  grad = matrix(0, R, C)
  for (i.col in 1:C){
    for (i.row in 1:R){
      temp.w = w[i.row, i.col]
      w[i.row, i.col] = temp.w + h
      plusH = do.call(func, list(w, x, t))
      w[i.row, i.col] = temp.w - h
      minusH = do.call(func, list(w, x, t))
      grad[i.row, i.col] = (plusH - minusH)/(2*h)
      w[i.row, i.col] = temp.w
    }
  }
  return(grad)
}
dW = numerical.gradCE("loss.func", w, x, t)

### ch 4.5 2-layer NN ###
init.2LN <- function(n.input, n.hidden, n.output, w.std = 0.01){
  W1 = matrix(rnorm(n.input*n.hidden, 0, w.std), nrow = n.input)
  B1 = matrix(rnorm(n.hidden, 0, w.std), nrow = 1)
  W2 = matrix(rnorm(n.hidden*n.output, 0, w.std), nrow = n.hidden)
  B2 = matrix(rnorm(n.output, 0, w.std), nrow = 1)
  return(list(W1 = W1, B1 = B1, W2 = W2, B2 = B2))
}

softmax.2LN <- function(x){
  max.x = apply(x, 1, max)
  C = ncol(x)
  x = x - max.x%*%matrix(1, nrow = 1, ncol = C)
  return(exp(x)/rowSums(exp(x)))
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

pred.2LN <- function(params, x){
  NR = nrow(x)
  a1 = x%*%params$W1 + matrix(1, nrow = NR)%*%params$B1
  z1 = sigmoid.func(a1)
  a2 = z1%*%params$W2 + matrix(1, nrow = NR)%*%params$B2
  y = softmax.2LN(a2)
  return(y)
}

loss.2LN = function(params, x, t){
  y = pred.2LN(params, x)
  return(cross.entropy(y, t))
}

numerical.grad2LN <- function(func, params, x, t) {
  # input args
  #   func:   name of function
  #   params: list of weights and biases (W1, B1, W2, B2)
  #   x:      input
  #   t:      target output
  ##############################################
  h = 1e-4; n.list = length(params); grad = params
  for (i.list in 1:n.list) {
    R = nrow(params[[i.list]])
    C = ncol(params[[i.list]])
    grad[[i.list]] = matrix(0, R, C)
    for (i.col in 1:C) {
      for (i.row in 1:R) {
        temp.w = params[[i.list]][i.row, i.col]
        params[[i.list]][i.row, i.col] = temp.w + h
        plusH = do.call(func, list(params, x, t))
        params[[i.list]][i.row, i.col] = temp.w - h
        minusH = do.call(func, list(params, x, t))
        grad[[i.list]][i.row, i.col] = (plusH - minusH) / (2 * h)
        params[[i.list]][i.row, i.col] = temp.w
      }
    }
  }
  return(grad)
}

## example using the IRIS data set
train.x = as.matrix(iris[,1:4])
train.y.temp = as.numeric(iris[,5])
train.y = matrix(0, nrow = nrow(train.x), ncol = 3)
train.y[which(train.y.temp==1), 1] = 1
train.y[which(train.y.temp==2), 2] = 1
train.y[which(train.y.temp==3), 3] = 1

params = init.2LN(4, 15, 3, 0.01)
batch_size = 7; n.iter = 2000; lambda = 0.05
n.train = nrow(train.x)
loss = rep(0, n.iter)
for (i.iter in 1:n.iter){
  batch_mask = sample(1:n.train, batch_size)
  x.batch = train.x[batch_mask,]
  t.batch = train.y[batch_mask,]
  dW = numerical.grad2LN("loss.2LN", params, x.batch, t.batch)
  params$W1 = params$W1 - lambda*dW$W1
  params$B1 = params$B1 - lambda*dW$B1
  params$W2 = params$W2 - lambda*dW$W2
  params$B2 = params$B2 - lambda*dW$B2
  loss[i.iter] = loss.2LN(params, x.batch, t.batch)
}
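Once the loop finishes, it is worth looking at how the loss evolved and how well the trained network classifies the training data. A minimal sketch, assuming loss, params, train.x, and train.y.temp from the run above are still in the workspace:

# sketch: inspect the numerical-gradient training run
plot(loss, type = 'l', xlab = "iteration", ylab = "cross-entropy loss")
pred <- apply(pred.2LN(params, train.x), 1, which.max)
mean(pred == train.y.temp)   # training-set accuracy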
Cognitive Information Analysis: Assignment 1 Sample Solution
relu.func <- function(x){
  y = apply(x, 2, function(x) pmax(x, 0))
  return(y)
}

sigmoid.func <- function(x){
  return(1/(1+exp(-x)))
}

init.network <- function(n.neurons){
  n.layer = length(n.neurons)
  W = list(); b = list()
  for (i.layer in 1:(n.layer-1)){
    W[[i.layer]] = matrix(rnorm(n.neurons[i.layer]*n.neurons[(i.layer+1)]),
                          nrow = n.neurons[i.layer])
    b[[i.layer]] = matrix(rnorm(n.neurons[(i.layer+1)]), nrow = 1)
  }
  return(list(W = W, b = b))
}

activation <- function(A, actFun){
  if (actFun == "sigmoid"){
    return(sigmoid.func(A))
  }
  if (actFun == "relu"){
    return(relu.func(A))
  }
}

feedforward <- function(network, x, actFun) {
  n.layer <- length(network$W)
  batch.size = nrow(x)
  for (i.layer in 1:n.layer){
    A = x%*%network$W[[i.layer]] + matrix(1, nrow = batch.size, ncol = 1)%*%network$b[[i.layer]]
    x = activation(A, actFun[i.layer])
  }
  return(x)
}
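A minimal usage sketch of the functions above; the layer sizes, the random input matrix, and the object names net and x are arbitrary choices made only for illustration.

# sketch: build a small 3-4-2 network and push a batch of 5 inputs through it
set.seed(1)
net <- init.network(c(3, 4, 2))            # 3 inputs -> 4 hidden units -> 2 outputs
x <- matrix(rnorm(5*3), nrow = 5)          # batch of 5 random input vectors
feedforward(net, x, c("relu", "sigmoid"))  # one activation name per layer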
Data Analysis Fundamentals A: Week 04
nSample = 10; nRep = 10^5
CLT.unif <- function(nSample, nRep) {
  # draw nRep samples of size nSample from a uniform distribution
  x = matrix(runif(nSample*nRep), nrow = nSample)
  x.means <- colMeans(x)
  hist(x.means, 50, main = 'Distribution of Means of Uniform Distribution',
       xlab = 'Means', probability = T)
  x.means.dens <- density(x.means)
  lines(x.means.dens, lwd = 3, lty = 1, col = 'red')
  # normal density predicted by the central limit theorem
  x2 = seq(0,1,0.001)
  CLT = dnorm(x2, mean = 0.5, sd = (sqrt(1/12))/(sqrt(nSample)))
  lines(x2, CLT, lwd = 3, lty = 3, col = 'cyan')
  legend("topright", c("Density of Actual Means","Normal Distribution"),
         lty = c(1,3), col = c('red','cyan'), lwd = 3)
}
CLT.unif(10, 100000)
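A brief follow-up sketch: rerunning CLT.unif with different sample sizes shows how the distribution of sample means approaches the normal density as nSample grows (the specific values 2 and 50 are illustrative choices).

CLT.unif(2, 100000)    # small samples: still noticeably non-normal
CLT.unif(50, 100000)   # larger samples: close match to the normal curve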