# 認知情報解析 03 (Cognitive Information Analysis 03)

# objective: find x that minimizes y: y = x^2 +2*x

# Plain gradient descent (GD) on y = x^2 + 2*x, whose derivative is 2*x + 2.
# Starting at x = 10, repeatedly step against the gradient scaled by the
# learning rate `lr` until the gradient magnitude is no larger than `tol`.
# Every visited x (the starting point included) is appended to x.hist.
lr <- 0.1
tol <- 1e-7
x <- 10
x.hist <- x
grad <- 2*x + 2
while (abs(grad) > tol) {
  x <- x - lr*grad
  x.hist <- c(x.hist, x)
  # Re-evaluate the gradient at the new position; it decides whether to go on.
  grad <- 2*x + 2
}

# Draw the objective curve over [-10, 10] (100 evenly spaced points).
x.temp <- seq(from = -10, to = 10, length.out = 100)
plot(x.temp, x.temp^2 + 2*x.temp,
     type = 'l', lwd = 3,
     ylim = c(-5, 120),
     ylab = "y", xlab = "x")
# Trace the GD path on the curve in red ...
lines(x.hist, x.hist^2 + 2*x.hist,
      type = 'o', col = 'red', lwd = 2, pch = 19)
# ... and mark each visited x with an "I" tick along the line y = -4.
points(x.hist, rep(-4, length(x.hist)), col = 'red', pch = "I")

# GD w/ momentum on the same objective y = x^2 + 2*x (gradient 2*x + 2).
# The velocity v keeps a fraction `gamma` of the previous velocity and adds
# the current gradient step -lr*grad; x is then moved by v. Every visited x
# (the starting point included) is appended to x.histM.
tol = 1e-7;lr = 0.1; gamma = 0.36
x = 10; x.histM = x; v = 0;
repeat{
  grad = 2*x + 2
  # Stop once the gradient magnitude is within tolerance.
  if (abs(grad) <= tol) { break }
  v = gamma*v - lr*grad
  x = x + v
  x.histM = c(x.histM,x)
}
# Overlay the momentum path in blue on the plot opened by the standard-GD
# section (lines()/points() add to the current plot; they do not create one).
lines(x.histM,x.histM^2+2*x.histM,type='o',col='blue',pch=19)
# Mark each visited x with an "I" tick along the line y = -2.
points(x.histM,rep(-2,length(x.histM)),col='blue',pch="I")
# Legend keys mirror what is actually drawn: filled circles (pch 19) for both
# paths, line width 2 for the red path and the default width 1 for the blue.
legend("topleft",c("standard GD","GD w/ moment."), pch=19,lwd=c(2,1),col=c('red','blue'))
# Number of recorded points for each method (x.hist comes from the
# standard-GD section above).
c(length(x.hist),length(x.histM))