院:認識情報解析

library(rjags)
source("http://peach.l.chiba-u.ac.jp/course_folder/HDI_revised.txt")

dat<-read.csv("http://www.matsuka.info/data_folder/HtWtData110.csv")
library(plot3D)
w = seq(80,360,length.out=100)
h = seq(50, 75, length.out=100)
M <- mesh(w,h)
P.male = 1/(1+exp(-1*(0.018*M$x+0.7*M$y-50)))

scatter3D(dat$weight, dat$height, dat$mal, pch = 19, cex = 2,
          theta = 30, phi = 45, ticktype = "detailed", zlim=c(-0.1,1),ylim=c(50,78),xlim=c(80,360),
          xlab = "weight", ylab = "height", zlab = "P(male)",
          surf = list(x = M$x, y = M$y, z = P.male,facets = NA))


y = dat$male; x = dat$weight; Ntotal = length(y)
dataList = list(y = y, x = x, Ntotal = Ntotal)

model.txt = "
data {
  xm <- mean(x)
  xsd <- sd(x)
  for (i in 1:Ntotal){
    zx[i] = (x[i] - xm)/xsd
  }
}
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern(ilogit( zbeta0 + zbeta * zx[i_data]))
  }
  zbeta0 ~ dnorm(0, 1/2^2)
  zbeta ~ dnorm(0, 1/2^2)

  beta <- zbeta / xsd
  beta0 <- zbeta0 - zbeta * xm/xsd
}"
writeLines(model.txt, "model1.txt")
parameters = c( "beta0" ,  "beta")
jagsModel = jags.model( "model1.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)

plot(dat$weight,dat$male,xlim=c(90,280),yaxt="n",ylab="Male / Female",
     xlab="Weight", cex=2.5)
axis(2,at = 0:1,labels=c("Femal","Male"))
n2plot=100
idx=sample(1:nrow(mcmcMat),n2plot)
temp.x = seq(90,280,length.out = 100)
for (i_sample in 1:n2plot) {
  temp.y = 1/(1+exp(-1*(mcmcMat[idx[i_sample],2] + mcmcMat[idx[i_sample],1]*temp.x)))
  lines(temp.x, temp.y, col='orange', lwd=2)
}


x = cbind(dat$weight,dat$height);Nx = ncol(x)
dataList = list(y = y, x = x, Ntotal = Ntotal, Nx = Nx)

model.txt = "
data {
  for (j in 1:Nx){
    xm[j] <- mean(x[,j])
    xsd[j] <- sd(x[,j])
    for (i in 1:Ntotal){
      zx[i,j] = (x[i,j] - xm[j])/xsd[j]
    }
  }
}
model {
  for ( i_data in 1:Ntotal ) {
    y[ i_data ] ~ dbern(ilogit( zbeta0 + sum(zbeta[1:Nx] * zx[i_data, 1:Nx ])))
  }
  zbeta0 ~ dnorm(0, 1/2^2)
  for (j in 1:Nx){
    zbeta[j] ~ dnorm(0, 1/2^2)
  }
  beta[1:Nx] <- zbeta[1:Nx] / xsd[1:Nx]
  beta0 <- zbeta0 -sum(zbeta[1:Nx] * xm[1:Nx]/xsd[1:Nx])
}"
writeLines(model.txt, "model.txt")

parameters = c( "beta0" ,  "beta")
jagsModel = jags.model( "model.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)
par(mfrow=c(1,3))
HDI.plot(mcmcMat[,3],xlabel='intercept')
HDI.plot(mcmcMat[,1],xlabel='weight')
HDI.plot(mcmcMat[,2],xlabel='height')

par(mfrow=c(1,1))
plot(dat$weight,dat$height,xlab="Weight", ylab="Height", type="n")
n2plot=100
idx=sample(1:nrow(mcmcMat),n2plot)
for (i_sample in 1:n2plot) {
  abline(a=-1*mcmcMat[idx[i_sample],3]/mcmcMat[idx[i_sample],2],
         b=-1*mcmcMat[idx[i_sample],1]/mcmcMat[idx[i_sample],2],col="orange")

}
points(dat$weight,dat$height,pch=paste(dat$male), cex=1.5)

# un-even data
x = rnorm(300)
pr = 1/(1+exp(2*x))
y = pr < runif(300)
plot(x,y)

remove.id = sample(which(y == 0),120)

Ntotal = length(y[-remove.id])
dataList = list(y = y[-remove.id], x = x[-remove.id], Ntotal = Ntotal)

model.txt = "
data {
xm <- mean(x)
xsd <- sd(x)
for (i in 1:Ntotal){
zx[i] = (x[i] - xm)/xsd
}
}
model {
for ( i_data in 1:Ntotal ) {
y[ i_data ] ~ dbern(ilogit( zbeta0 + zbeta * zx[i_data]))
}
zbeta0 ~ dnorm(0, 1/2^2)
zbeta ~ dnorm(0, 1/2^2)

beta <- zbeta / xsd
beta0 <- zbeta0 - zbeta * xm/xsd
}"
writeLines(model.txt, "model1.txt")
parameters = c( "beta0" ,  "beta")
jagsModel = jags.model( "model1.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)

plot(x[-remove.id],y[-remove.id],xlim=c(-3,3))
n2plot=100
idx=sample(1:nrow(mcmcMat),n2plot)
temp.x = seq(-3,3,length.out = 100)
for (i_sample in 1:n2plot) {
  temp.y = 1/(1+exp(-1*(mcmcMat[idx[i_sample],2] + mcmcMat[idx[i_sample],1]*temp.x)))
  lines(temp.x, temp.y, col='orange', lwd=2)
}


x1 = rnorm(150)
x2 = x1*0.9+rnorm(150,0,0.5)
pr = 1/(1+exp(x1+x2))
y = pr < runif(150)
Ntotal = length(y)
dataList = list(y = y, x = cbind(x1,x2), Ntotal = Ntotal, Nx = 2)


model.txt = "
data {
for (j in 1:Nx){
xm[j] <- mean(x[,j])
xsd[j] <- sd(x[,j])
for (i in 1:Ntotal){
zx[i,j] = (x[i,j] - xm[j])/xsd[j]
}
}
}
model {
for ( i_data in 1:Ntotal ) {
y[ i_data ] ~ dbern(ilogit( zbeta0 + sum(zbeta[1:Nx] * zx[i_data, 1:Nx ])))
}
zbeta0 ~ dnorm(0, 1/2^2)
for (j in 1:Nx){
zbeta[j] ~ dnorm(0, 1/2^2)
}
beta[1:Nx] <- zbeta[1:Nx] / xsd[1:Nx]
beta0 <- zbeta0 -sum(zbeta[1:Nx] * xm[1:Nx]/xsd[1:Nx])
}"
writeLines(model.txt, "model.txt")

parameters = c( "beta0" ,  "beta")
jagsModel = jags.model( "model.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)
plot(x1,x2,xlab="x1", ylab="x2", type="n")
n2plot=100
idx=sample(1:nrow(mcmcMat),n2plot)
for (i_sample in 1:n2plot) {
  abline(a=-1*mcmcMat[idx[i_sample],3]/mcmcMat[idx[i_sample],2],
         b=-1*mcmcMat[idx[i_sample],1]/mcmcMat[idx[i_sample],2],col="orange")

}
points(x1,x2,pch=paste(y), cex=1.5)

# guessing
y = dat$male; x = dat$weight; Ntotal = length(y)
dataList = list(y = y, x = x, Ntotal = Ntotal)

model.txt = "
data {
xm <- mean(x)
xsd <- sd(x)
for (i in 1:Ntotal){
zx[i] = (x[i] - xm)/xsd
}
}
model {
for ( i_data in 1:Ntotal ) {
y[ i_data ] ~ dbern(mu[i_data])
mu[i_data] <- (guess*0.5 + (1-guess)*ilogit( zbeta0 + zbeta * zx[i_data]))
}
zbeta0 ~ dnorm(0, 1/2^2)
zbeta ~ dnorm(0, 1/2^2)
guess ~ dbeta(1,9)
beta <- zbeta / xsd
beta0 <- zbeta0 - zbeta * xm/xsd
}"
writeLines(model.txt, "model1.txt")
parameters = c( "beta0" ,  "beta", "guess")
jagsModel = jags.model( "model1.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)

plot(x[-remove.id],y[-remove.id],xlim=c(-3,3))
n2plot=100
idx=sample(1:nrow(mcmcMat),n2plot)
temp.x = seq(-3,3,length.out = 100)
for (i_sample in 1:n2plot) {
  temp.y = 1/(1+exp(-1*(mcmcMat[idx[i_sample],2] + mcmcMat[idx[i_sample],1]*temp.x)))
  lines(temp.x, temp.y, col='orange', lwd=2)
}
par(mfrow=c(1,3))
HDI.plot(mcmcMat[,2],xlabel='intercept')
HDI.plot(mcmcMat[,1],xlabel='weight')
HDI.plot(mcmcMat[,3],xlabel='guessing')

par(mfrow=c(1,1))
plot(dat$weight,dat$male,xlim=c(90,280),yaxt="n",ylab="Male / Female",
     xlab="Weight", cex=2.5)
axis(2,at = 0:1,labels=c("Femal","Male"))
n2plot=100
idx=sample(1:nrow(mcmcMat),n2plot)
temp.x = seq(90,280,length.out = 100)
for (i_sample in 1:n2plot) {
  temp.y = mcmcMat[idx[i_sample],3]/2+(1-mcmcMat[idx[i_sample],3])*1/(1+exp(-1*(mcmcMat[idx[i_sample],2] + mcmcMat[idx[i_sample],1]*temp.x)))
  lines(temp.x, temp.y, col='orange', lwd=2)
}

# nomial predictors
model.txt = "
model {
  for ( i.data in 1:Ntotal ) {
    y[ i.data ] ~ dbin(mu[i.data],N[i.data])
    mu[i.data] ~ dbeta(omega[x[i.data]]*(kappa-2)+1,(1-omega[x[i.data]])*(kappa-2)+1)
  }
  for (i.pos in 1:Npos){
    omega[i.pos] <- ilogit(a0+a[i.pos])
    a[i.pos] ~ dnorm(0.0, 1/aSigma^2)
  }
  a0 ~  dnorm(0,1/2^2)
  aSigma ~ dgamma(1.64, 0.32)
  kappa <- kappaMinusTwo +2
  kappaMinusTwo ~ dgamma(0.01,0.01)
  for (i.pos in 1:Npos){
    m[i.pos] <- a0+a[i.pos]
  }
  b0 <- mean(m[1:Npos])
  for (i.pos in 1:Npos){
    b[i.pos] <- m[i.pos] - b0
  }
}"
writeLines(model.txt, "model.txt")

dat<-read.csv("http://www.matsuka.info/data_folder/BattingAverage.csv")
y = dat$Hits
N = dat$AtBats
x = dat$PriPosNumber
Ntotal = length(y)
Npos = length(unique(x))
dataList = list(y = y, x = x, N = N, Ntotal = Ntotal, Npos = Npos)
parameters = c( "b0" ,  "b", "omega")
jagsModel = jags.model( "model.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)

par(mfrow=c(3,3))
for (i.pos in 1:9){
  HDI.plot(mcmcMat[,i.pos+10])
}

par(mfrow=c(2,2))
HDI.plot(mcmcMat[,1]-mcmcMat[,2])
HDI.plot(mcmcMat[,2]-mcmcMat[,3])
HDI.plot(mcmcMat[,11]-mcmcMat[,12])
HDI.plot(mcmcMat[,12]-mcmcMat[,13])

# softmax regression
x1 = runif(500, min=-2, max = 2)
x2 = runif(500, min=-2, max = 2)
b0 = c(0,-3,-4,-5)
b1 = c(0,-5,-1,10)
b2 = c(0,-5,10,-1)
l1 = b0[1]+b1[1]*x1+b2[1]*x2
l2 = b0[2]+b1[2]*x1+b2[2]*x2
l3 = b0[3]+b1[3]*x1+b2[3]*x2
l4 = b0[4]+b1[4]*x1+b2[4]*x2
p1 = exp(l1)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p2 = exp(l2)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p3 = exp(l3)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p4 = exp(l4)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
ps = cbind(p1,p2,p3,p4)
y = apply(ps,1,which.max)
plot(x1,x2,pch=y,col=y)

b0 = c(0,-4,-1,-1)
b1 = c(0,-5,1,3)
b2 = c(0,0,-5,3)
l1 = b0[1]+b1[1]*x1+b2[1]*x2
l2 = b0[2]+b1[2]*x1+b2[2]*x2
l3 = b0[3]+b1[3]*x1+b2[3]*x2
l4 = b0[4]+b1[4]*x1+b2[4]*x2
p1 = exp(l1)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p2 = exp(l2)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p3 = exp(l3)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p4 = exp(l4)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
ps = cbind(p1,p2,p3,p4)
y = apply(ps,1,which.max)
plot(x1,x2,pch=y,col=y)

p1 = exp(l1)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p2 = exp(l2)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p3 = exp(l3)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
p4 = exp(l4)/sum(exp(l1)+exp(l2)+exp(l3)+exp(l4))
ps = cbind(p1,p2,p3,p4)
p12 = pmax(p1,p2)
p34 = pmax(p3,p4)
y12vs34 = apply(cbind(p1,p2),1,which.max)
plot(x1,x2,pch=y12vs34,col=y12vs34)
y1vs2 = apply(cbind(p1,p3),1,which.max)
points(x1,x2,pch=y1vs2+2,col=y1vs2+2)
y3vs4 = apply(cbind(p1,p4),1,which.max)
points(x1,x2,pch=y3vs4+6,col=y3vs4+6)



model.txt = "
data {
  for ( j in 1:Nx ) {
    xm[j]  <- mean(x[,j])
    xsd[j] <-   sd(x[,j])
    for ( i in 1:Ntotal ) {
      zx[i,j] <- ( x[i,j] - xm[j] ) / xsd[j]
    }
  }
}
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dcat(mu[1:Nout,i])
    mu[1:Nout,i] <- explambda[1:Nout,i]/sum(explambda[1:Nout,i])
    for (k in 1:Nout){
      explambda[k,i]=exp(zbeta0[k] + sum(zbeta[k,1:Nx] * zx[i, 1:Nx ]))
    }
  }
  zbeta0[1] = 0
  for (j in 1:Nx){
    zbeta[1,j] <- 0
  }
  for (k in 2:Nout){
    zbeta0[k] ~ dnorm(0, 1/2^2)
    for (j in 1:Nx){
      zbeta[k,j]~dnorm(0, 1/2^2)
    }
  }
  for ( k in 1:Nout ) {
    beta[k,1:Nx] <- zbeta[k,1:Nx] / xsd[1:Nx]
    beta0[k] <- zbeta0[k] - sum( zbeta[k,1:Nx] * xm[1:Nx] / xsd[1:Nx] )
  }
}"
writeLines(model.txt, "model.txt")

dat<-read.csv( "http://www.matsuka.info/data_folder/CondLogistRegData1.csv" )
y = dat$Y
x = cbind(dat[,1],dat[,2])
Ntotal = length(y)
Nout = length(unique(y))
dataList = list(y = y, x = x, Nx = 2, Ntotal = Ntotal, Nout = Nout)
parameters = c( "beta0" ,  "beta")
jagsModel = jags.model( "model.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)
par(mfrow=c(1,3))
HDI.plot(mcmcMat[,7+0],xlab='intercept')
HDI.plot(mcmcMat[,1+0],xlab='b1')
HDI.plot(mcmcMat[,4+0],xlab='b2')

model = "
data {
  for ( j in 1:Nx ) {
    xm[j]  <- mean(x[,j])
    xsd[j] <-   sd(x[,j])
    for ( i in 1:Ntotal ) {
      zx[i,j] <- ( x[i,j] - xm[j] ) / xsd[j]
    }
  }
}
# Specify the model for standardized data:
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dcat( mu[1:Nout,i] )
    mu[1,i] <- phi[1,i]
    mu[2,i] <- phi[2,i] * (1-phi[1,i])
    mu[3,i] <- phi[3,i] * (1-phi[2,i]) * (1-phi[1,i])
    mu[4,i] <- (1-phi[3,i]) * (1-phi[2,i]) * (1-phi[1,i])
    for ( r in 1:(Nout-1) ) {
      phi[r,i] <- ilogit( zbeta0[r] + sum( zbeta[r,1:Nx] * zx[i,1:Nx] ) )
    }
  }
  for ( r in 1:(Nout-1) ) {
    zbeta0[r] ~ dnorm( 0 , 1/20^2 )
    for ( j in 1:Nx ) {
      zbeta[r,j] ~ dnorm( 0 , 1/20^2 )
    }
  }
  for ( r in 1:(Nout-1) ) {
    beta[r,1:Nx] <- zbeta[r,1:Nx] / xsd[1:Nx]
    beta0[r] <- zbeta0[r] - sum( zbeta[r,1:Nx] * xm[1:Nx] / xsd[1:Nx] )
  }
}
"
writeLines( model , "model.txt" )

dat<-read.csv( "http://www.matsuka.info/data_folder/SoftmaxRegData2.csv" )
y = dat$Y
x = cbind(dat[,1],dat[,2])
Ntotal = length(y)
Nout = length(unique(y))
dataList = list(y = y, x = x, Nx = 2, Ntotal = Ntotal, Nout = Nout)
parameters = c( "beta0" ,  "beta")
jagsModel = jags.model( "model.txt", data=dataList, n.chains=3, n.adapt=500 )
update( jagsModel , n.iter=1000)
codaSamples = coda.samples( jagsModel , variable.names=parameters, n.iter=10000, thin=5)
mcmcMat<-as.matrix(codaSamples)
par(mfrow=c(1,1))
plot(x[,1],x[,2],col=y)
n2plot=100
idx=sample(1:nrow(mcmcMat),n2plot)
temp.x = seq(-3,3,length.out = 100)
for (i.cat in 0:2){
  for (i_sample in 1:n2plot) {
    abline(a=-1*mcmcMat[idx[i_sample],7+i.cat]/mcmcMat[idx[i_sample],4+i.cat],
           b=-1*mcmcMat[idx[i_sample],1+i.cat]/mcmcMat[idx[i_sample],4+i.cat],col="orange")
  }
}
model2 = "
data {
  for ( j in 1:Nx ) {
    xm[j]  <- mean(x[,j])
    xsd[j] <-   sd(x[,j])
      for ( i in 1:Ntotal ) {
        zx[i,j] <- ( x[i,j] - xm[j] ) / xsd[j]
      }
    }
  }
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dcat( mu[1:Nout,i] )
    mu[1,i] <- phi[2,i] * phi[1,i]
    mu[2,i] <- (1-phi[2,i]) * phi[1,i]
    mu[3,i] <- phi[3,i] * (1-phi[1,i])
    mu[4,i] <- (1-phi[3,i]) * (1-phi[1,i])
    for ( r in 1:(Nout-1) ) {
      phi[r,i] <- ilogit( zbeta0[r] + sum( zbeta[r,1:Nx] * zx[i,1:Nx] ) )
    }
  }
  for ( r in 1:(Nout-1) ) {
    zbeta0[r] ~ dnorm( 0 , 1/20^2 )
    for ( j in 1:Nx ) {
      zbeta[r,j] ~ dnorm( 0 , 1/20^2 )
    }
  }
  for ( r in 1:(Nout-1) ) {
    beta[r,1:Nx] <- zbeta[r,1:Nx] / xsd[1:Nx]
    beta0[r] <- zbeta0[r] - sum( zbeta[r,1:Nx] * xm[1:Nx] / xsd[1:Nx] )
  }
}"
writeLines( modelString , con="TEMPmodel.txt" )