bnp

Some older Bayesian nonparametrics research.

commit e8fe0ce617051805205687ae37dc9ee7e42df09a
parent 4bc704aed8a469c0d232746fb83c45b09627d716
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon, 14 Mar 2016 21:57:02 +1300

Fix annoying gibbs bug.

Diffstat:
M finite-gaussian-mixture/src/fmm_multivariate_conditional.r        | 69 +++++++++++++++------------------------------------------------------
M finite-gaussian-mixture/src/simulation_multivariate_conditional.r | 27 +++++++++++----------------
2 files changed, 26 insertions(+), 70 deletions(-)

diff --git a/finite-gaussian-mixture/src/fmm_multivariate_conditional.r b/finite-gaussian-mixture/src/fmm_multivariate_conditional.r
@@ -56,7 +56,6 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
       as.matrix(vals)
     })
 
-  # FIXME (jtobin): NaN for empty clusters; need to handle this?
   ybar = lapply(clustered, colMeans)
   n    = lapply(clustered, nrow)
   pl   = function(lj, nj, ybarj) { (lj + nj * ybarj) / (1 + nj) }
@@ -64,8 +63,14 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
   centered = mapply('-', clustered, ybar, SIMPLIFY = F)
   ss       = lapply(centered, function(x) t(x) %*% x)
 
+  # NOTE (jtobin): the extra 'solve' calls here helped; came from
+  # http://thaines.com/content/misc/gaussian_conjugate_prior_cheat_sheet.pdf
+  # murphy's famous reference at
+  # http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf may be incorrect.
   pt = function(wj, ssj, nj, ybarj) {
-    wj + ssj + nj / (1 + nj) * ((l - ybarj) %*% t(l - ybarj))
+    if (nj == 0) { wj } else {
+      solve(solve(wj) + ssj + nj / (1 + nj) * ((l - ybarj) %*% t(l - ybarj)))
+    }
   }
 
   tn = mapply(pt, list(w), ss, n, ybar, SIMPLIFY = F)
@@ -76,49 +81,6 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
   list(m = loc, s = prec)
 }
 
-conditional_location_model = function(y, z, s, l, r) {
-  labelled  = data.frame(y, L1 = z)
-  clustered = lapply(seq_along(s),
-    function(j) {
-      labelled[which(labelled$L1 == j), !(names(labelled) %in% 'L1')]
-    })
-
-  n    = lapply(clustered, nrow)
-  yt   = lapply(clustered, function(j) { apply(j, MARGIN = 2, sum) })
-  num0 = mapply('%*%', yt, s, SIMPLIFY = F)
-
-  num  = lapply(num0, function(z) { z + (l %*% r) })
-  den0 = mapply('*', n, s, SIMPLIFY = F)
-  den  = lapply(den0, function(z) z + r)
-
-  v = lapply(den, solve)
-  m = mapply('%*%', num, v, SIMPLIFY = F)
-  mapply(rmvnorm, 1, m, v, SIMPLIFY = F)
-}
-
-conditional_precision_model = function(y, z, m, b, w) {
-  labelled = cbind(y, L1 = z)
-  cluster  = function(d, j) {
-    vals = d[which(d$L1 == j), !(names(d) %in% 'L1')]
-  }
-
-  clustered = lapply(seq_along(m), function(j) { cluster(labelled, j) })
-  yt = lapply(clustered, function(foo) { apply(foo, MARGIN = 2, sum) })
-
-  center = function(i, j) {
-    if (nrow(i) == 0) { as.matrix(i) } else { as.matrix(i - j) }
-  }
-
-  centered = mapply(center, clustered, m, SIMPLIFY = F)
-  ss   = lapply(centered, function(x) t(x) %*% x)
-  n    = lapply(clustered, nrow)
-  a    = lapply(n, function(j) j + b)
-  bet0 = lapply(ss, function(j) { (j + w * b) })
-  bet  = mapply('/', bet0, a, SIMPLIFY = F)
-
-  mapply(function(i, j) drop(rWishart(1, i, j)), a, bet, SIMPLIFY = F)
-}
-
 inverse_model = function(n, k, y, a, l, r, b, w) {
   gibbs = function(p0, m0, s0) {
     z  = conditional_label_model(y, p0, m0, s0)
@@ -128,15 +90,14 @@ inverse_model = function(n, k, y, a, l, r, b, w) {
     s1 = ps$s
     l  = lmodel(y, p1, m1, s1)
     list(p = p1, m = m1, s = s1, z = z, l = l)
-    }
+  }
 
-  p0 = mixing_model(k, a)
-  m0 = location_model(k, l, r)
-  s0 = precision_model(k, b, w)
   params = list(
-      p = p0
-    , m = lapply(m0, function(j) { matrix(j, ncol = length(j)) })
-    , s = s0
+      p = mixing_model(k, a)
+    , m = lapply(
+            location_model(k, l, r)
+          , function(j) { matrix(j, ncol = length(j)) })
+    , s = precision_model(k, b, w)
     )
 
   acc = params
@@ -146,11 +107,11 @@ inverse_model = function(n, k, y, a, l, r, b, w) {
     acc$p = rbind(acc$p, params$p)
     acc$m = mapply(rbind, acc$m, params$m, SIMPLIFY = F)
     # FIXME (jtobin): not logging intermediate covariances
-    # might be desirable to log some reduced ellipse dims
+    # possibly desirable to log eigenvalues
     acc$s = params$s
     acc$z = rbind(acc$z, params$z)
     acc$l = c(acc$l, params$l)
     }
 
   acc
-  }
+}

diff --git a/finite-gaussian-mixture/src/simulation_multivariate_conditional.r b/finite-gaussian-mixture/src/simulation_multivariate_conditional.r
@@ -19,14 +19,10 @@ config = list(
 
 set.seed(222)
 
-origin = list(
-    p = mixing_model(config$k, config$a)
-  , m = location_model(config$k, config$l, config$r)
-  , s = precision_model(config$k, config$b, config$w)
-  )
-
-# FIXME generate a known/non-pathological configuration first, to test
-d = model(config$k, config$l, config$r, config$b, config$w, config$n)
+d = list(
+    t(replicate(250, rnorm(2, c(5, 5))))
+  , t(replicate(250, rnorm(2, c(-5, -5))))
+  , t(replicate(500, rnorm(2))))
 
 dn = lapply(d, function(j) { data.frame(x = j[,1], y = j[,2]) })
 m  = melt(dn, id.vars = c('x', 'y'))
@@ -39,13 +35,12 @@ params = inverse_model(
   , config$b, config$w
   )
 
-
-m_ts_plot = function(j) {
-  melted = as.data.frame(j, id.vars = c('V1', 'V2'))
-  ggplot(
-      melted
-    , aes(x = V1, y = V2, alpha = seq_along(V1), colour = seq_along(V1))) +
-    geom_line() + xlim(-2, 2) + ylim(-10, 10)
-}
+#m_ts_plot = function(j) {
+#  melted = as.data.frame(j, id.vars = c('V1', 'V2'))
+#  ggplot(
+#      melted
+#    , aes(x = V1, y = V2, alpha = seq_along(V1), colour = seq_along(V1))) +
+#    geom_line() + xlim(-2, 2) + ylim(-10, 10)
+#}
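
For context on the fix (editorial note, not part of the commit): the rewritten `pt` appears to compute the standard Normal-Wishart conjugate update for a cluster's Wishart scale matrix, reading the prior mean `l` as mu_0, the prior scale `w` as W_0, and an implicit kappa_0 = 1 (matching the `1 + nj` factors in `pl` and `pt`). The additive update acts on the *inverse* scale, which is why both `solve` calls are needed before drawing from `rWishart`. A sketch of the algebra under those assumptions:

\[
  W_j^{\mathrm{post}}
    = \left( W_0^{-1}
        + \sum_{i : z_i = j} (y_i - \bar y_j)(y_i - \bar y_j)^{\top}
        + \frac{n_j}{1 + n_j}\,(\bar y_j - \mu_0)(\bar y_j - \mu_0)^{\top}
      \right)^{-1},
  \qquad
  \Lambda_j \mid y \sim \mathrm{Wishart}\!\left(\nu_0 + n_j,\; W_j^{\mathrm{post}}\right),
\]

where \(\bar y_j\) and \(n_j\) are the mean and size of cluster \(j\). For an empty cluster (\(n_j = 0\)) the new guard simply falls back to the prior scale \(W_0\), which also removes the NaN problem noted in the deleted FIXME.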