commit e8fe0ce617051805205687ae37dc9ee7e42df09a
parent 4bc704aed8a469c0d232746fb83c45b09627d716
Author: Jared Tobin <jared@jtobin.ca>
Date: Mon, 14 Mar 2016 21:57:02 +1300
Fix annoying gibbs bug.
Diffstat:
2 files changed, 26 insertions(+), 70 deletions(-)
diff --git a/finite-gaussian-mixture/src/fmm_multivariate_conditional.r b/finite-gaussian-mixture/src/fmm_multivariate_conditional.r
@@ -56,7 +56,6 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
as.matrix(vals)
})
- # FIXME (jtobin): NaN for empty clusters; need to handle this?
ybar = lapply(clustered, colMeans)
n = lapply(clustered, nrow)
pl = function(lj, nj, ybarj) { (lj + nj * ybarj) / (1 + nj) }
@@ -64,8 +63,14 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
centered = mapply('-', clustered, ybar, SIMPLIFY = F)
ss = lapply(centered, function(x) t(x) %*% x)
+ # NOTE (jtobin): the extra 'solve' calls here helped; came from
+ # http://thaines.com/content/misc/gaussian_conjugate_prior_cheat_sheet.pdf
+ # murphy's famous reference at
+ # http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf may be incorrect.
pt = function(wj, ssj, nj, ybarj) {
- wj + ssj + nj / (1 + nj) * ((l - ybarj) %*% t(l - ybarj))
+ if (nj == 0) { wj } else {
+ solve(solve(wj) + ssj + nj / (1 + nj) * ((l - ybarj) %*% t(l - ybarj)))
+ }
}
tn = mapply(pt, list(w), ss, n, ybar, SIMPLIFY = F)
@@ -76,49 +81,6 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
list(m = loc, s = prec)
}
-conditional_location_model = function(y, z, s, l, r) {
- labelled = data.frame(y, L1 = z)
- clustered = lapply(seq_along(s),
- function(j) {
- labelled[which(labelled$L1 == j), !(names(labelled) %in% 'L1')]
- })
-
- n = lapply(clustered, nrow)
- yt = lapply(clustered, function(j) { apply(j, MARGIN = 2, sum) })
- num0 = mapply('%*%', yt, s, SIMPLIFY = F)
-
- num = lapply(num0, function(z) { z + (l %*% r) })
- den0 = mapply('*', n, s, SIMPLIFY = F)
- den = lapply(den0, function(z) z + r)
-
- v = lapply(den, solve)
- m = mapply('%*%', num, v, SIMPLIFY = F)
- mapply(rmvnorm, 1, m, v, SIMPLIFY = F)
-}
-
-conditional_precision_model = function(y, z, m, b, w) {
- labelled = cbind(y, L1 = z)
- cluster = function(d, j) {
- vals = d[which(d$L1 == j), !(names(d) %in% 'L1')]
- }
-
- clustered = lapply(seq_along(m), function(j) { cluster(labelled, j) })
- yt = lapply(clustered, function(foo) { apply(foo, MARGIN = 2, sum) })
-
- center = function(i, j) {
- if (nrow(i) == 0) { as.matrix(i) } else { as.matrix(i - j) }
- }
-
- centered = mapply(center, clustered, m, SIMPLIFY = F)
- ss = lapply(centered, function(x) t(x) %*% x)
- n = lapply(clustered, nrow)
- a = lapply(n, function(j) j + b)
- bet0 = lapply(ss, function(j) { (j + w * b) })
- bet = mapply('/', bet0, a, SIMPLIFY = F)
-
- mapply(function(i, j) drop(rWishart(1, i, j)), a, bet, SIMPLIFY = F)
-}
-
inverse_model = function(n, k, y, a, l, r, b, w) {
gibbs = function(p0, m0, s0) {
z = conditional_label_model(y, p0, m0, s0)
@@ -128,15 +90,14 @@ inverse_model = function(n, k, y, a, l, r, b, w) {
s1 = ps$s
l = lmodel(y, p1, m1, s1)
list(p = p1, m = m1, s = s1, z = z, l = l)
- }
+ }
- p0 = mixing_model(k, a)
- m0 = location_model(k, l, r)
- s0 = precision_model(k, b, w)
params = list(
- p = p0
- , m = lapply(m0, function(j) { matrix(j, ncol = length(j)) })
- , s = s0
+ p = mixing_model(k, a)
+ , m = lapply(
+ location_model(k, l, r)
+ , function(j) { matrix(j, ncol = length(j)) })
+ , s = precision_model(k, b, w)
)
acc = params
@@ -146,11 +107,11 @@ inverse_model = function(n, k, y, a, l, r, b, w) {
acc$p = rbind(acc$p, params$p)
acc$m = mapply(rbind, acc$m, params$m, SIMPLIFY = F)
# FIXME (jtobin): not logging intermediate covariances
- # might be desirable to log some reduced ellipse dims
+ # possibly desirable to log eigenvalues
acc$s = params$s
acc$z = rbind(acc$z, params$z)
acc$l = c(acc$l, params$l)
}
acc
- }
+}
diff --git a/finite-gaussian-mixture/src/simulation_multivariate_conditional.r b/finite-gaussian-mixture/src/simulation_multivariate_conditional.r
@@ -19,14 +19,10 @@ config = list(
set.seed(222)
-origin = list(
- p = mixing_model(config$k, config$a)
- , m = location_model(config$k, config$l, config$r)
- , s = precision_model(config$k, config$b, config$w)
- )
-
-# FIXME generate a known/non-pathological configuration first, to test
-d = model(config$k, config$l, config$r, config$b, config$w, config$n)
+d = list(
+ t(replicate(250, rnorm(2, c(5, 5))))
+ , t(replicate(250, rnorm(2, c(-5, -5))))
+ , t(replicate(500, rnorm(2))))
dn = lapply(d, function(j) { data.frame(x = j[,1], y = j[,2]) })
m = melt(dn, id.vars = c('x', 'y'))
@@ -39,13 +35,12 @@ params = inverse_model(
, config$b, config$w
)
-
-m_ts_plot = function(j) {
- melted = as.data.frame(j, id.vars = c('V1', 'V2'))
- ggplot(
- melted
- , aes(x = V1, y = V2, alpha = seq_along(V1), colour = seq_along(V1))) +
- geom_line() + xlim(-2, 2) + ylim(-10, 10)
-}
+#m_ts_plot = function(j) {
+# melted = as.data.frame(j, id.vars = c('V1', 'V2'))
+# ggplot(
+# melted
+# , aes(x = V1, y = V2, alpha = seq_along(V1), colour = seq_along(V1))) +
+# geom_line() + xlim(-2, 2) + ylim(-10, 10)
+#}