bnp

Some older Bayesian nonparametrics research.

commit e8fe0ce617051805205687ae37dc9ee7e42df09a
parent 4bc704aed8a469c0d232746fb83c45b09627d716
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon, 14 Mar 2016 21:57:02 +1300

Fix annoying gibbs bug.

Diffstat:
M finite-gaussian-mixture/src/fmm_multivariate_conditional.r        | 69 +++++++++++++++------------------------------------------------------
M finite-gaussian-mixture/src/simulation_multivariate_conditional.r | 27 +++++++++++----------------
2 files changed, 26 insertions(+), 70 deletions(-)

diff --git a/finite-gaussian-mixture/src/fmm_multivariate_conditional.r b/finite-gaussian-mixture/src/fmm_multivariate_conditional.r
@@ -56,7 +56,6 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
       as.matrix(vals)
     })
 
-  # FIXME (jtobin): NaN for empty clusters; need to handle this?
   ybar = lapply(clustered, colMeans)
   n    = lapply(clustered, nrow)
   pl   = function(lj, nj, ybarj) { (lj + nj * ybarj) / (1 + nj) }
@@ -64,8 +63,14 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
   centered = mapply('-', clustered, ybar, SIMPLIFY = F)
   ss       = lapply(centered, function(x) t(x) %*% x)
 
+  # NOTE (jtobin): the extra 'solve' calls here helped; came from
+  # http://thaines.com/content/misc/gaussian_conjugate_prior_cheat_sheet.pdf
+  # murphy's famous reference at
+  # http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf may be incorrect.
   pt = function(wj, ssj, nj, ybarj) {
-    wj + ssj + nj / (1 + nj) * ((l - ybarj) %*% t(l - ybarj))
+    if (nj == 0) { wj } else {
+      solve(solve(wj) + ssj + nj / (1 + nj) * ((l - ybarj) %*% t(l - ybarj)))
+    }
   }
 
   tn = mapply(pt, list(w), ss, n, ybar, SIMPLIFY = F)
@@ -76,49 +81,6 @@ conditional_cluster_parameters_model = function(y, k, z, l, r, b, w) {
   list(m = loc, s = prec)
 }
 
-conditional_location_model = function(y, z, s, l, r) {
-  labelled  = data.frame(y, L1 = z)
-  clustered = lapply(seq_along(s),
-    function(j) {
-      labelled[which(labelled$L1 == j), !(names(labelled) %in% 'L1')]
-    })
-
-  n    = lapply(clustered, nrow)
-  yt   = lapply(clustered, function(j) { apply(j, MARGIN = 2, sum) })
-  num0 = mapply('%*%', yt, s, SIMPLIFY = F)
-
-  num  = lapply(num0, function(z) { z + (l %*% r) })
-  den0 = mapply('*', n, s, SIMPLIFY = F)
-  den  = lapply(den0, function(z) z + r)
-
-  v = lapply(den, solve)
-  m = mapply('%*%', num, v, SIMPLIFY = F)
-  mapply(rmvnorm, 1, m, v, SIMPLIFY = F)
-}
-
-conditional_precision_model = function(y, z, m, b, w) {
-  labelled = cbind(y, L1 = z)
-  cluster  = function(d, j) {
-    vals = d[which(d$L1 == j), !(names(d) %in% 'L1')]
-  }
-
-  clustered = lapply(seq_along(m), function(j) { cluster(labelled, j) })
-  yt = lapply(clustered, function(foo) { apply(foo, MARGIN = 2, sum) })
-
-  center = function(i, j) {
-    if (nrow(i) == 0) { as.matrix(i) } else { as.matrix(i - j) }
-  }
-
-  centered = mapply(center, clustered, m, SIMPLIFY = F)
-  ss   = lapply(centered, function(x) t(x) %*% x)
-  n    = lapply(clustered, nrow)
-  a    = lapply(n, function(j) j + b)
-  bet0 = lapply(ss, function(j) { (j + w * b) })
-  bet  = mapply('/', bet0, a, SIMPLIFY = F)
-
-  mapply(function(i, j) drop(rWishart(1, i, j)), a, bet, SIMPLIFY = F)
-}
-
 inverse_model = function(n, k, y, a, l, r, b, w) {
   gibbs = function(p0, m0, s0) {
     z  = conditional_label_model(y, p0, m0, s0)
@@ -128,15 +90,14 @@ inverse_model = function(n, k, y, a, l, r, b, w) {
     s1 = ps$s
     l  = lmodel(y, p1, m1, s1)
     list(p = p1, m = m1, s = s1, z = z, l = l)
-    }
+  }
 
-  p0 = mixing_model(k, a)
-  m0 = location_model(k, l, r)
-  s0 = precision_model(k, b, w)
   params = list(
-      p = p0
-    , m = lapply(m0, function(j) { matrix(j, ncol = length(j)) })
-    , s = s0
+      p = mixing_model(k, a)
+    , m = lapply(
+            location_model(k, l, r)
+          , function(j) { matrix(j, ncol = length(j)) })
+    , s = precision_model(k, b, w)
     )
 
   acc = params
@@ -146,11 +107,11 @@ inverse_model = function(n, k, y, a, l, r, b, w) {
     acc$p = rbind(acc$p, params$p)
     acc$m = mapply(rbind, acc$m, params$m, SIMPLIFY = F)
     # FIXME (jtobin): not logging intermediate covariances
-    # might be desirable to log some reduced ellipse dims
+    # possibly desirable to log eigenvalues
     acc$s = params$s
     acc$z = rbind(acc$z, params$z)
     acc$l = c(acc$l, params$l)
     }
 
   acc
-  }
+}

diff --git a/finite-gaussian-mixture/src/simulation_multivariate_conditional.r b/finite-gaussian-mixture/src/simulation_multivariate_conditional.r
@@ -19,14 +19,10 @@ config = list(
 
 set.seed(222)
 
-origin = list(
-    p = mixing_model(config$k, config$a)
-  , m = location_model(config$k, config$l, config$r)
-  , s = precision_model(config$k, config$b, config$w)
-  )
-
-# FIXME generate a known/non-pathological configuration first, to test
-d = model(config$k, config$l, config$r, config$b, config$w, config$n)
+d = list(
+    t(replicate(250, rnorm(2, c(5, 5))))
+  , t(replicate(250, rnorm(2, c(-5, -5))))
+  , t(replicate(500, rnorm(2))))
 
 dn = lapply(d, function(j) { data.frame(x = j[,1], y = j[,2]) })
 m  = melt(dn, id.vars = c('x', 'y'))
@@ -39,13 +35,12 @@ params = inverse_model(
   , config$b, config$w
   )
 
-
-m_ts_plot = function(j) {
-  melted = as.data.frame(j, id.vars = c('V1', 'V2'))
-  ggplot(
-      melted
-    , aes(x = V1, y = V2, alpha = seq_along(V1), colour = seq_along(V1))) +
-    geom_line() + xlim(-2, 2) + ylim(-10, 10)
-}
+#m_ts_plot = function(j) {
+#  melted = as.data.frame(j, id.vars = c('V1', 'V2'))
+#  ggplot(
+#      melted
+#    , aes(x = V1, y = V2, alpha = seq_along(V1), colour = seq_along(V1))) +
+#    geom_line() + xlim(-2, 2) + ylim(-10, 10)
+#}
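
For context on the fix (editorial note, not part of the commit): the rewritten `pt` appears to compute the standard Normal-Wishart conjugate update for a cluster's Wishart scale matrix, reading the prior mean `l` as mu_0, the prior scale `w` as W_0, and an implicit kappa_0 = 1 (matching the `1 + nj` factors in `pl` and `pt`). The additive update acts on the *inverse* scale, which is why both `solve` calls are needed before drawing from `rWishart`. A sketch of the algebra under those assumptions:

\[
  W_j^{\mathrm{post}}
    = \left( W_0^{-1}
        + \sum_{i : z_i = j} (y_i - \bar y_j)(y_i - \bar y_j)^{\top}
        + \frac{n_j}{1 + n_j}\,(\bar y_j - \mu_0)(\bar y_j - \mu_0)^{\top}
      \right)^{-1},
  \qquad
  \Lambda_j \mid y \sim \mathrm{Wishart}\!\left(\nu_0 + n_j,\; W_j^{\mathrm{post}}\right),
\]

where \(\bar y_j\) and \(n_j\) are the mean and size of cluster \(j\). For an empty cluster (\(n_j = 0\)) the new guard simply falls back to the prior scale \(W_0\), which also removes the NaN problem noted in the deleted FIXME.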