bnp

Some older Bayesian nonparametrics research.
Log | Files | Refs | README | LICENSE

commit 34c0971d760f031c708cce417e06fcb30b17d564
parent fc7f6f60687fddc60226bf597fee63222246f016
Author: Jared Tobin <jared@jtobin.ca>
Date:   Fri, 18 Mar 2016 22:08:06 +1300

Misc additions.

Diffstat:
Mfinite-gaussian-mixture/src/fmm_multivariate_conditional.r | 18++++++++++++++++--
Mfinite-gaussian-mixture/src/fmm_multivariate_conditional_collapsed.r | 17++++++++++++++++-
Mfinite-gaussian-mixture/src/sim_fmm_1d_conditional.r | 8+-------
Mfinite-gaussian-mixture/src/sim_fmm_2d_conditional.r | 8++++++--
Mfinite-gaussian-mixture/src/sim_fmm_2d_conditional_collapsed.r | 8++++++--
5 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/finite-gaussian-mixture/src/fmm_multivariate_conditional.r b/finite-gaussian-mixture/src/fmm_multivariate_conditional.r @@ -88,8 +88,22 @@ inverse_model = function(n, k, y, a, l, r, b, w) { ps = conditional_cluster_parameters_model(y, k, z, l, r, b, w) m1 = ps$m s1 = ps$s - l = lmodel(y, p1, m1, s1) - list(p = p1, m = m1, s = s1, z = z, l = l) + ll = lmodel(y, p1, m1, s1) + + # both likelihood calculations seem very noisy; probably due to label + # switching when estimating cluster probabilities + # + # clustered = lapply(seq(k), + # function(j) { + # vals = y[which(z == j),] + # as.matrix(vals) + # }) + # ps = lapply(clustered, function(j) { nrow(j) / nrow(y) }) + # mus = lapply(clustered, colMeans) + # precs = lapply(clustered, function(j) (solve(cov(j)))) + # ll = lmodel(y, ps, mus, precs) + + list(p = p1, m = m1, s = s1, z = z, l = ll) } params = list( diff --git a/finite-gaussian-mixture/src/fmm_multivariate_conditional_collapsed.r b/finite-gaussian-mixture/src/fmm_multivariate_conditional_collapsed.r @@ -1,5 +1,7 @@ require(mvtnorm) +source('fmm_multivariate_generative.r') + cluster_statistics = function(cluster, l, b, w) { mclust = # R, seriously? @@ -87,13 +89,26 @@ conditional_label_model = function(y, k, z, a, l, b, w) { inverse_model = function(n, k, y, a, l, b, w) { gibbs = function(z0) { - list(z = conditional_label_model(y, k, z0, a, l, b, w)) + z = conditional_label_model(y, k, z0, a, l, b, w) + clustered = lapply(seq(k), + function(j) { + vals = y[which(z == j),] + as.matrix(vals) + }) + + ps = lapply(clustered, function(j) { nrow(j) / nrow(y) }) + mus = lapply(clustered, colMeans) + precs = lapply(clustered, function(j) (solve(cov(j)))) + ll = lmodel(y, ps, mus, precs) + list(z = z, ll = ll) } + params = list(z = sample(seq(k), size = nrow(y), replace = T)) acc = params for (j in seq(n - 1)) { params = gibbs(params$z) acc$z = rbind(acc$z, params$z) + acc$ll = c(acc$ll, params$ll) } acc } diff --git a/finite-gaussian-mixture/src/sim_fmm_1d_conditional.r b/finite-gaussian-mixture/src/sim_fmm_1d_conditional.r @@ -11,13 +11,7 @@ config = list( , r = 0.01 , b = 1 , w = 1 - , n = 500 - ) - -origin = list( - p = mixing_model(config$k, config$a) - , m = location_model(config$k, config$l, config$r) - , s = precision_model(config$k, config$b, config$w) + , n = 1000 ) set.seed(990909) diff --git a/finite-gaussian-mixture/src/sim_fmm_2d_conditional.r b/finite-gaussian-mixture/src/sim_fmm_2d_conditional.r @@ -13,7 +13,7 @@ config = list( , l = rep(0, dimension) , r = diag(0.05, dimension) , b = 2 - , w = diag(1, dimension) + , w = diag(0.05, dimension) , n = 1000 ) @@ -26,7 +26,7 @@ d = list( dn = lapply(d, function(j) { data.frame(x = j[,1], y = j[,2]) }) m = melt(dn, id.vars = c('x', 'y')) -set.seed(990909) +set.seed(222) params = inverse_model( config$n, config$k, m[, c('x', 'y')] @@ -37,6 +37,7 @@ params = inverse_model( dp = melt(data.frame(params$p)) dm = melt(lapply(params$m, data.frame), id.vars = c('x', 'y')) +dl = melt(as.data.frame(params$l)) py = ggplot(m, aes(x, y)) + geom_point() @@ -46,6 +47,9 @@ pp = ggplot(dp, aes(seq_along(value), value, colour = variable)) + pm = ggplot(dm, aes(x, y, colour = factor(L1), fill = factor(L1))) + geom_point(alpha = 0.5) +pl = ggplot(dl, aes(x = seq_along(value), y = value)) + + geom_line(colour = 'darkblue') + early = data.frame(x = m$x, y = m$y, variable = params$z[1,]) mid = data.frame(x = m$x, y = m$y, variable = params$z[round(config$n / 2),]) late = data.frame(x = m$x, y = m$y, variable = params$z[config$n - 1,]) diff --git a/finite-gaussian-mixture/src/sim_fmm_2d_conditional_collapsed.r b/finite-gaussian-mixture/src/sim_fmm_2d_conditional_collapsed.r @@ -13,7 +13,7 @@ config = list( , l = rep(0, dimension) , b = dimension , w = diag(0.05, dimension) - , n = 25 + , n = 100 ) set.seed(222) @@ -50,5 +50,9 @@ p_late = ggplot(late, aes(x, y, value, colour = factor(variable), fill = factor(variable))) + geom_point(alpha = 0.5) -inferred_plots = grid.arrange(p_early, p_mid, p_late, ncol = 3) +dl = melt(as.data.frame(params$ll)) + +pl = ggplot(dl, aes(x = seq_along(value), y = value)) + + geom_line(colour = 'darkblue') +inferred_plots = grid.arrange(p_early, p_mid, p_late, ncol = 3)