bnp

Some older Bayesian nonparametrics research.

commit 68e9a9d0ad44f13657398e747d168c3db7f9b1e3
parent fe130715f4d43b7ae0254393badbc37f500d8fc9
Author: Jared Tobin <jared@jtobin.ca>
Date:   Fri, 12 Feb 2016 17:34:01 +1300

Substantial work on fmm.

* Currently experiencing a bug where the sampled precisions come out far too large.
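
Editor's note on the bug above, since the cause is easy to miss in the diff: in R, `rgamma(n, shape, rate)` treats its second and third arguments as shape and rate, so the per-component draw in `rs_conditional` below amounts to `rgamma(1, a, a / (w * b + ss))`, whose mean is `a / (a / (w * b + ss)) = w * b + ss`. That grows with each component's sum of squared deviations instead of shrinking with it, which matches "precisions too big". The sketch that follows is one plausible form of the conditional, assuming a Gamma(b / 2, (w * b) / 2) shape/rate prior on the precisions (an assumption, not something this commit states); it is not the committed fix. Relatedly, `rnorm` and `dnorm` take a standard deviation as their third argument, so passing `1 / s` for a precision `s` supplies the variance where `1 / sqrt(s)` is presumably intended; worth double-checking.

# Hedged sketch, not the committed code: a conditional draw for the component
# precisions under an assumed Gamma(b / 2, (w * b) / 2) shape/rate prior.
# 'counts' is the number of points per component and 'ss' the per-component
# sum of squared deviations from the component means; both names are
# illustrative, not taken from the commit.
rs_conditional_sketch = function(counts, ss, b, w) {
  shape = (b + counts) / 2
  rate = (w * b + ss) / 2
  # posterior mean is shape / rate = (b + counts) / (w * b + ss), which now
  # shrinks as the within-component scatter grows
  rgamma(length(counts), shape = shape, rate = rate)
  }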

Diffstat:
A finite-gaussian-mixture/doc/images/fmm.png | 0
A finite-gaussian-mixture/doc/images/fmm.tex | 31 +++++++++++++++++++++++++++++++
M finite-gaussian-mixture/src/fmm.r | 415 ++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------
3 files changed, 156 insertions(+), 290 deletions(-)

diff --git a/finite-gaussian-mixture/doc/images/fmm.png b/finite-gaussian-mixture/doc/images/fmm.png
Binary files differ.
diff --git a/finite-gaussian-mixture/doc/images/fmm.tex b/finite-gaussian-mixture/doc/images/fmm.tex
@@ -0,0 +1,31 @@
+\documentclass{standalone}
+\usepackage{tikz}
+\usetikzlibrary{positioning}
+\usetikzlibrary{fit}
+
+\begin{document}
+
+\begin{tikzpicture}
+  \tikzstyle{random}=[circle, minimum size = 10mm, thick, draw = black!80, node distance = 16mm]
+  \tikzstyle{connect}=[-latex, thick]
+  \tikzstyle{plate}=[rectangle, draw = black!100]
+
+  \node[random] (p) { $\pi$ };
+  \node[random] (z) [below of = p] { $z_{i}$ };
+  \node[random] (y) [below of = z] { $y_{i}$ };
+  \node[random] (mu) [below of = y] { $\mu_{k}$ };
+  \node[random] (s) [right of = mu] { $s_{k}$ };
+
+  \path (p) edge [connect] (z)
+        (z) edge [connect] (y);
+
+  \path (mu) edge [connect] (y);
+
+  \path (s) edge [connect] (y);
+
+  \node[plate, inner sep = 1.0mm, fit = (z) (y)] { };
+  \node[plate, inner sep = 1.0mm, fit = (mu) (s)] { };
+
+\end{tikzpicture}
+
+\end{document}
diff --git a/finite-gaussian-mixture/src/fmm.r b/finite-gaussian-mixture/src/fmm.r
@@ -1,5 +1,6 @@
 set.seed(42)
 
+require(dplyr)
 require(gtools)
 
 # basic finite gaussian mixture model
@@ -12,328 +13,162 @@ require(gtools)
 
 # mixing probabilities
 
-dp_model = function(p, a) {
-  k = length(p)
-  ddirichlet(p, rep(a, k))
-  }
-
 rp_model = function(k, a) drop(rdirichlet(1, (rep(a, k))))
 
+rp_conditional = function(n, a0) {
+  k = length(n)
+  a1 = sapply(n, function(x) { x + a0 / k })
+  drop(rdirichlet(1, a1))
+  }
+
 # cluster labels
 
-dn_model = function(n, p) dmultinom(n, prob = p)
-rn_model = function(n, p) drop(rmultinom(1, size = n, prob = p))
+rc_model = function(n, p) drop(rmultinom(1, size = n, prob = p))
+
+# FIXME generating NA in probability vector for at least one y.
+rc_conditional = function(y, p0, mu, s) {
+  k = length(p0)
+  config = data.frame(p = p0, mu = mu, s = s)
+  reducer = function(row) { row[1] * dnorm(y, row[2], 1 / row[3]) }
+  elements = apply(config, MARGIN = 1, reducer)
+  p1 = elements / sum(elements)
+  sample(1:k, size = 1, prob = p1)
+  }
 
 # cluster locations
 
-dmu_model = function(mu, l, r) dnorm(mu, l, 1 / r)
 rmu_model = function(k, l, r) rnorm(k, l, 1 / r)
 
+rmu_conditional = function(y, s, l, r) {
+  k = length(s)
+  c = sapply(y, length)
+  ybar = sapply(y, safe_mean) # FIXME safe_mean returns 0 on length 0; safe?
+  m = (ybar * c * s + l * r) / (c * s + r)
+  v = 1 / (c * s + r)
+  rnorm(k, m, v)
+  }
+
 # cluster precisions
 
-ds_model = function(s, b, w) dgamma(s, b, 1 / w)
 rs_model = function(k, b, w) rgamma(k, b, 1 / w)
 
-# model
+rs_conditional = function(y, mu, b, w) {
+  k = length(y)
+  c = sapply(y, length)
+
+  centered = mapply("-", y, mu)
+  squared = lapply(centered, function(x) { x ^ 2 })
+  ss = unlist(lapply(squared, sum))
+
+  a = b + c
+  bet = a / (w * b + ss)
+
+  # NOTE (jtobin): must be a better way to do this
+  params = data.frame(a = a, bet = bet)
+  reducer = function(row) { rgamma(1, row[1], row[2]) }
+  apply(params, MARGIN = 1, reducer)
+  }
+
+# parameter model (prior)
 
 rconfig_model = function(k, n) {
   p = rp_model(k, 1)
-  c = rn_model(n, p)
+  c = rc_model(n, p)
   mu = rmu_model(k, 0, 0.1)
   s = rs_model(k, 1, 1)
   data.frame(c = c, mu = mu, s = s)
   }
 
+# data model (likelihood)
+
 ry_model = function(config) {
   sampler = function(row) rnorm(row[1], row[2], 1 / row[3])
   apply(config, MARGIN = 1, sampler)
   }
 
+# model
+
 rmodel = function(k, n) {
   config = rconfig_model(k, n)
   ry_model(config)
   }
 
+rmodel_conditional = function(n, y) {
+  k = 3
+  a = 1
+  l = 0
+  r = 0.1
+  b = 1
+  w = 1
+
+  raw = unlist(y)
+
+  kernel = function(p0, mu0, s0) {
+    z = sapply(raw, function(x) rc_conditional(x, p0, mu0, s0)) # FIXME slow
+    labelled = data.frame(label = z, y = raw)
+    id_query = function(c) filter(labelled, label == c)$y
+
+    clustered = sapply(1:k, id_query)
+    counts = sapply(clustered, length)
+
+    p1 = rp_conditional(counts, a)
+    mu1 = rmu_conditional(clustered, s0, l, r)
+    s1 = rs_conditional(clustered, mu1, b, w) # FIXME precisions explode
+    list(p = p1, mu = mu1, s = s1)
+    }
+
+  gibbs = function(epochs, acc, p0, mu0, s0) {
+    if (epochs <= 0) {
+      return(acc)
+    } else {
+      result = kernel(p0, mu0, s0)
+      acc$p = rbind(acc$p, result$p)
+      acc$mu = rbind(acc$mu, result$mu)
+      acc$s = rbind(acc$s, result$s)
+      gibbs(epochs - 1, acc, result$p, result$mu, result$s)
+    }
+    }
+
+  p0 = rp_model(k, a)
+  mu0 = rmu_model(k, l, r)
+  s0 = rs_model(k, b, w)
+  init = list(p = p0, mu = mu0, s = s0)
+
+  gibbs(n, init, p0, mu0, s0)
+  }
+
 # debug
 
+test_data = list(
+    rnorm(101, 3.5, 1)
+  , rnorm(38, 0.3, 0.8)
+  , rnorm(90, -4.2, 0.5)
+  )
-y = list(
-    rnorm(801, 3.5, 1)
-  , rnorm(300, 0.3, 0.8)
-  , rnorm(722, -4.2, 0.5)
-  )
-k = length(y)
-l = 0
-r = 0.1
-mu = rmu_prior(k, l, r)
-w = 1
-b = 1
-s = rs_prior(k, b, w)
-p = rp_prior(k, 1)
-
-c = drop(rmultinom(1, 1, prob = p))
-
+# y = list(
+#     rnorm(801, 3.5, 1)
+#   , rnorm(300, 0.3, 0.8)
+#   , rnorm(722, -4.2, 0.5)
+#   )
+# k = length(y)
+# l = 0
+# r = 0.1
+# mu = rmu_model(k, l, r)
+# w = 1
+# b = 1
+# s = rs_model(k, b, w)
+# p = rp_model(k, 1)
+# c = drop(rmultinom(1, 1, prob = p))
+# p0 = p
+# mu0 = mu
+# s0 = s
+
+# utilities
+
+safe_mean = function(x) {
+  if (is.null(x) || (length(x) == 0)) {
+    return(0)
+  } else {
+    mean(x)
+  }
+  }
-# # observations
-#
-# dy_prior = function(y, mu, s, p) sum(p * dnorm(y, mu, 1 / s))
-#
-# ry_prior = function(mu, s, p) {
-#   c = drop(rmultinom(1, 1, prob = p))
-#   muc = drop(c %*% mu)
-#   sc = drop(c %*% s)
-#   rnorm(1, muc, 1 / sc)
-#   }
-#
-# # mu (component means) and parameter models
-#
-# l_obj_prior = function(y) {
-#   mu_y = mean(unlist(y))
-#   var_y = var(unlist(y))
-#
-#   density = function(l) { dnorm(l, mu_y, var_y) }
-#   sampler = rnorm(1, mu_y, var_y)
-#   list(density, sampler)
-#   }
-#
-# dl_obj_prior = function(l, y) { l_obj_prior(y)[[1]](l) }
-# rl_obj_prior = function(y) { l_obj_prior(y)[[2]] }
-#
-# l_posterior = function(y, mu, r) {
-#   mu_y = mean(unlist(y))
-#   var_y = var(unlist(y))
-#   prec_y = 1 / var_y
-#   k = length(y)
-#   m = (mu_y * prec_y + r * sum(mu)) / (prec_y + k * r)
-#   v = 1 / (prec_y + k * r)
-#
-#   density = function(l) { dnorm(l, m, v) }
-#   sampler = rnorm(1, m, v)
-#   list(density, sampler)
-#   }
-#
-# dl_posterior = function(l, y, mu, r) { l_posterior(y, mu, r)[[1]](l) }
-# rl_posterior = function(y, mu, r) { l_posterior(y, mu, r)[[2]] }
-#
-# r_obj_prior = function(y) {
-#   var_y = var(unlist(y))
-#   prec_y = 1 / var_y
-#
-#   density = function(r) { dgamma(r, 1, prec_y) }
-#   sampler = rgamma(1, 1, prec_y)
-#   list(density, sampler)
-#   }
-#
-# dr_obj_prior = function(r, y) { r_obj_prior(y)[[1]](r) }
-# rr_obj_prior = function(y) { r_obj_prior(y)[[2]] }
-#
-# r_posterior = function(y, mu, l) {
-#   var_y = var(unlist(y))
-#   k = length(y)
-#   a = k + 1
-#   b = a / (var_y + sum((mu - l)^2))
-#
-#   density = function(r) { dgamma(r, a, b) }
-#   sampler = rgamma(1, a, b)
-#   list(density, sampler)
-#   }
-#
-# dr_posterior = function(r, y, mu, l) { r_posterior(y, mu, l)[[1]](r) }
-# rr_posterior = function(y, mu, l) { r_posterior(y, mu, l)[[2]] }
-#
-# dmu_prior = function(mu, l, r) dnorm(mu, l, 1 / r)
-# rmu_prior = function(k, l, r) rnorm(k, l, 1 / r)
-#
-# mu_posterior = function(y, s, l, r) {
-#   n = unlist(lapply(y, length))
-#   ybar = unlist(lapply(y, mean))
-#   m = (ybar * n * s + l * r) / (n * s + r)
-#   v = 1 / (n * s + r)
-#
-#   density = function(x) { dnorm(x, m, v) }
-#   sampler = rnorm(length(y), m, v)
-#   list(density, sampler)
-#   }
-#
-# dmu_posterior = function(mu, y, s, l, r) { mu_posterior(y, s, l, r)[[1]](mu) }
-# rmu_posterior = function(y, s, l, r) { mu_posterior(y, s, l, r)[[2]] }
-#
-# # s (component precisions) and parameter models
-#
-# ds_prior = function(s, b, w) dgamma(s, b, 1 / w)
-# rs_prior = function(k, b, w) rgamma(k, b, 1 / w)
-#
-# s_posterior = function(y, mu, b, w) {
-#   n = unlist(lapply(y, length))
-#   squares = unlist(mapply("-", y, as.list(mu))) ^ 2
-#   a = b + n
-#   bet = a / (w * b + sum(squares))
-#
-#   density = function(s) { dgamma(s, a, bet) }
-#   sampler = rgamma(1, a, bet)
-#   list(density, sampler)
-#   }
-#
-# ds_posterior = function(s, y, mu, b, w) { s_posterior(y, mu, b, w)[[1]](s) }
-# rs_posterior = function(y, mu, b, w) { s_posterior(y, mu, b, w)[[2]] }
-#
-# w_obj_prior = function(y) {
-#   var_y = var(unlist(y))
-#   density = function(w) { dgamma(w, 1, var_y) }
-#   sampler = rgamma(1, 1, var_y)
-#   list(density, sampler)
-#   }
-#
-# dw_obj_prior = function(w, y) { w_obj_prior(y)[[1]](w) }
-# rw_obj_prior = function(y) { w_obj_prior(y)[[2]] }
-#
-# w_posterior = function(y, s, b) {
-#   k = length(y)
-#   var_y = var(unlist(y))
-#   prec_y = 1 / var_y
-#   a = k * b + 1
-#   bet = a / (prec_y + b * sum(s))
-#
-#   density = function(w) { dgamma(w, a, bet) }
-#   sampler = dgamma(1, a, bet)
-#   list(density, sampler)
-#   }
-#
-# dw_posterior = function(w, y, s, b) { w_posterior(y, s, b)[[1]](w) }
-# rw_posterior = function(y, s, b) { w_posterior(y, s, b)[[2]] }
-#
-# db_prior = function(b) dinvgamma(b, 1, 1)
-# rb_prior = function() rinvgamma(1, 1, 1)
-#
-# b_posterior = function(y, s, w) {
-#   k = length(y)
-#
-#   log_density = function(b) {
-#     -k * lgamma(b / 2) -
-#       (1 / (2 * b)) +
-#       ((k * b - 3) / 2) * log(b / 2) +
-#       b / 2 * sum((log(s * w)) - w * s)
-#     }
-#
-#   density = function(b) { exp(log_density(b)) }
-#
-#   sampler = function() {
-#
-#     # CHECK do i really need to transform this? is it log-concave already?
-#     density_xformed = function(b) { b * density(b) }
-#     log_density_xformed = function(b) { log(density_xformed(b)) }
-#
-#     grad_log_density_xformed = function(b) {
-#       1 / b - k * digamma(b / 2) +
-#         1 / (2 * b ^ 2) +
-#         k / 2 * log(b / 2) + (k * b - 3) / b +
-#         sum((log(s * w)) - w * s) / 2
-#       }
-#
-#     # error due to non-log-concavity detection
-#     val = ars(
-#         n = 1
-#       , f = log_density_xformed
-#       , fprima = grad_log_density_xformed
-#       , x = c(0.01, 0.5, 1)
-#       , lb = T
-#       , xlb = 0.01
-#       , ub = T
-#       , xub = 10)
-#
-#     exp(sample(val, 1))
-#     }
-#
-#   list(density, sampler)
-#   }
-#
-# db_posterior = function(b, y, s, w) { b_posterior(y, s, w)[[1]](b) }
-# rb_posterior = function(y, s, w) { b_posterior(y, s, w)[[2]]() }
-#
-# # p (mixing probabilities), n (occupation numbers) and parameter models
-#
-# dp_prior = function(p, a) ddirichlet(p, a)
-# rp_prior = function(a) drop(rdirichlet(1, a))
-#
-# dn_prior = function(n, p) dmultinom(n, prob = p)
-# rn_prior = function(p) drop(rmultinom(1, size = 1, prob = p))
-#
-# da_prior = function(a) dinvgamma(a, 1, 1)
-# ra_prior = rinvgamma(1, 1, 1)
-#
-# a_posterior = function(k, n) {
-#   log_density = function(a) {
-#     (k - 3 / 2) * log(a) - 1 / (2 * a) + lgamma(a) - lgamma(n + a)
-#     }
-#
-#   density = function(a) { exp(log_density(a)) }
-#
-#   # log(a) ~ log-concave and can thus be sampled via ARS
-#   #
-#   # note that f(log(a)) = a * f(a), or log(f(log(a))) = log a + log f(a)
-#   #
-#   # ARS flakes out when k / n is sufficiently small (i.e. when 'a' would be
-#   # extremely large). probably rare for this to happen in practice, so 'a' is
-#   # capped at 20 here
-#   sampler = function() {
-#     log_xformed = expression(
-#       (k - 1 / 2) * log(a) - 1 / (2 * a) + lgamma(a) - lgamma(n + a))
-#     dens_log_xformed = function(a) eval(log_xformed)
-#
-#     grad_log_xformed = D(log_xformed, 'a')
-#     grad_dens_log_xformed = function(a) eval(grad_log_xformed)
-#
-#     val = ars(
-#         n = 1
-#       , f = dens_log_xformed
-#       , fprima = grad_dens_log_xformed
-#       , x = c(0.51, 1, 1.49)
-#       , lb = T
-#       , xlb = 0.001
-#       , ub = T
-#       , xub = 20.0)
-#
-#     exp(val)
-#     }
-#   list(density, sampler)
-#   }
-#
-# da_posterior = function(a, k, n) { a_posterior(k, n)[[1]](a) }
-# ra_posterior = function(k, n) { a_posterior(k, n)[[2]]() }
-#
-# # model
-#
-# model_prior = function(y) {
-#   k = length(y)
-#   l = rl_obj_prior(y)
-#   r = rr_obj_prior(y)
-#   mu = rmu_prior(k, l, r)
-#   b = rb_prior()
-#   w = rw_obj_prior(y)
-#   s = rs_prior(k, b, w)
-#   a = ra_prior()
-#   p = rp_prior(rep(a, k))
-#
-#   sampler = ry_prior(mu, s, p)
-#   conditional_sampler = function(n) { replicate(n, ry_prior(mu, s, p)()) }
-#   list(sampler, conditional_sampler)
-#   }
-#
-# rmodel_prior = function(y) { model_prior(y)[[1]] }
-#
-# # conditional model prior; sample a cluster configuration and then sample many
-# # observations from that configuration
-#
-# rmodel_conditional_prior = function(n, y) { model_prior(y)[[2]](n) }
-#
-# # debug
-#
-# # y = test_data
-# # k = length(y)
-# # l = rl_obj_prior(y)
-# # r = rr_obj_prior(y)
-# # mu = rmu_prior(k, l, r)
-# # w = rw_obj_prior(y)
-# # b = rb_prior()
-# # s = rs_prior(k, b, w)
-#
-#
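
Two follow-up notes on the FIXMEs in the new fmm.r, neither of which is part of the commit itself. First, on `# FIXME generating NA in probability vector for at least one y`: when an observation sits far from every component, each `p * dnorm(...)` term in `rc_conditional` underflows to zero, `elements / sum(elements)` evaluates to `0 / 0 = NaN`, and `sample` then rejects the probability vector. Working in log space with a log-sum-exp normalisation avoids this. The sketch below is an assumption-laden alternative, not the committed code, and it treats `s` as a precision by using `1 / sqrt(s)` as the standard deviation, which the committed code does not do.

# Hedged sketch, not the committed code: sample a cluster label in log space so
# the weights stay finite even when every component density underflows.
rc_conditional_sketch = function(y, p0, mu, s) {
  k = length(p0)
  # log of unnormalised weights; 1 / sqrt(s) assumes s is a precision
  logw = log(p0) + dnorm(y, mu, 1 / sqrt(s), log = TRUE)
  # log-sum-exp: subtract the max before exponentiating, then normalise
  p1 = exp(logw - max(logw))
  sample(1:k, size = 1, prob = p1 / sum(p1))
  }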
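
Second, on `# NOTE (jtobin): must be a better way to do this` in `rs_conditional`: `rgamma` recycles vector-valued `shape` and `rate` arguments, so the `data.frame` plus row-wise `apply` can collapse into a single vectorised call. A minimal sketch, keeping whatever final form `a` and `bet` end up taking:

# Hedged sketch: one draw per component from vectors of shapes and rates,
# replacing the data.frame / apply pairing.
draw_precisions = function(a, bet) rgamma(length(a), shape = a, rate = bet)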