commit 68e9a9d0ad44f13657398e747d168c3db7f9b1e3
parent fe130715f4d43b7ae0254393badbc37f500d8fc9
Author: Jared Tobin <jared@jtobin.ca>
Date: Fri, 12 Feb 2016 17:34:01 +1300
Substantial work on fmm.
* Currently experiencing bug where precisions are just seemingly too big.
Diffstat:
3 files changed, 156 insertions(+), 290 deletions(-)
diff --git a/finite-gaussian-mixture/doc/images/fmm.png b/finite-gaussian-mixture/doc/images/fmm.png
Binary files differ.
diff --git a/finite-gaussian-mixture/doc/images/fmm.tex b/finite-gaussian-mixture/doc/images/fmm.tex
@@ -0,0 +1,31 @@
+\documentclass{standalone}
+\usepackage{tikz}
+\usetikzlibrary{positioning}
+\usetikzlibrary{fit}
+
+\begin{document}
+
+\begin{tikzpicture}
+ \tikzstyle{random}=[circle, minimum size = 10mm, thick, draw = black!80, node distance = 16mm]
+ \tikzstyle{connect}=[-latex, thick]
+ \tikzstyle{plate}=[rectangle, draw = black!100]
+
+ \node[random] (p) { $\pi$ };
+ \node[random] (z) [below of = p] { $z_{i}$ };
+ \node[random] (y) [below of = z] { $y_{i}$ };
+ \node[random] (mu) [below of = y] { $\mu_{k}$ };
+ \node[random] (s) [right of = mu] { $s_{k}$ };
+
+ \path (p) edge [connect] (z)
+ (z) edge [connect] (y);
+
+ \path (mu) edge [connect] (y);
+
+ \path (s) edge [connect] (y);
+
+ \node[plate, inner sep = 1.0mm, fit = (z) (y)] { };
+ \node[plate, inner sep = 1.0mm, fit = (mu) (s)] { };
+
+\end{tikzpicture}
+
+\end{document}
diff --git a/finite-gaussian-mixture/src/fmm.r b/finite-gaussian-mixture/src/fmm.r
@@ -1,5 +1,6 @@
set.seed(42)
+require(dplyr)
require(gtools)
# basic finite gaussian mixture model
@@ -12,328 +13,162 @@ require(gtools)
# mixing probabilities
-dp_model = function(p, a) {
- k = length(p)
- ddirichlet(p, rep(a, k))
- }
-
rp_model = function(k, a) drop(rdirichlet(1, (rep(a, k))))
+rp_conditional = function(n, a0) {
+ k = length(n)
+ a1 = sapply(n, function(x) { x + a0 / k })
+ drop(rdirichlet(1, a1))
+ }
+
# cluster labels
-dn_model = function(n, p) dmultinom(n, prob = p)
-rn_model = function(n, p) drop(rmultinom(1, size = n, prob = p))
+rc_model = function(n, p) drop(rmultinom(1, size = n, prob = p))
+
+# FIXME generating NA in probability vector for at least one y.
+rc_conditional = function(y, p0, mu, s) {
+ k = length(p0)
+ config = data.frame(p = p0, mu = mu, s = s)
+ reducer = function(row) { row[1] * dnorm(y, row[2], 1 / row[3]) }
+ elements = apply(config, MARGIN = 1, reducer)
+ p1 = elements / sum(elements)
+ sample(1:k, size = 1, prob = p1)
+ }
# cluster locations
-dmu_model = function(mu, l, r) dnorm(mu, l, 1 / r)
rmu_model = function(k, l, r) rnorm(k, l, 1 / r)
+rmu_conditional = function(y, s, l, r) {
+ k = length(s)
+ c = sapply(y, length)
+ ybar = sapply(y, safe_mean) # FIXME safe_mean returns 0 on length 0; safe?
+ m = (ybar * c * s + l * r) / (c * s + r)
+ v = 1 / (c * s + r)
+ rnorm(k, m, v)
+ }
+
# cluster precisions
-ds_model = function(s, b, w) dgamma(s, b, 1 / w)
rs_model = function(k, b, w) rgamma(k, b, 1 / w)
-# model
+rs_conditional = function(y, mu, b, w) {
+ k = length(y)
+ c = sapply(y, length)
+
+ centered = mapply("-", y, mu)
+ squared = lapply(centered, function(x) { x ^ 2 })
+ ss = unlist(lapply(squared, sum))
+
+ a = b + c
+ bet = a / (w * b + ss)
+
+ # NOTE (jtobin): must be a better way to do this
+ params = data.frame(a = a, bet = bet)
+ reducer = function(row) { rgamma(1, row[1], row[2]) }
+ apply(params, MARGIN = 1, reducer)
+ }
+
+# parameter model (prior)
rconfig_model = function(k, n) {
p = rp_model(k, 1)
- c = rn_model(n, p)
+ c = rc_model(n, p)
mu = rmu_model(k, 0, 0.1)
s = rs_model(k, 1, 1)
data.frame(c = c, mu = mu, s = s)
}
+# data model (likelihood)
+
ry_model = function(config) {
sampler = function(row) rnorm(row[1], row[2], 1 / row[3])
apply(config, MARGIN = 1, sampler)
}
+# model
+
rmodel = function(k, n) {
config = rconfig_model(k, n)
ry_model(config)
}
+rmodel_conditional = function(n, y) {
+ k = 3
+ a = 1
+ l = 0
+ r = 0.1
+ b = 1
+ w = 1
+
+ raw = unlist(y)
+
+ kernel = function(p0, mu0, s0) {
+ z = sapply(raw, function(x) rc_conditional(x, p0, mu0, s0)) # FIXME slow
+ labelled = data.frame(label = z, y = raw)
+ id_query = function(c) filter(labelled, label == c)$y
+
+ clustered = sapply(1:k, id_query)
+ counts = sapply(clustered, length)
+
+ p1 = rp_conditional(counts, a)
+ mu1 = rmu_conditional(clustered, s0, l, r)
+ s1 = rs_conditional(clustered, mu1, b, w) # FIXME precisions explode
+ list(p = p1, mu = mu1, s = s1)
+ }
+
+ gibbs = function(epochs, acc, p0, mu0, s0) {
+ if (epochs <= 0) {
+ return(acc)
+ } else {
+ result = kernel(p0, mu0, s0)
+ acc$p = rbind(acc$p, result$p)
+ acc$mu = rbind(acc$mu, result$mu)
+ acc$s = rbind(acc$s, result$s)
+ gibbs(epochs - 1, acc, result$p, result$mu, result$s)
+ }
+ }
+
+ p0 = rp_model(k, a)
+ mu0 = rmu_model(k, l, r)
+ s0 = rs_model(k, b, w)
+ init = list(p = p0, mu = mu0, s = s0)
+
+ gibbs(n, init, p0, mu0, s0)
+ }
+
# debug
-y = list(
- rnorm(801, 3.5, 1)
- , rnorm(300, 0.3, 0.8)
- , rnorm(722, -4.2, 0.5)
+test_data = list(
+ rnorm(101, 3.5, 1)
+ , rnorm(38, 0.3, 0.8)
+ , rnorm(90, -4.2, 0.5)
)
-k = length(y)
-l = 0
-r = 0.1
-mu = rmu_prior(k, l, r)
-w = 1
-b = 1
-s = rs_prior(k, b, w)
-p = rp_prior(k, 1)
-
-c = drop(rmultinom(1, 1, prob = p))
-
+# y = list(
+# rnorm(801, 3.5, 1)
+# , rnorm(300, 0.3, 0.8)
+# , rnorm(722, -4.2, 0.5)
+# )
+# k = length(y)
+# l = 0
+# r = 0.1
+# mu = rmu_model(k, l, r)
+# w = 1
+# b = 1
+# s = rs_model(k, b, w)
+# p = rp_model(k, 1)
+# c = drop(rmultinom(1, 1, prob = p))
+# p0 = p
+# mu0 = mu
+# s0 = s
+
+# utilities
+
+safe_mean = function(x) {
+ if (is.null(x) || (length(x) == 0)) {
+ return(0)
+ } else {
+ mean(x)
+ }
+ }
-# # observations
-#
-# dy_prior = function(y, mu, s, p) sum(p * dnorm(y, mu, 1 / s))
-#
-# ry_prior = function(mu, s, p) {
-# c = drop(rmultinom(1, 1, prob = p))
-# muc = drop(c %*% mu)
-# sc = drop(c %*% s)
-# rnorm(1, muc, 1 / sc)
-# }
-#
-# # mu (component means) and parameter models
-#
-# l_obj_prior = function(y) {
-# mu_y = mean(unlist(y))
-# var_y = var(unlist(y))
-#
-# density = function(l) { dnorm(l, mu_y, var_y) }
-# sampler = rnorm(1, mu_y, var_y)
-# list(density, sampler)
-# }
-#
-# dl_obj_prior = function(l, y) { l_obj_prior(y)[[1]](l) }
-# rl_obj_prior = function(y) { l_obj_prior(y)[[2]] }
-#
-# l_posterior = function(y, mu, r) {
-# mu_y = mean(unlist(y))
-# var_y = var(unlist(y))
-# prec_y = 1 / var_y
-# k = length(y)
-# m = (mu_y * prec_y + r * sum(mu)) / (prec_y + k * r)
-# v = 1 / (prec_y + k * r)
-#
-# density = function(l) { dnorm(l, m, v) }
-# sampler = rnorm(1, m, v)
-# list(density, sampler)
-# }
-#
-# dl_posterior = function(l, y, mu, r) { l_posterior(y, mu, r)[[1]](l) }
-# rl_posterior = function(y, mu, r) { l_posterior(y, mu, r)[[2]] }
-#
-# r_obj_prior = function(y) {
-# var_y = var(unlist(y))
-# prec_y = 1 / var_y
-#
-# density = function(r) { dgamma(r, 1, prec_y) }
-# sampler = rgamma(1, 1, prec_y)
-# list(density, sampler)
-# }
-#
-# dr_obj_prior = function(r, y) { r_obj_prior(y)[[1]](r) }
-# rr_obj_prior = function(y) { r_obj_prior(y)[[2]] }
-#
-# r_posterior = function(y, mu, l) {
-# var_y = var(unlist(y))
-# k = length(y)
-# a = k + 1
-# b = a / (var_y + sum((mu - l)^2))
-#
-# density = function(r) { dgamma(r, a, b) }
-# sampler = rgamma(1, a, b)
-# list(density, sampler)
-# }
-#
-# dr_posterior = function(r, y, mu, l) { r_posterior(y, mu, l)[[1]](r) }
-# rr_posterior = function(y, mu, l) { r_posterior(y, mu, l)[[2]] }
-#
-# dmu_prior = function(mu, l, r) dnorm(mu, l, 1 / r)
-# rmu_prior = function(k, l, r) rnorm(k, l, 1 / r)
-#
-# mu_posterior = function(y, s, l, r) {
-# n = unlist(lapply(y, length))
-# ybar = unlist(lapply(y, mean))
-# m = (ybar * n * s + l * r) / (n * s + r)
-# v = 1 / (n * s + r)
-#
-# density = function(x) { dnorm(x, m, v) }
-# sampler = rnorm(length(y), m, v)
-# list(density, sampler)
-# }
-#
-# dmu_posterior = function(mu, y, s, l, r) { mu_posterior(y, s, l, r)[[1]](mu) }
-# rmu_posterior = function(y, s, l, r) { mu_posterior(y, s, l, r)[[2]] }
-#
-# # s (component precisions) and parameter models
-#
-# ds_prior = function(s, b, w) dgamma(s, b, 1 / w)
-# rs_prior = function(k, b, w) rgamma(k, b, 1 / w)
-#
-# s_posterior = function(y, mu, b, w) {
-# n = unlist(lapply(y, length))
-# squares = unlist(mapply("-", y, as.list(mu))) ^ 2
-# a = b + n
-# bet = a / (w * b + sum(squares))
-#
-# density = function(s) { dgamma(s, a, bet) }
-# sampler = rgamma(1, a, bet)
-# list(density, sampler)
-# }
-#
-# ds_posterior = function(s, y, mu, b, w) { s_posterior(y, mu, b, w)[[1]](s) }
-# rs_posterior = function(y, mu, b, w) { s_posterior(y, mu, b, w)[[2]] }
-#
-# w_obj_prior = function(y) {
-# var_y = var(unlist(y))
-# density = function(w) { dgamma(w, 1, var_y) }
-# sampler = rgamma(1, 1, var_y)
-# list(density, sampler)
-# }
-#
-# dw_obj_prior = function(w, y) { w_obj_prior(y)[[1]](w) }
-# rw_obj_prior = function(y) { w_obj_prior(y)[[2]] }
-#
-# w_posterior = function(y, s, b) {
-# k = length(y)
-# var_y = var(unlist(y))
-# prec_y = 1 / var_y
-# a = k * b + 1
-# bet = a / (prec_y + b * sum(s))
-#
-# density = function(w) { dgamma(w, a, bet) }
-# sampler = dgamma(1, a, bet)
-# list(density, sampler)
-# }
-#
-# dw_posterior = function(w, y, s, b) { w_posterior(y, s, b)[[1]](w) }
-# rw_posterior = function(y, s, b) { w_posterior(y, s, b)[[2]] }
-#
-# db_prior = function(b) dinvgamma(b, 1, 1)
-# rb_prior = function() rinvgamma(1, 1, 1)
-#
-# b_posterior = function(y, s, w) {
-# k = length(y)
-#
-# log_density = function(b) {
-# -k * lgamma(b / 2) -
-# (1 / (2 * b)) +
-# ((k * b - 3) / 2) * log(b / 2) +
-# b / 2 * sum((log(s * w)) - w * s)
-# }
-#
-# density = function(b) { exp(log_density(b)) }
-#
-# sampler = function() {
-#
-# # CHECK do i really need to transform this? is it log-concave already?
-# density_xformed = function(b) { b * density(b) }
-# log_density_xformed = function(b) { log(density_xformed(b)) }
-#
-# grad_log_density_xformed = function(b) {
-# 1 / b - k * digamma(b / 2) +
-# 1 / (2 * b ^ 2) +
-# k / 2 * log(b / 2) + (k * b - 3) / b +
-# sum((log(s * w)) - w * s) / 2
-# }
-#
-# # error due to non-log-concavity detection
-# val = ars(
-# n = 1
-# , f = log_density_xformed
-# , fprima = grad_log_density_xformed
-# , x = c(0.01, 0.5, 1)
-# , lb = T
-# , xlb = 0.01
-# , ub = T
-# , xub = 10)
-#
-# exp(sample(val, 1))
-# }
-#
-# list(density, sampler)
-# }
-#
-# db_posterior = function(b, y, s, w) { b_posterior(y, s, w)[[1]](b) }
-# rb_posterior = function(y, s, w) { b_posterior(y, s, w)[[2]]() }
-#
-# # p (mixing probabilities), n (occupation numbers) and parameter models
-#
-# dp_prior = function(p, a) ddirichlet(p, a)
-# rp_prior = function(a) drop(rdirichlet(1, a))
-#
-# dn_prior = function(n, p) dmultinom(n, prob = p)
-# rn_prior = function(p) drop(rmultinom(1, size = 1, prob = p))
-#
-# da_prior = function(a) dinvgamma(a, 1, 1)
-# ra_prior = rinvgamma(1, 1, 1)
-#
-# a_posterior = function(k, n) {
-# log_density = function(a) {
-# (k - 3 / 2) * log(a) - 1 / (2 * a) + lgamma(a) - lgamma(n + a)
-# }
-#
-# density = function(a) { exp(log_density(a)) }
-#
-# # log(a) ~ log-concave and can thus be sampled via ARS
-# #
-# # note that f(log(a)) = a * f(a), or log(f(log(a))) = log a + log f(a)
-# #
-# # ARS flakes out when k / n is sufficiently small (i.e. when 'a' would be
-# # extremely large). probably rare for this to happen in practice, so 'a' is
-# # capped at 20 here
-# sampler = function() {
-# log_xformed = expression(
-# (k - 1 / 2) * log(a) - 1 / (2 * a) + lgamma(a) - lgamma(n + a))
-# dens_log_xformed = function(a) eval(log_xformed)
-#
-# grad_log_xformed = D(log_xformed, 'a')
-# grad_dens_log_xformed = function(a) eval(grad_log_xformed)
-#
-# val = ars(
-# n = 1
-# , f = dens_log_xformed
-# , fprima = grad_dens_log_xformed
-# , x = c(0.51, 1, 1.49)
-# , lb = T
-# , xlb = 0.001
-# , ub = T
-# , xub = 20.0)
-#
-# exp(val)
-# }
-# list(density, sampler)
-# }
-#
-# da_posterior = function(a, k, n) { a_posterior(k, n)[[1]](a) }
-# ra_posterior = function(k, n) { a_posterior(k, n)[[2]]() }
-#
-# # model
-#
-# model_prior = function(y) {
-# k = length(y)
-# l = rl_obj_prior(y)
-# r = rr_obj_prior(y)
-# mu = rmu_prior(k, l, r)
-# b = rb_prior()
-# w = rw_obj_prior(y)
-# s = rs_prior(k, b, w)
-# a = ra_prior()
-# p = rp_prior(rep(a, k))
-#
-# sampler = ry_prior(mu, s, p)
-# conditional_sampler = function(n) { replicate(n, ry_prior(mu, s, p)()) }
-# list(sampler, conditional_sampler)
-# }
-#
-# rmodel_prior = function(y) { model_prior(y)[[1]] }
-#
-# # conditional model prior; sample a cluster configuration and then sample many
-# # observations from that configuration
-#
-# rmodel_conditional_prior = function(n, y) { model_prior(y)[[2]](n) }
-#
-# # debug
-#
-# # y = test_data
-# # k = length(y)
-# # l = rl_obj_prior(y)
-# # r = rr_obj_prior(y)
-# # mu = rmu_prior(k, l, r)
-# # w = rw_obj_prior(y)
-# # b = rb_prior()
-# # s = rs_prior(k, b, w)
-#
-#