bnp

Some older Bayesian nonparametrics research.
Log | Files | Refs | README | LICENSE

commit baa29631a0f3a5eae976d2dce891a06f6caaa2cf
parent 36c1fe13e5e4cface9780caa4615a86976dd15ef
Author: Jared Tobin <jared@jtobin.ca>
Date:   Thu, 11 Feb 2016 21:43:14 +1300

More cleanup.

Diffstat:
Mdirichlet-process-mixture/src/fmm.r | 177++++++++++++++++++++++++++++++++++---------------------------------------------
1 file changed, 75 insertions(+), 102 deletions(-)

diff --git a/dirichlet-process-mixture/src/fmm.r b/dirichlet-process-mixture/src/fmm.r @@ -1,5 +1,6 @@ set.seed(42) +require(actuar) require(ars) require(gtools) @@ -17,26 +18,21 @@ require(gtools) # y | p, c, mu, s ~ <p, normal(mu, 1 / s)> test_data = list( - c(1.2, 1.1, 1.23, 0.93, 0.89, 1.01) - , c(0.3, 0.34, 0.32, 0.29, 0.31) - , c(-0.5, -0.1, -0.21, 0.02, -0.81, 0.08, -0.33) + rnorm(801, 3.5, 1) + , rnorm(300, 0.3, 0.8) + , rnorm(722, -4.2, 0.5) ) # observations -y_prior = function(mu, s, p) { - density = function(y) { sum(p * dnorm(y, mu, 1 / s)) } - sampler = function() { - c = drop(rmultinom(1, 1, prob = p)) - muc = drop(c %*% mu) - sc = drop(c %*% s) - rnorm(1, muc, 1 / sc) - } - list(density, sampler) - } +dy_prior = function(y, mu, s, p) sum(p * dnorm(y, mu, 1 / s)) -dy_prior = function(y, mu, s, p) { y_prior(mu, s, p)[[1]](y) } -ry_prior = function(mu, s, p) { y_prior(mu, s, p)[[2]] } +ry_prior = function(mu, s, p) { + c = drop(rmultinom(1, 1, prob = p)) + muc = drop(c %*% mu) + sc = drop(c %*% s) + rnorm(1, muc, 1 / sc) + } # mu (component means) and parameter models @@ -94,14 +90,8 @@ r_posterior = function(y, mu, l) { dr_posterior = function(r, y, mu, l) { r_posterior(y, mu, l)[[1]](r) } rr_posterior = function(y, mu, l) { r_posterior(y, mu, l)[[2]] } -mu_prior = function(l, r) { - density = function(mu) { dnorm(mu, l, 1 / r) } - sampler = function(k) { rnorm(k, l, 1 / r) } - list(density, sampler) - } - -dmu_prior = function(mu, l, r) { mu_prior(l, r)[[1]](mu) } -rmu_prior = function(k, l, r) { mu_prior(l, r)[[2]](k) } +dmu_prior = function(mu, l, r) dnorm(mu, l, 1 / r) +rmu_prior = function(k, l, r) rnorm(k, l, 1 / r) mu_posterior = function(y, s, l, r) { n = unlist(lapply(y, length)) @@ -119,14 +109,8 @@ rmu_posterior = function(y, s, l, r) { mu_posterior(y, s, l, r)[[2]] } # s (component precisions) and parameter models -s_prior = function(b, w) { - density = function(s) { dgamma(s, b, 1 / w) } - sampler = function(k) { rgamma(k, b, 1 / w) } - list(density, sampler) - } - -ds_prior = function(s, b, w) { s_prior(b, w)[[1]](s) } -rs_prior = function(k, b, w) { s_prior(b, w)[[2]](k) } +ds_prior = function(s, b, w) dgamma(s, b, 1 / w) +rs_prior = function(k, b, w) rgamma(k, b, 1 / w) s_posterior = function(y, mu, b, w) { n = unlist(lapply(y, length)) @@ -167,100 +151,80 @@ w_posterior = function(y, s, b) { dw_posterior = function(w, y, s, b) { w_posterior(y, s, b)[[1]](w) } rw_posterior = function(y, s, b) { w_posterior(y, s, b)[[2]] } -b_prior = function() { - density = function(b) { dgamma(1 / b, 1, 1) } - sampler = 1 / rgamma(1, 1, 1) - list(density, sampler) - } - -db_prior = function(b) { b_prior()[[1]](b) } -rb_prior = function() { b_prior()[[2]] } +db_prior = function(b) dinvgamma(b, 1, 1) +rb_prior = function() rinvgamma(1, 1, 1) b_posterior = function(y, s, w) { k = length(y) - # density = function(b) { - # t0 = gamma(b / 2) ^ (- k) - # t1 = exp(-1 / (2 * b)) - # t2 = (b / 2) ^ ((k * b - 3) / 2) - # t3 = prod((s * w) ^ (b / 2) * exp(- b * s * w / 2)) - - # t0 * t1 * t2 * t3 - # } - - # FIXME might be able to clean this up; don't really need the expressions - # since i can't use symbolic diff - - log_density_expr = expression( - - k * lgamma(b / 2) - + log_density = function(b) { + -k * lgamma(b / 2) - (1 / (2 * b)) + - ((k * b - 3) / 2) * log (b / 2) + - sum((b / 2) * log (s * w) - (b * s * w) / 2)) + ((k * b - 3) / 2) * log(b / 2) + + b / 2 * sum((log(s * w)) - w * s) + } - density = function(b) exp(eval(log_density_expr)) + density = function(b) { exp(log_density(b)) } sampler = function() { - log_density_xformed_expr = expression(log(b) + eval(log_density_expr, b)) - log_density_xformed = function(b) { eval(log_density_xformed_expr) } + # CHECK do i really need to transform this? is it log-concave already? + density_xformed = function(b) { b * density(b) } + log_density_xformed = function(b) { log(density_xformed(b)) } - grad_log_density_xformed = function(b) { - 1 / b - k * digamma(b / 2) + - 1 / (2 * b ^ 2) + k / 2 * log(b / 2) + - 2 / b * (k * b - 3) / 2 + - sum(log(s * w) / 2 - s * w / 2) - } + grad_log_density_xformed = function(b) { + 1 / b - k * digamma(b / 2) + + 1 / (2 * b ^ 2) + + k / 2 * log(b / 2) + (k * b - 3) / b + + sum((log(s * w)) - w * s) / 2 + } + # error due to non-log-concavity detection + val = ars( + n = 1 + , f = log_density_xformed + , fprima = grad_log_density_xformed + , x = c(0.01, 0.5, 1) + , lb = T + , xlb = 0.01 + , ub = T + , xub = 10) - } + exp(sample(val, 1)) + } list(density, sampler) } db_posterior = function(b, y, s, w) { b_posterior(y, s, w)[[1]](b) } -rb_posterior = function(y, s, w) { b_posterior(y, s, w)[[2]] } +rb_posterior = function(y, s, w) { b_posterior(y, s, w)[[2]]() } # p (mixing probabilities), n (occupation numbers) and parameter models -# note that the distribution of cluster labels can be written in terms of the -# dirichlet concentration parameter - -p_prior = function(a) { - density = function(p) { ddirichlet(p, a) } - sampler = drop(rdirichlet(1, a)) - list(density, sampler) - } - -dp_prior = function(p, a) { p_prior(a)[[1]](p) } -rp_prior = function(a) { p_prior(a)[[2]] } -n_prior = function(p) { - density = function(n) { dmultinom(n, prob = p) } - sampler = drop(rmultinom(1, size = 1, prob = p)) - list(density, sampler) - } - -dn_prior = function(n, p) { n_prior(p)[[1]](n) } -rn_prior = function(p) { n_prior(p)[[2]] } +dp_prior = function(p, a) ddirichlet(p, a) +rp_prior = function(a) drop(rdirichlet(1, a)) -a_prior = function() { - density = function(a) { dgamma(1 / a, 1, 1) } - sampler = 1 / rgamma(1, 1, 1) - list(density, sampler) - } +dn_prior = function(n, p) dmultinom(n, prob = p) +rn_prior = function(p) drop(rmultinom(1, size = 1, prob = p)) -da_prior = function(a) { a_prior()[[1]](a) } -ra_prior = function() { a_prior()[[2]] } +da_prior = function(a) dinvgamma(a, 1, 1) +ra_prior = rinvgamma(1, 1, 1) a_posterior = function(k, n) { - density = function(a) { - a ^ (k - 3 / 2) * exp(-1 / (2 * a)) * gamma(a) / gamma(n + a) + log_density = function(a) { + (k - 3 / 2) * log(a) - 1 / (2 * a) + lgamma(a) - lgamma(n + a) } + density = function(a) { exp(log_density(a)) } + # log(a) ~ log-concave and can thus be sampled via ARS - # CHECK isn't 'a' ~ log-concave as well? do i need the transformation? - # paper claims yes but seems like i don't + # + # note that f(log(a)) = a * f(a), or log(f(log(a))) = log a + log f(a) + # + # ARS flakes out when k / n is sufficiently small (i.e. when 'a' would be + # extremely large). probably rare for this to happen in practice, so 'a' is + # capped at 20 here sampler = function() { - # f(log(a)) ~ a * f(a) log_xformed = expression( (k - 1 / 2) * log(a) - 1 / (2 * a) + lgamma(a) - lgamma(n + a)) dens_log_xformed = function(a) eval(log_xformed) @@ -268,11 +232,8 @@ a_posterior = function(k, n) { grad_log_xformed = D(log_xformed, 'a') grad_dens_log_xformed = function(a) eval(grad_log_xformed) - # ARS seems to flake out when k / n is sufficiently small (i.e. when 'a' - # would be extremely large). probably rare for this to happen in practice, - # so 'a' is capped at 20 below. val = ars( - n = 30 + n = 1 , f = dens_log_xformed , fprima = grad_dens_log_xformed , x = c(0.51, 1, 1.49) @@ -281,7 +242,7 @@ a_posterior = function(k, n) { , ub = T , xub = 20.0) - exp(sample(val, 1)) + exp(val) } list(density, sampler) } @@ -314,3 +275,15 @@ rmodel_prior = function(y) { model_prior(y)[[1]] } rmodel_conditional_prior = function(n, y) { model_prior(y)[[2]](n) } +# debug + +# y = test_data +# k = length(y) +# l = rl_obj_prior(y) +# r = rr_obj_prior(y) +# mu = rmu_prior(k, l, r) +# w = rw_obj_prior(y) +# b = rb_prior() +# s = rs_prior(k, b, w) + +