commit c70b1a71646b083a3e269b9fef7f67c345887f9c
parent 2ab8ab4200d59ef1af2db70d7c7b8c7eb8778a7d
Author: Jared Tobin <jared@jtobin.ca>
Date: Wed, 10 Feb 2016 11:05:19 +1300
Add some sampling functions.
Diffstat:
2 files changed, 63 insertions(+), 45 deletions(-)
diff --git a/dirichlet-process-mixture/src/Generative.hs b/dirichlet-process-mixture/src/Generative.hs
@@ -1,51 +1,50 @@
-{-# LANGUAGE NoMonomorphismRestriction #-}
-import Control.Monad
+-- | A generative (prior predictive) finite mixture model.
+
+module Generative where
+
+import Control.Monad (replicateM, zipWithM)
import System.Random.MWC.Probability
+-- | Mixing probabilities for the model.
mixing :: Int -> Prob IO [Double]
mixing k = do
- a <- inverseGamma 1 1
+ a <- inverseGamma 1 1
symmetricDirichlet k a
-params :: Int -> Double -> Double -> Prob IO [(Double, Double)]
-params k muy vary = do
+-- | Mean and precision parameters for clusters in the model.
+locationScale :: Int -> Double -> Double -> Prob IO [(Double, Double)]
+locationScale k muy vary = do
l <- normal muy (sqrt vary)
r <- gamma 1 (recip (sqrt vary))
mus <- replicateM k (normal l (sqrt (recip r)))
-
- b <- inverseGamma 1 1
- w <- gamma 1 vary
- ss <- replicateM k (gamma b (recip w))
-
+ b <- inverseGamma 1 1
+ w <- gamma 1 vary
+ ss <- replicateM k (gamma b (recip w))
return $ zip mus ss
+-- | Finite Gaussian mixture model.
fmm :: Int -> Double -> Double -> Prob IO [Double]
fmm k muy vary = do
- (mus, ss) <- fmap unzip (params k muy vary)
- pis <- mixing k
-
- xs <- zipWithM normal mus (fmap (sqrt . recip) ss)
-
+ (mus, ss) <- fmap unzip (locationScale k muy vary)
+ pis <- mixing k
+ xs <- zipWithM normal mus (fmap (sqrt . recip) ss)
return $ zipWith (*) pis xs
+-- | Conditional mixture model. Sample a set of probabilities from the mixing
+-- model and then use those to generate observations in each cluster.
+conditional :: Int -> Int -> Double -> Double -> Prob IO [[Double]]
conditional n k muy vary = do
- (mus, ss) <- fmap unzip (params k muy vary)
- let fs = zipWithM normal mus (fmap (sqrt . recip) ss)
- pis <- mixing k
-
+ (mus, ss) <- fmap unzip (locationScale k muy vary)
+ pis <- mixing k
replicateM n $ do
- f <- fs
+ f <- zipWithM normal mus (fmap (sqrt . recip) ss)
return $ zipWith (*) pis f
+-- | Sample 5000 times from a conditional mixture model.
main :: IO ()
main = do
- samples <- withSystemRandom . asGenIO $ \gen ->
- -- replicateM 5000 (sample (fmm 5 1 1) gen)
- sample (conditional 5000 5 1 1) gen
+ samples <- withSystemRandom . asGenIO $ sample (conditional 5000 5 1 1)
let pretty = putStrLn . filter (`notElem` "[]") . show
mapM_ pretty samples
-
-
-
diff --git a/dirichlet-process-mixture/src/fmm.r b/dirichlet-process-mixture/src/fmm.r
@@ -11,19 +11,22 @@
# w ~ gamma(1, var_y)
# y | p, c, mu, s ~ <p, normal(mu, s^-1)>
-# conditional posterior densities
+# conditional posterior densities / samplers
-dmeans = function(mu, y, s, l, r) {
+dr_mu = function(mu, y, s, l, r) {
cluster_count = unlist(lapply(y, length))
cluster_mean = unlist(lapply(y, mean))
m = (cluster_mean * cluster_count * s + l * r) / (cluster_count * s + r)
v = 1 / (cluster_count * s + r)
- dnorm(x, m, v)
+ list(dnorm(x, m, v), rnorm(1, m, v))
}
-dl = function(l, y, mu, r) {
+dmu = function(mu, y, s, l, r) { dr_mu(mu, y, s, l, r)[[1]] }
+rmu = function(mu, y, s, l, r) { dr_mu(mu, y, s, l, r)[[2]] }
+
+dr_l = function(l, y, mu, r) {
mu_y = mean(unlist(y))
var_y = var(unlist(y))
prec_y = 1 / var_y
@@ -32,20 +35,26 @@ dl = function(l, y, mu, r) {
m = (mu_y * prec_y + r * sum(mu)) / (prec_y + k * r)
v = 1 / (prec_y + k * r)
- dnorm(l, m, v)
+ list(dnorm(l, m, v), rnorm(1, m, v))
}
-dr = function(r, y, mu, l) {
+dl = function(l, y, mu, r) { dr_l(l, y, mu, r)[[1]] }
+rl = function(l, y, mu, r) { dr_l(l, y, mu, r)[[2]] }
+
+dr_r = function(r, y, mu, l) {
var_y = var(unlist(y))
k = length(y)
a = k + 1
b = a / (var_y + sum((mu - l)^2))
- dgamma(r, a, b)
+ list(dgamma(r, a, b), rgamma(1, a, b))
}
-ds = function(s, y, mu, b, w) {
+dr = function(r, y, mu, l) { dr_r(r, y, mu, l)[[1]] }
+rr = function(r, y, mu, l) { dr_r(r, y, mu, l)[[2]] }
+
+dr_s = function(s, y, mu, b, w) {
cluster_count = unlist(lapply(y, length))
squares = unlist(mapply("-", y, as.list(mu))) ^ 2
@@ -53,10 +62,13 @@ ds = function(s, y, mu, b, w) {
a = b + cluster_count
bet = a / (w * b + sum(squares))
- dgamma(s, a, bet)
+ list(dgamma(s, a, bet), rgamma(1, a, bet))
}
-dw = function(w, y, s, b) {
+ds = function(s, y, mu, b, w) { ds_r(s, y, mu, b, w)[[1]] }
+rs = function(s, y, mu, b, w) { ds_r(s, y, mu, b, w)[[2]] }
+
+dr_w = function(w, y, s, b) {
k = length(y)
var_y = var(unlist(y))
prec_y = 1 / var_y
@@ -64,10 +76,13 @@ dw = function(w, y, s, b) {
a = k * b + 1
bet = a / (prec_y + b * sum(s))
- dgamma(w, a, bet)
+ lis(dgamma(w, a, bet), rgamma(1, a, bet))
}
-db = function(b, y, s, w) {
+dw = function(w, y, s, b) { dr_w(w, y, s, b)[[1]] }
+rw = function(w, y, s, b) { dr_w(w, y, s, b)[[2]] }
+
+dr_b = function(b, y, s, w) {
k = length(y)
t0 = gamma(b / 2) ^ (- k)
@@ -75,19 +90,23 @@ db = function(b, y, s, w) {
t2 = (b / 2) ^ ((k * b - 3) / 2)
t3 = prod((s * w) ^ (b / 2) * exp(- b * s * w / 2))
- t0 * t1 * t2 * t3
+ list(t0 * t1 * t2 * t3, NULL) # FIXME sampling function
}
-dn = function(n, a) {
+db = function(b, y, s, w) { dr_b(b, y, s, w)[[1]] }
+
+dr_n = function(n, a) {
nt = sum(n)
k = length(n)
- a ^ k * gamma(a) / gamma(nt + a)
+ list(a ^ k * gamma(a) / gamma(nt + a), NULL) # FIXME sampling function
}
-da = function(a, n, k) {
- a ^ (k - 3 / 2) * exp(-1 / (2 * a)) * gamma(a) / gamma(n + a)
- }
+dn = function(n, a) { dr_n(n, a)[[1]] }
-# sampling functions
+dr_a = function(a, n, k) {
+ list(a ^ (k - 3 / 2) * exp(-1 / (2 * a)) * gamma(a) / gamma(n + a), NULL)
+ # FIXME sampling function
+ }
+da = function(a, n, k) { dr_a(a, n, k)[[1]] }