measurable

A simple shallowly-embedded DSL for dealing with measures.

commit 335f621e064b05d162481a5094a51c441556bd28
parent a7c1e1d51a04dc7662e3ab2a68782bcf28f099ff
Author: Jared Tobin <jared@jtobin.ca>
Date:   Sun, 20 Oct 2013 17:32:49 +1300

Clean up examples & add comments.

Diffstat:
 M tests/Test.hs | 167 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
1 file changed, 91 insertions(+), 76 deletions(-)

diff --git a/tests/Test.hs b/tests/Test.hs
@@ -3,6 +3,7 @@
 import Control.Applicative
 import Control.Monad
 import Control.Monad.Trans
+import Data.Vector (singleton)
 import Measurable
 import Numeric.SpecFunctions
 import Statistics.Distribution hiding (mean, variance)
@@ -13,110 +14,124 @@ import Statistics.Distribution.ChiSquared
 import System.Random.MWC
 import System.Random.MWC.Distributions

--- | A standard beta-binomial conjugate model.
+-- | Some workhorse densities (with respect to Lebesgue measure).
+genNormal m v = density $ normalDistr m v
+genBeta a b   = density $ betaDistr a b
+genChiSq d    = density $ chiSquared d
+
+-- | A binomial density (with respect to counting measure).
+binom p n k
+  | n <= 0    = 0
+  | k < 0     = 0
+  | n < k     = 0
+  | otherwise = n `choose` k * p ^ k * (1 - p) ^ (n - k)
+
+-- | Measures created from densities.  Notice that the binomial measure has to
+--   be treated differently than the measures absolutely continuous WRT
+--   Lebesgue measure.
+normalMeasure m v = fromDensity $ genNormal m v
+betaMeasure a b   = fromDensity $ genBeta a b
+chiSqMeasure d    = fromDensity $ genChiSq d
+binomMeasure n p  = fromMassFunction (binom p n . truncate)
+                      (fromIntegral <$> [0..n] :: [Double])
+
+-- | Sampling functions.
+generateExpSamples n l g      = replicateM n (exponential l g)
+generateNormalSamples n m v g = replicateM n (normal m v g)
+
+-- | A standard beta-binomial conjugate model.  Notice how naturally it's
+--   expressed using do-notation!
 betaBinomialConjugate :: Double -> Double -> Int -> Measure Double
 betaBinomialConjugate a b n = do
   p <- betaMeasure a b
   binomMeasure n p

--- | Workhorse densities.
-standardNormal      = density $ normalDistr 0 1
-genLocationNormal m = density $ normalDistr m 1
-basicBeta a b       = density $ betaDistr a b
+main :: IO ()
+main = do
+  -- Initialize our PRNG.
+  g <- initialize (singleton 42)

--- | A beta measure.
-betaMeasure a b = fromDensity $ basicBeta a b
+  -- Generate some samples (in practice we'd usually create measures directly
+  -- from samples).
+  expSamples  <- generateExpSamples 1000 1 g
+  normSamples <- generateNormalSamples 1000 0 1 g

--- | A binomial mass function.
-binom p n k | n <= 0    = 0
-            | k < 0     = 0
-            | n < k     = 0
-            | otherwise = n `choose` k * p ^ k * (1 - p) ^ (n - k)
+  -- Create a couple of measures from those.
+  let observedExpMeasure    = fromObservations expSamples
+      observedNormalMeasure = fromObservations normSamples

--- | Binomial measure.
-binomMeasure n p = fromMassFunction (\x -> binom p n (truncate x))
-                     (map fromIntegral [0..n] :: [Double])
+  putStrLn $ "X ~ N(0, 1)"
+  putStrLn $ "Y ~ empirical (observed from exponential(1))"
+  putStrLn $ "Z ~ empirical (observed from N(0, 1))"
+  putStrLn $ "W ~ ChiSquared(5)"
+  putStrLn $ ""

-main = do
-  expSamples <- withSystemRandom . asGenIO $ \g ->
-    replicateM 100 $ exponential 1 g
+  -- We can mingle our empirical measures with those created directly from
+  -- densities.  We can literally just add measures together (there's a
+  -- convolution happening under the hood).

-  normSamples <- withSystemRandom . asGenIO $ \g ->
-    replicateM 100 $ normal 0 1 g
+  let mu = normalMeasure 0 1 + observedExpMeasure
+  putStrLn $ "E(X + Y): " ++ (show $ expectation mu)

-  let mu  = fromDensity standardNormal
-      nu  = fromObservations expSamples
-      rho = (cos <$> mu) + (sin <$> nu)
-      eta = exp <$> rho
+  -- We can create pushforward/image measures by.. pushing functions onto
+  -- measures.
+  --
+  -- The pushforward operator happens to be trusty old 'fmap' (as infix, <$>).

-  putStrLn $ "mean of normal samples (should be around 0): " ++
-    show (expectation . fromObservations $ normSamples)
-  putStrLn $ "variance of normal samples (should be around 1): " ++
-    show (variance . fromObservations $ normSamples)
-  putStrLn $ "let X ~ N(0, 1), Y ~ observed. mean of exp(cos X + sin Y): " ++
-    show (expectation eta)
+  let nu = (cos <$> normalMeasure 0 1) * (sin <$> observedNormalMeasure)
+  putStrLn $ "E(cos X * sin Z): " ++ (show $ expectation nu)

-  -- Subtraction of measures?
+  let eta = exp <$> nu
+  putStrLn $ "E[e^(cos X * sin Z)]: " ++ (show $ expectation eta)

-  let iota = mu - mu
-
-  putStrLn $ "let X, Y be independent N(0, 1). mean of X - Y: " ++
-    show (expectation iota)
-  putStrLn $ "let X, Y be independent N(0, 1). variance of X - Y: " ++
-    show (variance iota)
+  -- At present the complexity of each Measure operation seems to *explode*,
+  -- so you can't do more than a few of them without your machine locking up.
+  -- I have to look into what could be done to make this reasonably efficient.
+  -- But hey, experiments and such..

-  -- Product of measures? *pops out of cake* YEAH WE CAN DO THAT
+  let zeta = (exp . tanh) <$> (chiSqMeasure 5 * normalMeasure 0 1)
+  putStrLn $ "E[e^(tanh (X * W))]: " ++ (show $ expectation zeta)

-  let phi  = fromDensity $ genLocationNormal 2
-      xi   = fromDensity $ genLocationNormal 3
-      zeta = phi * xi
+  putStrLn $ ""

-  putStrLn $ "let X ~ N(2, 1), Y ~ N(3, 1). mean of XY (should be 6) " ++
-    show (expectation zeta)
-  putStrLn $ "let X ~ N(2, 1), Y ~ N(3, 1). variance of XY (should be 14) " ++
-    show (variance zeta)
+  -- We can do probability by just taking the expectation of an indicator
+  -- function, and there's a built-in cumulative distribution function.
+  --
+  -- P(X < 0), for example.  It should be 0.5, but there is some error due to
+  -- quadrature.

-  let alpha = fromDensity $ density $ chiSquared 5
-      beta  = (exp . tanh) <$> (phi * alpha)
+  putStrLn $ "P(X < 0): " ++ (show $ cdf (normalMeasure 0 1) 0)

-  putStrLn $ "let X ~ N(2, 1), Y ~ chisq(5). variance of exp (tanh XY) " ++
-    show (variance beta)
+  -- Everyone knows that for X ~ N(0, 1), P(0 < X < 1) is about 0.341..

-  putStrLn ""
-  putStrLn "Some probability examples:"
-  putStrLn ""
+  putStrLn $ "P(0 < X < 1): "
+    ++ (show $ expectation $ 0 `to` 1 <$> (normalMeasure 0 1))

-  putStrLn $ "let X ~ N(0, 1). P(X < 0) (should be ~ 0.5): " ++
-    show (cdf mu 0)
+  putStrLn ""

-  putStrLn $ "let X ~ N(0, 1). P(0 < X < 1) (should be ~ 0.341): " ++
-    show (expectation $ 0 `to` 1 <$> mu)
+  -- The coolest trick of all is that monadic bind is Bayesian inference.
+  -- Getting posterior predictive expectations & probabilities is thus really
+  -- declarative.

-  putStrLn $ "let X ~ N(0, 1), Y ~ observed. P(0 < X < 0.8): " ++
-    show (expectation $ 0 `to` 0.8 <$> (mu + nu))
+  putStrLn "X | p ~ binomial(10, p)"
+  putStrLn "p ~ beta(1, 4)"
   putStrLn ""
-  putStrLn "Creating from a mass function:"
+  putStrLn "(integrate out p..)"
   putStrLn ""
-
-  let kappa = binomMeasure 10 0.5
+  let phi = betaBinomialConjugate 1 4 10

-  putStrLn $ "let X ~ binom(10, 0.5). mean of X (should be 5): " ++
-    show (expectation kappa)
-  putStrLn $ "let X ~ binom(10, 0.5). variance of X (should be 2.5): " ++
-    show (variance kappa)
+  putStrLn $ "E(X): "
+    ++ (show $ expectation $ betaBinomialConjugate 1 4 10)

-  putStrLn ""
-  putStrLn "Bayesian inference"
-  putStrLn ""
+  putStrLn $ "P(X == 5): "
+    ++ (show $ expectation $ 5 `to` 5 <$> phi)
+
+  putStrLn $ "P(1 <= X <= 5): "
+    ++ (show $ expectation $ 1 `to` 5 <$> phi)
+
+  putStrLn $ "var(X): " ++ (show $ variance phi)

-  let omega = betaBinomialConjugate 1 4 10
+  -- Lots of kinks to be worked out, but this is a cool concept.

-  putStrLn $
-    "let X | p ~ binomial(10, p), p ~ beta(1, 4). mean of posterior pred.:\n"
-      ++ show (expectation omega)
-  putStrLn $
-    "let X | p ~ binomial(10, p), p ~ beta(1, 4). variance of posterior pred:\n"
-      ++ show (variance omega)
-
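The comments in the new Test.hs gesture at the core trick: a measure can be
represented by its integrator, 'fmap' is the pushforward, and monadic bind
integrates a variable out.  The following is a minimal, self-contained sketch
of that idea, assuming the standard continuation-style encoding of measures;
all names here (MeasureS, integrateS, expectationS, fromMassFunctionS,
fromObservationsS) are hypothetical and the real Measurable API may differ.

-- A hypothetical sketch, not the library's actual code: a measure knows how
-- to integrate a function against itself.
newtype MeasureS a = MeasureS { integrateS :: (a -> Double) -> Double }

instance Functor MeasureS where
  -- fmap is the pushforward: integrating f against (g <$> mu) is the same
  -- as integrating (f . g) against mu.
  fmap g mu = MeasureS $ \f -> integrateS mu (f . g)

instance Applicative MeasureS where
  pure x = MeasureS ($ x)  -- the Dirac measure concentrated at x
  mg <*> mx = MeasureS $ \f ->
    integrateS mg $ \g -> integrateS mx (f . g)

instance Monad MeasureS where
  return = pure
  -- Bind integrates the inner measure at every point of the outer one,
  -- i.e. it marginalizes ("integrates out") the first variable.  This is
  -- why do-notation reads like a hierarchical model specification.
  mu >>= k = MeasureS $ \f -> integrateS mu (\x -> integrateS (k x) f)

-- Expectation is just integration of the identity function.
expectationS :: MeasureS Double -> Double
expectationS mu = integrateS mu id

-- A discrete measure from a mass function and its support.
fromMassFunctionS :: (a -> Double) -> [a] -> MeasureS a
fromMassFunctionS p support = MeasureS $ \f -> sum [p x * f x | x <- support]

-- An empirical measure from observations.
fromObservationsS :: [Double] -> MeasureS Double
fromObservationsS xs =
  MeasureS $ \f -> sum (map f xs) / fromIntegral (length xs)

main :: IO ()
main = do
  -- E(X) for X uniform on {0..10}: prints 5.0.
  let u = fromMassFunctionS (const (1 / 11)) [0 .. 10] :: MeasureS Double
  print (expectationS u)
  -- Pushforward: E(2X) = 10.0.
  print (expectationS ((* 2) <$> u))

Under this encoding the addition and multiplication of measures used above
would presumably come from a Num instance on the same representation, which
is consistent with the comment that a convolution happens under the hood.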