measurable

A simple shallowly-embedded DSL for dealing with measures.
Log | Files | Refs | README | LICENSE

commit aa11851c2bd13746b405d6cf0395a7746381e24b
parent 6afe6139938ca12810b74b50d5d7ec56181e8bee
Author: Jared Tobin <jared@jtobin.ca>
Date:   Thu,  7 Nov 2013 10:01:59 +1300

Misc updates.

Diffstat:
Msrc/Examples.hs | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/Measurable.hs | 161+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/Measurable/Core.hs | 41+++++++++++++++++++++++++++++++++++++----
3 files changed, 249 insertions(+), 4 deletions(-)

diff --git a/src/Examples.hs b/src/Examples.hs @@ -5,6 +5,7 @@ import Control.Arrow import Control.Error import Control.Lens hiding (to) import Control.Monad +import Control.Monad.Cont import Control.Monad.Primitive import Control.Monad.Trans import Data.HashMap.Strict (HashMap) @@ -39,6 +40,14 @@ altBetaMeasure epochs a b g = do bs <- lift $ replicateM epochs (genContVar (betaDistr a b) g) fromObservationsT bs +approxBetaMeasure nInnerApprox nOuterApprox a b g = do + bs <- lift $ genBetaSamples nInnerApprox a b g + fromObservationsApprox nOuterApprox bs g + +approxBinomialMeasure nInnerApprox nOuterApprox n p g = do + bs <- lift $ genBinomSamples nInnerApprox n p g + fromObservationsApprox nOuterApprox bs g + -- | A standard beta-binomial conjugate model. Notice how naturally it's -- expressed using do-notation! @@ -47,6 +56,38 @@ betaBinomialConjugate a b n = do p <- betaMeasure a b binomMeasure n p +altBetaBinomialConjugate a b n g = do + p <- altBetaMeasure 1000 a b g + binomMeasure n p + +approxBetaBinomialConjugate + :: Double -> Double -> Int -> Gen RealWorld -> Model Int +approxBetaBinomialConjugate a b n g = do + p <- approxBetaMeasure 100000 100 a b g + approxBinomialMeasure 100 100 n p g + + + +-- | Observe a binomial distribution. +genBinomSamples + :: (Applicative m, PrimMonad m) + => Int + -> Int + -> Double + -> Gen (PrimState m) + -> m [Int] +genBinomSamples n m p g = replicateM n $ genBinomial m p g + +-- | Observe a beta distribution. +genBetaSamples + :: (Applicative m, PrimMonad m) + => Int + -> Double + -> Double + -> Gen (PrimState m) + -> m [Double] +genBetaSamples n a b g = replicateM n $ genContVar (betaDistr a b) g + -- | Observe a gamma distribution. genGammaSamples :: (Applicative m, PrimMonad m) @@ -208,3 +249,13 @@ gaussianMixtureModel n observed g = do fromObservationsT samples +-- | Count the number of Trues in a list. +countTrue :: [Bool] -> Int +countTrue = length . filter id + +genBernoulli :: PrimMonad m => Double -> Gen (PrimState m) -> m Bool +genBernoulli p g = uniform g >>= return . (< p) + +genBinomial :: PrimMonad m => Int -> Double -> Gen (PrimState m) -> m Int +genBinomial n p g = replicateM n (genBernoulli p g) >>= return . countTrue + diff --git a/src/Measurable.hs b/src/Measurable.hs @@ -0,0 +1,161 @@ +{-# LANGUAGE BangPatterns #-} + +module Measurable where + +import Control.Applicative +import Control.Monad +import Data.List +import Data.Monoid +import Numeric.Integration.TanhSinh + +-- | A measure is a set function from some sigma-algebra to the extended real +-- line. In practical terms we define probability in terms of measures; for a +-- probability measure /P/ and some measurable set /A/, the measure of the set +-- is defined to be that set's probability. +-- +-- For any sigma field, there is a one-to-one correspondence between measures +-- and increasing linear functionals on its associated space of positive +-- measurable functions. That is, +-- +-- P(A) = f(I_A) +-- +-- For A a measurable set and I_A its indicator function. So we can generally +-- abuse notation to write something like P(I_A), even though we don't +-- directly apply the measure to a function. +-- +-- So we can actually deal with measures in terms of measurable functions, +-- rather than via the sigma algebra or measurable sets. Instead of taking +-- a set and returning a probability, we can take a function and return a +-- probability. +-- +-- Once we have a measure, we use it by integrating against it. Take a +-- real-valued random variable (i.e., measurable function) /X/. The mean of X +-- is E X = int_R X d(Px), for Px the distribution (image measure) of X. +-- +-- We can generalize this to work with arbitrary measurable functions and +-- measures. Expectation can be defined by taking a measurable function and +-- applying a measure to it - i.e., just function application. +-- +-- So really, a Measure in this sense is an expression of a particular +-- computational process - expectation. We leave a callback to be +-- plugged in - a measurable function - and from there, we can finish the +-- computation and return a value. A measure is actually represented as a +-- *program* that, given a measurable function, integrates that function. +-- It's thus completely equivalent to the Continuation monad, albeit with +-- a restricted result type. +-- +-- The Functor instance provides the ability to create pushforward/image +-- measures. That's handled by good ol' fmap. The strength of the Monad +-- instance is that it allows us to do conditional probability. That is, +-- +-- parameterModel >>= dataModel == model +-- +-- Ex, given 'betaMeasure a b' and 'binomMeasure n p' functions that create +-- the obvious measures, we can express a beta-binomial model like so: +-- +-- betaBinomialConjugate :: Double -> Double -> Int -> Measure Double +-- betaBinomialConjugate a b n = do +-- p <- betaMeasure a b +-- binomMeasure n p +-- + +newtype Measure a = Measure { measure :: (a -> Double) -> Double } + +instance Num a => Num (Measure a) where + (+) = liftA2 (+) + (-) = liftA2 (-) + (*) = liftA2 (*) + abs = id + signum mu = error "fromInteger: not supported for Measures" + fromInteger = error "fromInteger: not supported for Measures" + +instance Fractional a => Monoid (Measure a) where + mempty = identityMeasure + mappend = (+) + +instance Functor Measure where + fmap f mu = Measure $ \g -> measure mu $ g . f -- pushforward/image measure + +instance Applicative Measure where + pure = return + (<*>) = ap + +instance Monad Measure where + return x = Measure (\f -> f x) + mu >>= f = Measure $ \d -> + measure mu $ \g -> + measure (f g) d + +-- | The volume is obtained by integrating against a constant. This is '1' for +-- any probability measure. +volume :: Measure a -> Double +volume mu = measure mu (const 1) + +-- | The expectation is obtained by integrating against the identity function. +-- +-- This is just just (`runCont` id). +expectation :: Measure Double -> Double +expectation mu = measure mu id + +-- | The variance is obtained by integrating against the usual function. +variance :: Measure Double -> Double +variance mu = measure mu (^ 2) - expectation mu ^ 2 + +-- | Create a measure from a collection of observations from some distribution. +fromObservations :: Fractional a => [a] -> Measure a +fromObservations xs = Measure (`weightedAverage` xs) + +-- | Create a measure from a density function. +fromDensity :: (Double -> Double) -> Measure Double +fromDensity d = Measure $ \f -> quadratureTanhSinh $ liftA2 (*) f d + where quadratureTanhSinh = result . last . everywhere trap + +-- | Create a measure from a mass function. +fromMassFunction :: (a -> Double) -> [a] -> Measure a +fromMassFunction p support = Measure $ \f -> + sum . map (liftA2 (*) f p) $ support + +-- | The (sum) identity measure. +identityMeasure :: Fractional a => Measure a +identityMeasure = fromObservations [] + +-- | Simple average. +average :: Fractional a => [a] -> a +average xs = fst $ foldl' + (\(!m, !n) x -> (m + (x - m) / fromIntegral (n + 1), n + 1)) (0, 0) xs +{-# INLINE average #-} + +-- | Weighted average. +weightedAverage :: Fractional c => (a -> c) -> [a] -> c +weightedAverage f = average . map f +{-# INLINE weightedAverage #-} + +-- | Integrate from a to b. +to :: (Num a, Ord a) => a -> a -> a -> a +to a b x | x >= a && x <= b = 1 + | otherwise = 0 + +-- | Cumulative distribution function for a measure. +-- +-- Really cool; works perfectly in both discrete and continuous cases. +-- +-- > let f = cdf (fromObservations [1..10]) +-- > cdf 0 +-- 0.0 +-- > cdf 1 +-- 0.1 +-- > cdf 10 +-- 1.0 +-- > cdf 11 +-- 1.0 +-- +-- > let g = cdf (fromDensity standardNormal) +-- > cdf 0 +-- 0.504 +-- > cdf 1 +-- 0.838 +-- > cdf (1 / 0) +-- 0.999 +cdf :: Measure Double -> Double -> Double +cdf mu b = expectation $ negate (1 / 0) `to` b <$> mu + diff --git a/src/Measurable/Core.hs b/src/Measurable/Core.hs @@ -4,6 +4,7 @@ module Measurable.Core where import Control.Applicative import Control.Monad +import Control.Monad.Primitive import Control.Monad.Trans.Cont import Data.Foldable (Foldable) import qualified Data.Foldable as Foldable @@ -11,11 +12,17 @@ import Data.Monoid import qualified Data.Set as Set import Data.Traversable hiding (mapM) import Numeric.Integration.TanhSinh +import System.Random.MWC +import System.Random.Represent --- | A measure is represented as a continuation. +-- | A measure is represented as a continuation. Strictly it should have a +-- double return type, but the polymorphism doesn't seem to hurt in practice. type Measure r a = Cont r a type MeasureT r m a = ContT r m a +-- | A model is a proper measure wrapped around a sampling monad. +type Model a = MeasureT Double IO a + -- | A more appropriate version of runCont. integrate :: (a -> r) -> Measure r a -> r integrate = flip runCont @@ -57,11 +64,11 @@ fromDensityCountingT p support = ContT $ \f -> -- Numeric.Integration.TanhSinh module, the types are also constricted -- to Doubles. -- --- This is included moreso for interest's sake, and doesn't produce --- particularly accurate results. +-- This is included moreso for interest's sake and isn't particularly +-- accurate. fromDensityLebesgue :: (Double -> Double) -> Measure Double Double fromDensityLebesgue d = cont $ \f -> quadratureTanhSinh $ liftA2 (*) f d - where quadratureTanhSinh = result . last . everywhere trap + where quadratureTanhSinh = roundTo 2 . result . last . everywhere trap -- | Create a measure from observations sampled from some distribution. fromObservations @@ -76,6 +83,15 @@ fromObservationsT -> MeasureT r m a fromObservationsT = ContT . flip weightedAverageM +-- | Create an 'approximating measure' from observations. +fromObservationsApprox + :: (Applicative m, PrimMonad m, Fractional r, Traversable f, Integral n) + => n + -> f a + -> Gen (PrimState m) + -> MeasureT r m a +fromObservationsApprox n xs g = ContT $ \f -> weightedAverageApprox n f xs g + -- | Expectation is integration against the identity function. expectation :: Measure r r -> r expectation = integrate id @@ -142,3 +158,20 @@ weightedAverageM weightedAverageM f = liftM average . traverse f {-# INLINE weightedAverageM #-} +-- | An monadic approximate weighted average. +weightedAverageApprox + :: (Fractional c, Traversable f, PrimMonad m, Applicative m, Integral n) + => n + -> (a -> m c) + -> f a + -> Gen (PrimState m) + -> m c +weightedAverageApprox n f xs g = + sampleReplace n xs g >>= (liftM average . traverse f) +{-# INLINE weightedAverageApprox #-} + +-- | Round to a specified number of digits. +roundTo :: Int -> Double -> Double +roundTo n f = fromIntegral (round $ f * (10 ^ n) :: Int) / (10.0 ^^ n) +{-# INLINE roundTo #-} +