commit aa11851c2bd13746b405d6cf0395a7746381e24b
parent 6afe6139938ca12810b74b50d5d7ec56181e8bee
Author: Jared Tobin <jared@jtobin.ca>
Date: Thu, 7 Nov 2013 10:01:59 +1300
Misc updates.
Diffstat:
3 files changed, 249 insertions(+), 4 deletions(-)
diff --git a/src/Examples.hs b/src/Examples.hs
@@ -5,6 +5,7 @@ import Control.Arrow
import Control.Error
import Control.Lens hiding (to)
import Control.Monad
+import Control.Monad.Cont
import Control.Monad.Primitive
import Control.Monad.Trans
import Data.HashMap.Strict (HashMap)
@@ -39,6 +40,14 @@ altBetaMeasure epochs a b g = do
bs <- lift $ replicateM epochs (genContVar (betaDistr a b) g)
fromObservationsT bs
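+-- | A beta measure approximated by resampling from a batch of generated
+-- beta observations.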
+approxBetaMeasure nInnerApprox nOuterApprox a b g = do
+ bs <- lift $ genBetaSamples nInnerApprox a b g
+ fromObservationsApprox nOuterApprox bs g
+
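+-- | A binomial measure approximated in the same way.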
+approxBinomialMeasure nInnerApprox nOuterApprox n p g = do
+ bs <- lift $ genBinomSamples nInnerApprox n p g
+ fromObservationsApprox nOuterApprox bs g
+
-- | A standard beta-binomial conjugate model. Notice how naturally it's
-- expressed using do-notation!
@@ -47,6 +56,38 @@ betaBinomialConjugate a b n = do
p <- betaMeasure a b
binomMeasure n p
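+-- | A variant of the conjugate model in which the beta prior is built from
+-- a batch of sampled observations.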
+altBetaBinomialConjugate a b n g = do
+ p <- altBetaMeasure 1000 a b g
+ binomMeasure n p
+
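+-- | The conjugate model again, this time expressed over the approximating
+-- measures defined above.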
+approxBetaBinomialConjugate
+ :: Double -> Double -> Int -> Gen RealWorld -> Model Int
+approxBetaBinomialConjugate a b n g = do
+ p <- approxBetaMeasure 100000 100 a b g
+ approxBinomialMeasure 100 100 n p g
+
+
+
+-- | Observe a binomial distribution.
+genBinomSamples
+ :: (Applicative m, PrimMonad m)
+ => Int
+ -> Int
+ -> Double
+ -> Gen (PrimState m)
+ -> m [Int]
+genBinomSamples n m p g = replicateM n $ genBinomial m p g
+
+-- | Observe a beta distribution.
+genBetaSamples
+ :: (Applicative m, PrimMonad m)
+ => Int
+ -> Double
+ -> Double
+ -> Gen (PrimState m)
+ -> m [Double]
+genBetaSamples n a b g = replicateM n $ genContVar (betaDistr a b) g
+
-- | Observe a gamma distribution.
genGammaSamples
:: (Applicative m, PrimMonad m)
@@ -208,3 +249,13 @@ gaussianMixtureModel n observed g = do
fromObservationsT samples
+-- | Count the number of Trues in a list.
+countTrue :: [Bool] -> Int
+countTrue = length . filter id
+
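+-- | Sample a Bernoulli trial with success probability p.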
+genBernoulli :: PrimMonad m => Double -> Gen (PrimState m) -> m Bool
+genBernoulli p g = liftM (< p) (uniform g)
+
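+-- | Sample a binomial count as the number of successes in n Bernoulli
+-- trials.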
+genBinomial :: PrimMonad m => Int -> Double -> Gen (PrimState m) -> m Int
+genBinomial n p g = liftM countTrue (replicateM n (genBernoulli p g))
+
diff --git a/src/Measurable.hs b/src/Measurable.hs
@@ -0,0 +1,161 @@
+{-# LANGUAGE BangPatterns #-}
+
+module Measurable where
+
+import Control.Applicative
+import Control.Monad
+import Data.List
+import Data.Monoid
+import Numeric.Integration.TanhSinh
+
+-- | A measure is a set function from some sigma-algebra to the extended real
+-- line. In practical terms we define probability in terms of measures; for a
+-- probability measure /P/ and some measurable set /A/, the measure of the set
+-- is defined to be that set's probability.
+--
+-- For any sigma field, there is a one-to-one correspondence between measures
+-- and increasing linear functionals on its associated space of positive
+-- measurable functions. That is,
+--
+-- P(A) = f(I_A)
+--
+-- for A a measurable set and I_A its indicator function. So we can generally
+-- abuse notation to write something like P(I_A), even though we don't
+-- directly apply the measure to a function.
+--
+-- So we can actually deal with measures in terms of measurable functions,
+-- rather than via the sigma algebra or measurable sets. Instead of taking
+-- a set and returning a probability, we can take a function and return a
+-- probability.
+--
+-- Once we have a measure, we use it by integrating against it. Take a
+-- real-valued random variable (i.e., measurable function) /X/. The mean of X
+-- is E X = int_R x d(Px), for Px the distribution (image measure) of X.
+--
+-- We can generalize this to work with arbitrary measurable functions and
+-- measures. Expectation can be defined by taking a measurable function and
+-- applying a measure to it - i.e., just function application.
+--
+-- So really, a Measure in this sense is an expression of a particular
+-- computational process - expectation. We leave a callback to be
+-- plugged in - a measurable function - and from there, we can finish the
+-- computation and return a value. A measure is actually represented as a
+-- *program* that, given a measurable function, integrates that function.
+-- It's thus completely equivalent to the Continuation monad, albeit with
+-- a restricted result type.
+--
+-- The Functor instance provides the ability to create pushforward/image
+-- measures. That's handled by good ol' fmap. The strength of the Monad
+-- instance is that it allows us to do conditional probability. That is,
+--
+-- parameterModel >>= dataModel == model
+--
+-- E.g., given 'betaMeasure a b' and 'binomMeasure n p' functions that create
+-- the obvious measures, we can express a beta-binomial model like so:
+--
+-- betaBinomialConjugate :: Double -> Double -> Int -> Measure Double
+-- betaBinomialConjugate a b n = do
+-- p <- betaMeasure a b
+-- binomMeasure n p
+--
+
+newtype Measure a = Measure { measure :: (a -> Double) -> Double }
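+
+-- A rough usage sketch, using 'fromObservations' and 'expectation' as
+-- defined below: integrating the identity function against a measure built
+-- from observations recovers their average, and a second moment is just the
+-- expectation of a pushforward.
+--
+-- > measure (fromObservations [1..10]) id
+-- 5.5
+-- > expectation ((^ 2) <$> fromObservations [1..10])
+-- 38.5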
+
+instance Num a => Num (Measure a) where
+ (+) = liftA2 (+)
+ (-) = liftA2 (-)
+ (*) = liftA2 (*)
+ abs = id
+ signum _ = error "signum: not supported for Measures"
+ fromInteger = error "fromInteger: not supported for Measures"
+
+instance Fractional a => Monoid (Measure a) where
+ mempty = identityMeasure
+ mappend = (+)
+
+instance Functor Measure where
+ fmap f mu = Measure $ \g -> measure mu $ g . f -- pushforward/image measure
+
+instance Applicative Measure where
+ pure = return
+ (<*>) = ap
+
+instance Monad Measure where
+ return x = Measure (\f -> f x)
+ mu >>= f = Measure $ \d ->
+ measure mu $ \g ->
+ measure (f g) d
+
+-- | The volume is obtained by integrating against a constant. This is '1' for
+-- any probability measure.
+volume :: Measure a -> Double
+volume mu = measure mu (const 1)
+
+-- | The expectation is obtained by integrating against the identity function.
+--
+-- This is just (`runCont` id).
+expectation :: Measure Double -> Double
+expectation mu = measure mu id
+
+-- | The variance is obtained by integrating against the squaring function
+-- and subtracting the squared expectation, i.e. E(X^2) - (E X)^2.
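+--
+-- Over evenly-weighted observations this is the population variance, e.g.
+-- (roughly):
+--
+-- > variance (fromObservations [1..10])
+-- 8.25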
+variance :: Measure Double -> Double
+variance mu = measure mu (^ 2) - expectation mu ^ 2
+
+-- | Create a measure from a collection of observations from some distribution.
+fromObservations :: Fractional a => [a] -> Measure a
+fromObservations xs = Measure (`weightedAverage` xs)
+
+-- | Create a measure from a density function.
+fromDensity :: (Double -> Double) -> Measure Double
+fromDensity d = Measure $ \f -> quadratureTanhSinh $ liftA2 (*) f d
+ where quadratureTanhSinh = result . last . everywhere trap
+
+-- | Create a measure from a mass function.
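+--
+-- For instance, a fair die (roughly):
+--
+-- > expectation (fromMassFunction (const (1 / 6)) [1..6])
+-- 3.5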
+fromMassFunction :: (a -> Double) -> [a] -> Measure a
+fromMassFunction p support = Measure $ \f ->
+ sum . map (liftA2 (*) f p) $ support
+
+-- | The (sum) identity measure.
+identityMeasure :: Fractional a => Measure a
+identityMeasure = fromObservations []
+
+-- | Simple average.
+average :: Fractional a => [a] -> a
+average xs = fst $ foldl'
+ (\(!m, !n) x -> (m + (x - m) / fromIntegral (n + 1), n + 1)) (0, 0) xs
+{-# INLINE average #-}
+
+-- | Weighted average.
+weightedAverage :: Fractional c => (a -> c) -> [a] -> c
+weightedAverage f = average . map f
+{-# INLINE weightedAverage #-}
+
+-- | Integrate from a to b.
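+--
+-- Integrating the indicator of an interval against a measure gives the
+-- measure (probability) of that interval, e.g. (roughly):
+--
+-- > expectation $ 2 `to` 5 <$> fromObservations [1..10]
+-- 0.4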
+to :: (Num a, Ord a) => a -> a -> a -> a
+to a b x | x >= a && x <= b = 1
+ | otherwise = 0
+
+-- | Cumulative distribution function for a measure.
+--
+-- Really cool; works perfectly in both discrete and continuous cases.
+--
+-- > let f = cdf (fromObservations [1..10])
+-- > f 0
+-- 0.0
+-- > f 1
+-- 0.1
+-- > f 10
+-- 1.0
+-- > f 11
+-- 1.0
+--
+-- > let g = cdf (fromDensity standardNormal)
+-- > g 0
+-- 0.504
+-- > g 1
+-- 0.838
+-- > g (1 / 0)
+-- 0.999
+cdf :: Measure Double -> Double -> Double
+cdf mu b = expectation $ negate (1 / 0) `to` b <$> mu
+
diff --git a/src/Measurable/Core.hs b/src/Measurable/Core.hs
@@ -4,6 +4,7 @@ module Measurable.Core where
import Control.Applicative
import Control.Monad
+import Control.Monad.Primitive
import Control.Monad.Trans.Cont
import Data.Foldable (Foldable)
import qualified Data.Foldable as Foldable
@@ -11,11 +12,17 @@ import Data.Monoid
import qualified Data.Set as Set
import Data.Traversable hiding (mapM)
import Numeric.Integration.TanhSinh
+import System.Random.MWC
+import System.Random.Represent
--- | A measure is represented as a continuation.
+-- | A measure is represented as a continuation. Strictly speaking it should
+-- have a Double return type, but the polymorphism doesn't seem to hurt in
+-- practice.
type Measure r a = Cont r a
type MeasureT r m a = ContT r m a
+-- | A model is a proper measure wrapped around a sampling monad.
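+--
+-- 'approxBetaBinomialConjugate' in Examples.hs, for instance, has the type
+--
+-- > Double -> Double -> Int -> Gen RealWorld -> Model Int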
+type Model a = MeasureT Double IO a
+
-- | A more appropriate version of runCont.
integrate :: (a -> r) -> Measure r a -> r
integrate = flip runCont
@@ -57,11 +64,11 @@ fromDensityCountingT p support = ContT $ \f ->
-- Numeric.Integration.TanhSinh module, the types are also constricted
-- to Doubles.
--
--- This is included moreso for interest's sake, and doesn't produce
--- particularly accurate results.
+-- This is included more for interest's sake than anything else, and isn't
+-- particularly accurate.
fromDensityLebesgue :: (Double -> Double) -> Measure Double Double
fromDensityLebesgue d = cont $ \f -> quadratureTanhSinh $ liftA2 (*) f d
- where quadratureTanhSinh = result . last . everywhere trap
+ where quadratureTanhSinh = roundTo 2 . result . last . everywhere trap
-- | Create a measure from observations sampled from some distribution.
fromObservations
@@ -76,6 +83,15 @@ fromObservationsT
-> MeasureT r m a
fromObservationsT = ContT . flip weightedAverageM
+-- | Create an 'approximating measure' from observations.
+fromObservationsApprox
+ :: (Applicative m, PrimMonad m, Fractional r, Traversable f, Integral n)
+ => n
+ -> f a
+ -> Gen (PrimState m)
+ -> MeasureT r m a
+fromObservationsApprox n xs g = ContT $ \f -> weightedAverageApprox n f xs g
+
-- | Expectation is integration against the identity function.
expectation :: Measure r r -> r
expectation = integrate id
@@ -142,3 +158,20 @@ weightedAverageM
weightedAverageM f = liftM average . traverse f
{-# INLINE weightedAverageM #-}
+-- | A monadic approximate weighted average.
+weightedAverageApprox
+ :: (Fractional c, Traversable f, PrimMonad m, Applicative m, Integral n)
+ => n
+ -> (a -> m c)
+ -> f a
+ -> Gen (PrimState m)
+ -> m c
+weightedAverageApprox n f xs g =
+ sampleReplace n xs g >>= (liftM average . traverse f)
+{-# INLINE weightedAverageApprox #-}
+
+-- | Round to a specified number of digits.
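+--
+-- For example:
+--
+-- > roundTo 2 3.14159
+-- 3.14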
+roundTo :: Int -> Double -> Double
+roundTo n f = fromIntegral (round $ f * (10 ^ n) :: Int) / (10.0 ^^ n)
+{-# INLINE roundTo #-}
+