measurable

A simple shallowly-embedded DSL for dealing with measures.
Log | Files | Refs | README | LICENSE

commit e8480e19084267851e255e13bdfded08960c97de
parent 488ee483e9468581e68612db15be44b20039994e
Author: Jared Tobin <jared@jtobin.ca>
Date:   Sat, 24 May 2014 23:36:42 +1000

Document Core module and update cabal file.

Diffstat:
Mmeasurable.cabal | 2--
Msrc/Measurable/Core.hs | 257+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
2 files changed, 185 insertions(+), 74 deletions(-)

diff --git a/measurable.cabal b/measurable.cabal @@ -15,8 +15,6 @@ synopsis: Basic measure wrangling. description: Various types, instances, and combinators that make it easy to play with measures. - . - <expand> source-repository head type: git diff --git a/src/Measurable/Core.hs b/src/Measurable/Core.hs @@ -2,50 +2,139 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleInstances #-} + +-- | +-- +-- The /measurable/ library provides basic facilities for defining, +-- manipulating, and - where possible - evaluating measures. +-- +-- Measure theory is the foundation of formal probability. And while measures +-- are useful for proofs and theoretical work, they're not a particularly +-- efficient way to get anything done in the real world. That said, a solid +-- mental model of probability in terms of abstract volumes and ratios is a +-- valuable (and difficult) thing to achieve, and /measurable/ can help to +-- achieve it. +-- +-- Measures are represented in their dual form as integrals. That is, a +-- measure is represented as an /integration procedure/ that, when provided +-- with a measurable function, evaluates an integral against a measure. +-- Everything corresponds exactly to the more low-level definition of measures +-- being set functions defined on sigma-algebras and all that, but in a fashion +-- that is much more amenable to representation on a computer. Since they are +-- /procedures/ that need a function to complete them, measures are naturally +-- represented by continuations. +-- +-- As continuations, measures are instances of the @Functor@ class; fmapping a +-- function over a measure transforms that measure's support while leaving its +-- density structure unchanged. @fmap@ corresponds to the thing that's +-- variably called a pushforward, distribution, or image measure; it adapts a +-- measure from one measureable space to another. +-- +-- <image here> +-- +-- Measures are also instances of @Monad@. 'return' wraps a value up as a +-- Dirac measure, and 'bind' is an integrating operator that marginalizes +-- existing measures by blending them into others. +-- +-- Measures have to be created from something; /measurable/ offers four +-- functions to build them: +-- +-- * 'fromPoints', for a discrete collection of points +-- +-- * 'fromDensityCounting', for a density with respect to counting measure +-- (i.e. a mass function) +-- +-- * 'fromDensityLebesgue', for a density with espect to Lebesgue measure +-- +-- * 'fromSamplingFunction', for a sampling function that uniquely +-- characterizes a measure. +-- +-- Addtionally, there are two important ways to query measures: +-- +-- * 'integrate' integrates a measurable function against a measure +-- +-- * 'expectation' integrates the identity function against a measure +-- +-- * 'cdf' returns a cumulative distribution function for a measure space +-- +-- Other queries (volume, variance, higher moments) are also available, but are +-- moreso included as curiosities. +-- +-- Measures are not only implemented as stand-alone probabilistic objects, but +-- as a monad transformers as well. This allows measure semantics to be +-- layered on top of any existing monad. module Measurable.Core where import Control.Arrow import Control.Applicative import Control.Monad -import Control.Monad.Trans.Cont +import Control.Monad.Trans import Data.Foldable (Foldable) import qualified Data.Foldable as Foldable -import Data.Hashable -import qualified Data.HashSet as HashSet +import Data.Functor.Identity import Data.Traversable hiding (mapM) import Numeric.Integration.TanhSinh --- | A measure is represented as a continuation. -type Measure a = Cont Double a +-- | We don't want to allow nonlinear things like callCC, so we roll our own +-- Continuation type that doesn't allow that sort of thing. +newtype ContT r m a = ContT { runContT :: (a -> m r) -> m r } + +type Cont r = ContT r Identity + +runCont :: Cont r a -> (a -> r) -> r +runCont m k = runIdentity (runContT m (Identity . k)) + +cont :: ((a -> r) -> r) -> Cont r a +cont f = ContT (\c -> Identity (f (runIdentity . c))) + +instance Functor (ContT r m) where + fmap f m = ContT $ \c -> runContT m (c . f) + +instance Applicative (ContT r m) where + pure x = ContT ($ x) + f <*> v = ContT $ \c -> runContT f $ \g -> runContT v (c . g) + +instance Monad (ContT r m) where + return x = ContT ($ x) + m >>= k = ContT $ \c -> runContT m (\x -> runContT (k x) c) + +instance MonadTrans (ContT r) where + lift m = ContT (m >>=) + +-- | A measure is represented as a continuation with output restricted to the +-- reals. Measures are *integration programs* that, when supplied with a +-- measurable function, integrate that function against some measure. +type Measure a = Cont Double a type MeasureT m a = ContT Double m a --- | A more appropriate version of runCont. +-- | A more domain-specific alias for runCont. This follows the traditional +-- mathematical form; integrate a function against a measure. integrate :: (a -> Double) -> Measure a -> Double integrate = flip runCont integrateT :: Monad m => (a -> Double) -> MeasureT m a -> m Double -integrateT f = (`runContT` fLifted) - where fLifted = return . f +integrateT f = (`runContT` (return . f)) -- | Things like convolution are trivially expressed by lifted arithmetic --- operators. +-- operators. Probability measures in particular - where things like +-- \infty - \infty are not an issue - form a ring. -- -- Note that the complexity of integration over a sum, difference, or product --- of 'n' measures, each encoding 'm' elements, is O(m^n). --- --- The reason for the complexity is that, in the case of dependent measures, --- a lot of book-keeping has to be done. Operations on independent measures --- can (theoretically) be implemented with drastically lower complexity. -instance (Monad m, Num a) => Num (ContT r m a) where +-- of 'n' measures in this situation, each encoding 'm' elements, is O(m^n) +-- in this implementation. Operations on independent measures can +-- theoretically be implemented with drastically lower complexity, possibly +-- by using some cleverness with Arrows. +instance (Monad m, Num a) => Num (ContT Double m a) where (+) = liftA2 (+) (-) = liftA2 (-) (*) = liftA2 (*) abs = id - signum = error "signum: not supported for Measures" + signum = const 1 fromInteger = return . fromInteger --- | Create a measure from a density w/respect to counting measure. +-- | Creates a measure from a density w/respect to counting measure. fromDensityCounting :: (Functor f, Foldable f) => (a -> Double) @@ -55,7 +144,7 @@ fromDensityCounting f support = cont $ \g -> Foldable.sum $ (g /* f) <$> support fromDensityCountingT - :: (Applicative m, Traversable t, Monad m) + :: (Applicative m, Monad m, Traversable t) => (a -> Double) -> t a -> MeasureT m a @@ -67,44 +156,36 @@ fromDensityCountingT p support = ContT $ \f -> -- | Create a measure from a density w/respect to Lebesgue measure. -- -- NOTE The quality of this implementation depends entirely on the underlying --- quadrature routine. As we're presently using the --- Numeric.Integration.TanhSinh module, the types are also constricted --- to Doubles. --- --- This is included moreso for interest's sake and isn't particularly --- accurate. +-- quadrature routine. This is included moreso for interest's sake and +-- isn't particularly accurate. fromDensityLebesgue :: (Double -> Double) -> Measure Double -fromDensityLebesgue d = cont $ \f -> quadratureTanhSinh $ f /* d - where quadratureTanhSinh = result . last . everywhere trap - --- | Create a measure from observations sampled from some distribution. -fromObservations - :: (Functor f, Foldable f) - => f a - -> Measure a -fromObservations = cont . flip weightedAverage - --- fmap f mu = cont $ \g -> integrate (g . f) mu --- liftM2 (+) mu nu = do --- a <- mu --- b <- nu --- return $ a + b --- --- mu + nu = cont $ \g -> integrate mu + integrate nu +fromDensityLebesgue d = cont $ \f -> quadratureTanhSinh $ f /* d where + quadratureTanhSinh = result . last . everywhere trap +-- | Create a measure from a fixed collection of points. +fromPoints :: (Functor f, Foldable f) => f a -> Measure a +fromPoints = cont . flip weightedAverage - -fromObservationsT +fromPointsT :: (Applicative m, Monad m, Traversable f) => f a -> MeasureT m a -fromObservationsT = ContT . flip weightedAverageM - --- | A synonym for fmap. +fromPointsT = ContT . flip weightedAverageM + +-- | Create a measure from a sampling function. Needs access to a random +-- number supply monad, so only a monad transformer version is available. +fromSamplingFunction + :: (Monad m, Applicative m) + => (t -> m b) + -> Int + -> t + -> MeasureT m b +fromSamplingFunction f n g = (lift $ replicateM n (f g)) >>= fromPointsT + +-- | Synonyms for fmap. push :: (a -> b) -> Measure a -> Measure b push = fmap --- | Yet another. pushT :: Monad m => (a -> b) -> MeasureT m a -> MeasureT m b pushT = fmap @@ -115,33 +196,71 @@ expectation = integrate id expectationT :: Monad m => MeasureT m Double -> m Double expectationT = integrateT id --- | The variance is obtained by integrating against the usual function. +-- | Variance is obtained by the usual identity. variance :: Measure Double -> Double variance mu = integrate (^ 2) mu - expectation mu ^ 2 varianceT :: Monad m => MeasureT m Double -> m Double varianceT mu = liftM2 (-) (integrateT (^ 2) mu) (liftM (^ 2) (expectationT mu)) --- | Return the mean & variance in a pair. +-- | Convenience function for returning the expectation & variance as a pair. meanVariance :: Measure Double -> (Double, Double) meanVariance = expectation &&& variance -meanVarianceT :: Monad m => MeasureT m Double -> m (Double, Double) -meanVarianceT mu = do - m <- expectationT mu - v <- varianceT mu - return (m, v) +meanVarianceT + :: (Applicative m, Monad m) + => MeasureT m Double + -> m (Double, Double) +meanVarianceT mu = (,) <$> expectationT mu <*> varianceT mu + +-- | The nth raw moment of a measure. +rawMoment :: Int -> Measure Double -> Double +rawMoment n = integrate (^ n) --- | The measure applied to the underlying space. This is trivially 1 for any --- probability measure. -volume :: Measure Double -> Double +rawMomentT :: Monad m => Int -> MeasureT m Double -> m Double +rawMomentT n = integrateT (^ n) + +-- | All raw moments of a measure. +rawMoments :: Measure Double -> [Double] +rawMoments mu = map (`rawMoment` mu) [1..] + +rawMomentsT :: Monad m => MeasureT m Double -> Int -> m [Double] +rawMomentsT mu n = mapM (`rawMomentT` mu) (take n [1..]) + +-- | The nth central moment of a measure. +centralMoment :: Int -> Measure Double -> Double +centralMoment n mu = integrate (\x -> (x - rm) ^ n) $ mu + where rm = rawMoment 1 mu + +centralMomentT :: Monad m => Int -> MeasureT m Double -> m Double +centralMomentT n mu = integrateT (^ n) $ do + rm <- lift $ rawMomentT 1 mu + (\x -> x - rm) <$> mu + +-- | All central moments. +centralMoments :: Measure Double -> [Double] +centralMoments mu = map (`centralMoment` mu) [1..] + +centralMomentsT :: Monad m => MeasureT m Double -> Int -> m [Double] +centralMomentsT mu n = mapM (`centralMomentT` mu) (take n [1..]) + +-- | The moment generating function for a measure. +momentGeneratingFunction :: Measure Double -> Double -> Double +momentGeneratingFunction mu t = integrate (exp . (* t) . id) mu + +-- | The cumulant generating function for a measure. +cumulantGeneratingFunction :: Measure Double -> Double -> Double +cumulantGeneratingFunction mu = log . momentGeneratingFunction mu + +-- | Apply a measure to the underlying space. In particular, this is trivially +-- 1 for any probability measure. +volume :: Measure a -> Double volume = integrate (const 1) -volumeT :: Monad m => MeasureT m Double -> m Double +volumeT :: Monad m => MeasureT m a -> m Double volumeT = integrateT (const 1) --- | Cumulative distribution function. Only makes sense for Fractional/Ord --- inputs. +-- | Cumulative distribution function. cdf :: Measure Double -> Double -> Double cdf mu x = expectation $ negativeInfinity `to` x <$> mu @@ -151,16 +270,15 @@ cdfT mu x = expectationT $ negativeInfinity `to` x <$> mu -- | Indicator function for the interval a <= x <= b. Useful for integrating -- from a to b. to :: (Num a, Ord a) => a -> a -> a -> a -to a b x | x >= a && x <= b = 1 - | otherwise = 0 +to a b x + | x >= a && x <= b = 1 + | otherwise = 0 -- | Integrate over a discrete, possibly unordered set. -containing :: (Num a, Eq b, Hashable b) => [b] -> b -> a +containing :: (Num a, Eq b) => [b] -> b -> a containing xs x - | x `HashSet.member` set = 1 - | otherwise = 0 - where - set = HashSet.fromList xs + | x `elem` xs = 1 + | otherwise = 0 -- | End of the line. negativeInfinity :: Fractional a => a @@ -190,11 +308,6 @@ weightedAverageM weightedAverageM f = liftM average . traverse f {-# INLINE weightedAverageM #-} --- | Round to a specified number of digits. -roundTo :: Int -> Double -> Double -roundTo n f = fromIntegral (round $ f * (10 ^ n) :: Int) / (10.0 ^^ n) -{-# INLINE roundTo #-} - -- | Lifted multiplication. (/*) :: (Num c, Applicative f) => f c -> f c -> f c (/*) = liftA2 (*)