measurable

A simple shallowly-embedded DSL for dealing with measures.
git clone git://git.jtobin.io/measurable.git
Log | Files | Refs | README | LICENSE

Core.hs (6225B)


      1 {-# OPTIONS_GHC -Wall #-}
      2 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
      3 
      4 module Measurable.Core where
      5 
      6 import Control.Applicative
      7 import Control.Foldl (Fold)
      8 import qualified Control.Foldl as Foldl
      9 import Data.List (foldl')
     10 import Numeric.Integration.TanhSinh
     11 
     12 -- | A measure can be represented by nothing more than a continuation with a
     13 --   restricted output type corresponding to the reals.
     14 --
     15 --   A @Functor@ instance implements pushforward or image measures - merely
     16 --   @fmap@ a measurable function over a measure to create one.
     17 --
     18 --   An @Applicative@ instance adds product measure, and in turn measure
     19 --   convolution, subtraction, and multiplication by enabling a @Num@ instance.
     20 --
     21 --   A @Monad@ instance lumps the ability to create measures from graphs of
     22 --   measures on top of that.
     23 newtype Measure a = Measure ((a -> Double) -> Double)
     24 
     25 instance Functor Measure where
     26   fmap f nu = Measure $ \g ->
     27     integrate (g . f) nu
     28 
     29 instance Applicative Measure where
     30   pure x = Measure (\f -> f x)
     31   Measure h <*> Measure g = Measure $ \f ->
     32     h (\k -> g (f . k))
     33 
     34 instance Monad Measure where
     35   return x  = Measure (\f -> f x)
     36   rho >>= g = Measure $ \f ->
     37     integrate (\nu -> integrate f (g nu)) rho
     38 
     39 instance Num a => Num (Measure a) where
     40   (+)         = liftA2 (+)
     41   (-)         = liftA2 (-)
     42   (*)         = liftA2 (*)
     43   abs         = fmap abs
     44   signum      = fmap signum
     45   fromInteger = pure . fromInteger
     46 
     47 -- | The 'integrate' function is just 'runCont' with its arguments reversed
     48 --   in order to resemble the conventional mathematical notation, in which one
     49 --   integrates a measurable function against a measure.
     50 --
     51 --   >>> let mu = fromSample [-1, 0, 1]
     52 --   >>> expectation mu
     53 --   0.0
     54 --   >>> variance mu
     55 --   0.6666666666666666
     56 integrate :: (a -> Double) -> Measure a -> Double
     57 integrate f (Measure nu) = nu f
     58 
     59 -- | The expectation of a measure is typically understood to be its expected
     60 --   value, which is found by integrating it against the identity function.
     61 expectation :: Measure Double -> Double
     62 expectation = integrate id
     63 
     64 -- | The variance of a measure, as per the usual formula
     65 --   @var X = E^2 X - EX^2@.
     66 variance :: Measure Double -> Double
     67 variance mu = integrate (^ 2) mu - expectation mu ^ 2
     68 
     69 -- | Calculates the volume of a 'Measure' over its entire space.  Trivially 1
     70 --   for any probability measure.
     71 --
     72 --   >>> let mu = fromSample [1..10]
     73 --   >>> volume mu
     74 --   1.0
     75 volume :: Measure a -> Double
     76 volume = integrate $ const 1
     77 
     78 -- | Create a 'Measure' from a probability mass function and its support,
     79 --   provided as a foldable container.
     80 --
     81 --   The requirement to supply the entire support is restrictive but necessary;
     82 --   for approximations, consider using 'fromSample'.
     83 --
     84 --   >>> let mu = fromMassFunction (binomialPmf 10 0.2) [0..10]
     85 --   >>> integrate fromIntegral mu
     86 --   2.0
     87 fromMassFunction :: Foldable f => (a -> Double) -> f a -> Measure a
     88 fromMassFunction f support = Measure $ \g ->
     89   foldl' (\acc x -> acc + f x * g x) 0 support
     90 
     91 -- | Create a 'Measure' from a probability density function.
     92 --
     93 --   Note that queries on measures constructed with @fromDensityFunction@ are
     94 --   subject to numerical error due to the underlying dependency on quadrature!
     95 --
     96 --   >>> let f x = 1 / (sqrt (2 * pi)) * exp (- (x ^ 2) / 2)
     97 --   >>> let mu = fromDensityFunction f
     98 --   >>> expectation mu
     99 --   0.0
    100 --   >>> variance mu
    101 --   1.0000000000000002
    102 fromDensityFunction :: (Double -> Double) -> Measure Double
    103 fromDensityFunction d = Measure $ \f ->
    104     quadratureTanhSinh (\x -> f x * d x)
    105   where
    106     quadratureTanhSinh = result . last . everywhere trap
    107 
    108 -- | Create a measure from a collection of observations.
    109 --
    110 --   Useful for creating general purpose empirical measures.
    111 --
    112 --   >>> let mu = fromSample [(1, 2), (3, 4)]
    113 --   >>> integrate (uncurry (+)) mu
    114 --   5.0
    115 fromSample :: Foldable f => f a -> Measure a
    116 fromSample = Measure . flip weightedAverage where
    117   weightedAverage :: (Foldable f, Fractional r) => (a -> r) -> f a -> r
    118   weightedAverage f = Foldl.fold (weightedAverageFold f)
    119 
    120   weightedAverageFold :: Fractional r => (a -> r) -> Fold a r
    121   weightedAverageFold f = Foldl.premap f averageFold
    122 
    123   averageFold :: Fractional a => Fold a a
    124   averageFold = (/) <$> Foldl.sum <*> Foldl.genericLength
    125 
    126 -- | The @nth@ raw moment of a 'Measure'.
    127 rawMoment :: Int -> Measure Double -> Double
    128 rawMoment n = integrate (^ n)
    129 
    130 -- | The @nth@ central moment of a 'Measure'.
    131 centralMoment :: Int -> Measure Double -> Double
    132 centralMoment n mu = integrate (\x -> (x - rm) ^ n) mu where
    133   rm = rawMoment 1 mu
    134 
    135 -- | The moment generating function corresponding to a 'Measure'.
    136 --
    137 --   >>> let mu = fromSample [1..10]
    138 --   >>> let mgfMu = mgf mu
    139 --   >>> fmap mgfMu [0, 0.5, 1]
    140 --   [1.0,37.4649671547254,3484.377384533132]
    141 mgf :: Measure Double -> Double -> Double
    142 mgf mu t = integrate (\x -> exp (t * x)) mu
    143 
    144 -- | The cumulant generating function corresponding to a 'Measure'.
    145 --
    146 --   >>> let mu = fromSample [1..10]
    147 --   >>> let cgfMu = cgf mu
    148 --   >>> fmap cgfMu [0, 0.5, 1]
    149 --   [0.0,3.6234062871236543,8.156044651432666]
    150 cgf :: Measure Double -> Double -> Double
    151 cgf mu = log . mgf mu
    152 
    153 -- | The cumulative distribution function corresponding to a 'Measure'
    154 --
    155 --   >>> let mu = fromSample [1..10]
    156 --   >>> let cdfMu = cdf mu
    157 --   >>> fmap cdfMu [0..10]
    158 --   [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
    159 cdf :: Measure Double -> Double -> Double
    160 cdf nu x = integrate (negativeInfinity `to` x) nu where
    161   negativeInfinity = negate (1 / 0)
    162 
    163 
    164 -- | A helpful utility for calculating the volume of a region in a measure
    165 --   space.
    166 --
    167 --   >>> let mu = fromSample [1..10]
    168 --   >>> integrate (2 `to` 8) mu
    169 --   0.7
    170 to :: (Num a, Ord a) => a -> a -> a -> a
    171 to a b x
    172   | x >= a && x <= b = 1
    173   | otherwise        = 0
    174 
    175 -- | An analogue of 'to' for measures defined over non-ordered domains.
    176 --
    177 --   >>> data Group = A | B | C deriving Eq
    178 --   >>> let mu = fromSample [A, A, A, B, A, B, C]
    179 --   >>> integrate (containing [B]) mu
    180 --   0.2857142857142857
    181 --   >>> integrate (containing [A,C]) mu
    182 --   0.7142857142857143
    183 --   >>> integrate (containing [A,B,C]) mu
    184 --   1.0
    185 containing :: (Num a, Eq b) => [b] -> b -> a
    186 containing xs x
    187   | x `elem` xs = 1
    188   | otherwise   = 0
    189