measurable

A simple shallowly-embedded DSL for dealing with measures.
Log | Files | Refs | README | LICENSE

commit 343e05321668dc63acfa3cbaf66dae4346c3f3cb
parent 872e0ea389171703cc707443641b767838de8997
Author: Jared Tobin <jared@jtobin.ca>
Date:   Wed, 11 Dec 2013 20:44:21 +1300

Clean up.

Diffstat:
M.gitignore | 5+++++
ALICENSE | 30++++++++++++++++++++++++++++++
Ameasurable.cabal | 20++++++++++++++++++++
Msrc/Examples.hs | 22++++++----------------
Msrc/Measurable/Core.hs | 76+++++++++++++++++++++++++++++++++++++++++++---------------------------------
5 files changed, 104 insertions(+), 49 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1 +1,6 @@ *swp +deprecated +dist +Setup.hs +*hi +*o diff --git a/LICENSE b/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2013, Jared Tobin + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Jared Tobin nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/measurable.cabal b/measurable.cabal @@ -0,0 +1,20 @@ +name: measurable +version: 0.1.0.0 +license: BSD3 +license-file: LICENSE +author: Jared Tobin +maintainer: jared@jtobin.ca +build-type: Simple +extra-source-files: README.md +cabal-version: >=1.10 + +library + exposed-modules: Measurable.Core + other-extensions: BangPatterns + build-depends: base >= 4.6 + , unordered-containers >= 0.2 + , integration >= 0.2 + , transformers >= 0.3 + , hashable >= 1.2 + hs-source-dirs: src + default-language: Haskell2010 diff --git a/src/Examples.hs b/src/Examples.hs @@ -14,7 +14,7 @@ import Data.Vector (singleton) import qualified Data.Traversable as Traversable import Measurable.Core import Numeric.SpecFunctions -import Statistics.Distribution +import Statistics.Distribution hiding (mean, variance) import Statistics.Distribution.Normal import Statistics.Distribution.Beta import Statistics.Distribution.ChiSquared @@ -38,14 +38,11 @@ altBetaMeasure epochs a b g = do bs <- lift $ replicateM epochs (genContVar (betaDistr a b) g) fromObservationsT bs -approxBetaMeasure nInnerApprox nOuterApprox a b g = do - bs <- lift $ genBetaSamples nInnerApprox a b g - fromObservationsApprox nOuterApprox bs g - -approxBinomialMeasure nInnerApprox nOuterApprox n p g = do - bs <- lift $ genBinomSamples nInnerApprox n p g - fromObservationsApprox nOuterApprox bs g +weirdFunction x + | x < 0 = sin x + | x >= 0 && x <= 1 = cos x + | x > 1 = log x -- | A standard beta-binomial conjugate model. Notice how naturally it's -- expressed using do-notation! @@ -58,13 +55,6 @@ altBetaBinomialConjugate a b n g = do p <- altBetaMeasure 1000 a b g binomMeasure n p -approxBetaBinomialConjugate - :: Double -> Double -> Int -> Gen RealWorld -> Model Int -approxBetaBinomialConjugate a b n g = do - p <- approxBetaMeasure 100000 100 a b g - approxBinomialMeasure 100 100 n p g - - -- | Observe a binomial distribution. genBinomSamples @@ -205,7 +195,7 @@ binomMeasure n p = fromDensityCountingT (binom p n) [0..n] -- to counting measure. They don't necessarily have to have domains that -- are instances of Num (or even have Ordered domains, though that's the case -- here). -data Group = A | B | C deriving (Enum, Eq, Ord, Show) +data Group = A | B | C deriving (Eq, Show) -- | Density of a categorical measure. categoricalOnGroupDensity :: Fractional a => Group -> a diff --git a/src/Measurable/Core.hs b/src/Measurable/Core.hs @@ -5,25 +5,21 @@ module Measurable.Core where +import Control.Arrow import Control.Applicative import Control.Monad -import Control.Monad.Primitive import Control.Monad.Trans.Cont import Data.Foldable (Foldable) import qualified Data.Foldable as Foldable -import qualified Data.Set as Set +import Data.Hashable +import qualified Data.HashSet as HashSet import Data.Traversable hiding (mapM) import Numeric.Integration.TanhSinh -import System.Random.MWC -import System.Random.Represent -- | A measure is represented as a continuation. type Measure a = Cont Double a type MeasureT m a = ContT Double m a --- | A model is a proper measure wrapped around a sampling monad. -type Model a = MeasureT IO a - -- | A more appropriate version of runCont. integrate :: (a -> Double) -> Measure a -> Double integrate = flip runCont @@ -34,13 +30,20 @@ integrateT f = (`runContT` fLifted) -- | Things like convolution are trivially expressed by lifted arithmetic -- operators. +-- +-- Note that the complexity of integration over a sum, difference, or product +-- of 'n' measures, each encoding 'm' elements, is O(m^n). +-- +-- The reason for the complexity is that, in the case of dependent measures, +-- a lot of book-keeping has to be done. Operations on independent measures +-- can (theoretically) be implemented with drastically lower complexity. instance (Monad m, Num a) => Num (ContT r m a) where (+) = liftA2 (+) (-) = liftA2 (-) (*) = liftA2 (*) abs = id signum = error "signum: not supported for Measures" - fromInteger = error "fromInteger: not supported for Measures" + fromInteger = return . fromInteger -- | Create a measure from a density w/respect to counting measure. fromDensityCounting @@ -72,7 +75,7 @@ fromDensityCountingT p support = ContT $ \f -> -- accurate. fromDensityLebesgue :: (Double -> Double) -> Measure Double fromDensityLebesgue d = cont $ \f -> quadratureTanhSinh $ f /* d - where quadratureTanhSinh = roundTo 2 . result . last . everywhere trap + where quadratureTanhSinh = result . last . everywhere trap -- | Create a measure from observations sampled from some distribution. fromObservations @@ -81,20 +84,29 @@ fromObservations -> Measure a fromObservations = cont . flip weightedAverage +-- fmap f mu = cont $ \g -> integrate (g . f) mu +-- liftM2 (+) mu nu = do +-- a <- mu +-- b <- nu +-- return $ a + b +-- +-- mu + nu = cont $ \g -> integrate mu + integrate nu + + + fromObservationsT :: (Applicative m, Monad m, Traversable f) => f a -> MeasureT m a fromObservationsT = ContT . flip weightedAverageM --- | Create an 'approximating measure' from observations. -fromObservationsApprox - :: (Applicative m, PrimMonad m, Traversable f, Integral n) - => n - -> f a - -> Gen (PrimState m) - -> MeasureT m a -fromObservationsApprox n xs g = ContT $ \f -> weightedAverageApprox n f xs g +-- | A synonym for fmap. +push :: (a -> b) -> Measure a -> Measure b +push = fmap + +-- | Yet another. +pushT :: Monad m => (a -> b) -> MeasureT m a -> MeasureT m b +pushT = fmap -- | Expectation is integration against the identity function. expectation :: Measure Double -> Double @@ -110,6 +122,16 @@ variance mu = integrate (^ 2) mu - expectation mu ^ 2 varianceT :: Monad m => MeasureT m Double -> m Double varianceT mu = liftM2 (-) (integrateT (^ 2) mu) (liftM (^ 2) (expectationT mu)) +-- | Return the mean & variance in a pair. +meanVariance :: Measure Double -> (Double, Double) +meanVariance = expectation &&& variance + +meanVarianceT :: Monad m => MeasureT m Double -> m (Double, Double) +meanVarianceT mu = do + m <- expectationT mu + v <- varianceT mu + return (m, v) + -- | The measure applied to the underlying space. This is trivially 1 for any -- probability measure. volume :: Measure Double -> Double @@ -132,13 +154,13 @@ to :: (Num a, Ord a) => a -> a -> a -> a to a b x | x >= a && x <= b = 1 | otherwise = 0 --- | Integrate over an ordered, discrete set. -containing :: (Num a, Ord b) => [b] -> b -> a +-- | Integrate over a discrete, possibly unordered set. +containing :: (Num a, Eq b, Hashable b) => [b] -> b -> a containing xs x - | x `Set.member` set = 1 + | x `HashSet.member` set = 1 | otherwise = 0 where - set = Set.fromList xs + set = HashSet.fromList xs -- | End of the line. negativeInfinity :: Fractional a => a @@ -168,18 +190,6 @@ weightedAverageM weightedAverageM f = liftM average . traverse f {-# INLINE weightedAverageM #-} --- | An monadic approximate weighted average. -weightedAverageApprox - :: (Fractional c, Traversable f, PrimMonad m, Applicative m, Integral n) - => n - -> (a -> m c) - -> f a - -> Gen (PrimState m) - -> m c -weightedAverageApprox n f xs g = - sampleReplace n xs g >>= (liftM average . traverse f) -{-# INLINE weightedAverageApprox #-} - -- | Round to a specified number of digits. roundTo :: Int -> Double -> Double roundTo n f = fromIntegral (round $ f * (10 ^ n) :: Int) / (10.0 ^^ n)