Clean up documentation. - mwc-probability - Sampling function-based probability distributions.

commit 4d0baf4ab4c299d50b06108cad5c2ad9a8c838ef
parent 52c4d52c4b2f50a77d4265b9c1d70fb56568640e
Author: Jared Tobin <jared@jtobin.ca>
Date:   Sat, 30 Jun 2018 17:41:04 +1200

Clean up documentation.

* Clean up intro
* Add documentation on parameters (finally)
* Streamlining tweaks throughout module

Diffstat:
M src/System/Random/MWC/Probability.hs  | 182 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------

1 file changed, 110 insertions(+), 72 deletions(-)
diff --git a/src/System/Random/MWC/Probability.hs b/src/System/Random/MWC/Probability.hs
@@ -4,18 +4,20 @@
 
 -- |
 -- Module: System.Random.MWC.Probability
--- Copyright: (c) 2015-2017 Jared Tobin, Marco Zocca
+-- Copyright: (c) 2015-2018 Jared Tobin, Marco Zocca
 -- License: MIT
 --
 -- Maintainer: Jared Tobin <jared@jtobin.ca>, Marco Zocca <zocca.marco gmail>
 -- Stability: unstable
 -- Portability: ghc
 --
--- A probability monad based on sampling functions.
+-- A probability monad based on sampling functions, implemented as a thin
+-- wrapper over the
+-- [mwc-random](https://hackage.haskell.org/package/mwc-random) library.
 --
 -- Probability distributions are abstract constructs that can be represented in
 -- a variety of ways.  The sampling function representation is particularly
--- useful - it's computationally efficient, and collections of samples are
+-- useful -- it's computationally efficient, and collections of samples are
 -- amenable to much practical work.
 --
 -- Probability monads propagate uncertainty under the hood.  An expression like
@@ -37,27 +39,18 @@
 -- >   n <- uniformR (5, 10)
 -- >   binomial n p
 --
--- The functor instance for a probability monad transforms the support of the
--- distribution while leaving its density structure invariant in some sense.
--- For example, @'uniform'@ is a distribution over the 0-1 interval, but @fmap
--- (+ 1) uniform@ is the translated distribution over the 1-2 interval.
+-- The functor instance allows one to transforms the support of a distribution
+-- while leaving its density structure invariant.  For example, @'uniform'@ is
+-- a distribution over the 0-1 interval, but @fmap (+ 1) uniform@ is the
+-- translated distribution over the 1-2 interval:
 --
 -- >>> create >>= sample (fmap (+ 1) uniform)
 -- 1.5480073474340754
 --
--- == Running the examples
+-- The applicative instance guarantees that the generated samples are generated
+-- independently:
 --
--- In the following we will assume an interactive GHCi session; the user should first declare a random number generator: 
---
--- >>> gen <- create
---
--- which will be reused throughout all examples.
--- Note: creating a random generator is an expensive operation, so it should be only performed once in the code (usually in the top-level IO action, e.g `main`).
---
--- == References
---
--- 1) L.Devroye, Non-Uniform Random Variate Generation, Springer-Verlag, New York, 1986. (Made freely available by the author: http://www.nrbook.com/devroye )
-
+-- >>> create >>= sample ((,) <$> uniform <*> uniform)
 
 module System.Random.MWC.Probability (
     module MWC
@@ -116,6 +109,7 @@ import System.Random.MWC.CondensedTable
 
 -- | A probability distribution characterized by a sampling function.
 --
+-- >>> gen <- createSystemRandom
 -- >>> sample uniform gen
 -- 0.4208881170464097
 newtype Prob m a = Prob { sample :: Gen (PrimState m) -> m a }
@@ -150,8 +144,6 @@ instance (Monad m, Num a) => Num (Prob m a) where
   signum      = fmap signum
   fromInteger = pure . fromInteger
 
-
-
 instance MonadTrans Prob where
   lift m = Prob $ const m
 
@@ -163,7 +155,10 @@ instance PrimMonad m => PrimMonad (Prob m) where
   primitive = lift . primitive
   {-# INLINE primitive #-}
 
--- | The uniform distribution over a type.
+-- | The uniform distribution at a specified type.
+--
+--   Note that `Double` and `Float` variates are defined over the unit
+--   interval.
 --
 --   >>> sample uniform gen :: IO Double
 --   0.29308497534914946
@@ -193,75 +188,95 @@ discreteUniform cs = do
   return $ F.toList cs !! j
 {-# INLINABLE discreteUniform #-}
 
--- | The standard normal or Gaussian distribution (with mean 0 and standard
---   deviation 1).
+-- | The standard normal or Gaussian distribution with mean 0 and standard
+--   deviation 1.
 standardNormal :: PrimMonad m => Prob m Double
 standardNormal = Prob MWC.Dist.standard
 {-# INLINABLE standardNormal #-}
 
--- | The normal or Gaussian distribution with a specified mean and standard
+-- | The normal or Gaussian distribution with specified mean and standard
 --   deviation.
+--
+--   Note that `sd` should be positive.
 normal :: PrimMonad m => Double -> Double -> Prob m Double
 normal m sd = Prob $ MWC.Dist.normal m sd
 {-# INLINABLE normal #-}
 
 -- | The log-normal distribution with specified mean and standard deviation.
+--
+--   Note that `sd` should be positive.
 logNormal :: PrimMonad m => Double -> Double -> Prob m Double
 logNormal m sd = exp <$> normal m sd
 {-# INLINABLE logNormal #-}
 
 -- | The exponential distribution with provided rate parameter.
+--
+--   Note that `r` should be positive.
 exponential :: PrimMonad m => Double -> Prob m Double
 exponential r = Prob $ MWC.Dist.exponential r
 {-# INLINABLE exponential #-}
 
--- | The Laplace distribution with provided location and scale parameters.
+-- | The Laplace or double-exponential distribution with provided location and
+--   scale parameters.
+--
+--   Note that `sigma` should be positive.
 laplace :: (Floating a, Variate a, PrimMonad m) => a -> a -> Prob m a
 laplace mu sigma = do
   u <- uniformR (-0.5, 0.5)
   let b = sigma / sqrt 2
   return $ mu - b * signum u * log (1 - 2 * abs u)
-{-# INLINABLE laplace #-}  
-
+{-# INLINABLE laplace #-}
 
 -- | The Weibull distribution with provided shape and scale parameters.
+--
+--   Note that both parameters should be positive.
 weibull :: (Floating a, Variate a, PrimMonad m) => a -> a -> Prob m a
 weibull a b = do
   x <- uniform
   return $ (- 1/a * log (1 - x)) ** 1/b
 {-# INLINABLE weibull #-}
 
-
--- | The gamma distribution with shape parameter a and scale parameter b.
+-- | The gamma distribution with shape parameter `a` and scale parameter `b`.
 --
 --   This is the parameterization used more traditionally in frequentist
 --   statistics.  It has the following corresponding probability density
 --   function:
 --
---   f(x; a, b) = 1 / (Gamma(a) * b ^ a) x ^ (a - 1) e ^ (- x / b)
+-- > f(x; a, b) = 1 / (Gamma(a) * b ^ a) x ^ (a - 1) e ^ (- x / b)
+--
+--   Note that both parameters should be positive.
 gamma :: PrimMonad m => Double -> Double -> Prob m Double
 gamma a b = Prob $ MWC.Dist.gamma a b
 {-# INLINABLE gamma #-}
 
--- | The inverse-gamma distribution.
+-- | The inverse-gamma distribution with shape parameter `a` and scale
+--   parameter `b`.
+--
+--   Note that both parameters should be positive.
 inverseGamma :: PrimMonad m => Double -> Double -> Prob m Double
 inverseGamma a b = recip <$> gamma a b
 {-# INLINABLE inverseGamma #-}
 
--- | The Normal-Gamma distribution of parameters mu, lambda, a, b
+-- | The Normal-Gamma distribution.
+--
+--   Note that the `lambda`, `a`, and `b` parameters should be positive.
 normalGamma :: PrimMonad m => Double -> Double -> Double -> Double -> Prob m Double
 normalGamma mu lambda a b = do
   tau <- gamma a b
-  let xsd = sqrt $ 1 / (lambda * tau)
+  let xsd = sqrt (recip (lambda * tau))
   normal mu xsd
 {-# INLINABLE normalGamma #-}
 
--- | The chi-square distribution.
+-- | The chi-square distribution with the specified degrees of freedom.
+--
+--   Note that `k` should be positive.
 chiSquare :: PrimMonad m => Int -> Prob m Double
 chiSquare k = Prob $ MWC.Dist.chiSquare k
 {-# INLINABLE chiSquare #-}
 
--- | The beta distribution.
+-- | The beta distribution with the specified shape parameters.
+--
+--   Note that both parameters should be positive.
 beta :: PrimMonad m => Double -> Double -> Prob m Double
 beta a b = do
   u <- gamma a 1
@@ -269,16 +284,24 @@ beta a b = do
   return $ u / (u + w)
 {-# INLINABLE beta #-}
 
--- | The Pareto distribution with specified index `a` and minimum `xmin` parameters.
+-- | The Pareto distribution with specified index `a` and minimum `xmin`
+--   parameters.
 --
--- Both `a` and `xmin` must be positive.
+--   Note that both parameters should be positive.
 pareto :: PrimMonad m => Double -> Double -> Prob m Double
 pareto a xmin = do
   y <- exponential a
   return $ xmin * exp y
-{-# INLINABLE pareto #-}  
+{-# INLINABLE pareto #-}
 
--- | The Dirichlet distribution.
+-- | The Dirichlet distribution with the provided concentration parameters.
+--   The dimension of the distribution is determined by the number of
+--   concentration parameters supplied.
+--
+--   >>> sample (dirichlet [0.1, 1, 10]) gen
+--   [1.2375387187120799e-5,3.4952460651813816e-3,0.9964923785476316]
+--
+--   Note that all concentration parameters should be positive.
 dirichlet
   :: (Traversable f, PrimMonad m) => f Double -> Prob m (f Double)
 dirichlet as = do
@@ -286,39 +309,51 @@ dirichlet as = do
   return $ fmap (/ sum zs) zs
 {-# INLINABLE dirichlet #-}
 
--- | The symmetric Dirichlet distribution of dimension n.
+-- | The symmetric Dirichlet distribution with dimension `n`.  The provided
+--   concentration parameter is simply replicated `n` times.
+--
+--   Note that `a` should be positive.
 symmetricDirichlet :: PrimMonad m => Int -> Double -> Prob m [Double]
 symmetricDirichlet n a = dirichlet (replicate n a)
 {-# INLINABLE symmetricDirichlet #-}
 
--- | The Bernoulli distribution.
+-- | The Bernoulli distribution with success probability `p`.
 bernoulli :: PrimMonad m => Double -> Prob m Bool
 bernoulli p = (< p) <$> uniform
 {-# INLINABLE bernoulli #-}
 
--- | The binomial distribution.
+-- | The binomial distribution with number of trials `n` and success
+--   probability `p`.
+--
+--   >>> sample (binomial 10 0.3) gen
+--   4
 binomial :: PrimMonad m => Int -> Double -> Prob m Int
 binomial n p = fmap (length . filter id) $ replicateM n (bernoulli p)
 {-# INLINABLE binomial #-}
 
--- | The negative binomial distribution with `n` trials each with "success" probability `p`.
--- Example X.1.5 in [1].
+-- | The negative binomial distribution with number of trials `n` and success
+--   probability `p`.
 --
--- Note: `n` must be larger than 1 and `p` included between 0 and 1.
+--   >>> sample (negativeBinomial 10 0.3) gen
+--   21
 negativeBinomial :: (PrimMonad m, Integral a) => a -> Double -> Prob m Int
 negativeBinomial n p = do
-  y <- gamma (fromIntegral n) ((1-p) / p)
+  y <- gamma (fromIntegral n) ((1 - p) / p)
   poisson y
 {-# INLINABLE negativeBinomial #-}
 
--- | The multinomial distribution.
+-- | The multinomial distribution of `n` trials and category probabilities
+--   `ps`.
+--
+--   Note that `ps` is a vector of probabilities and should sum to one.
 multinomial :: (Foldable f, PrimMonad m) => Int -> f Double -> Prob m [Int]
 multinomial n ps = do
   let cumulative = scanl1 (+) (F.toList ps)
   replicateM n $ do
     z <- uniform
-    let Just g = findIndex (> z) cumulative
-    return g
+    case findIndex (> z) cumulative of
+      Just g  -> return g
+      Nothing -> error "mwc-probability: invalid probability vector"
 {-# INLINABLE multinomial #-}
 
 -- | Student's t distribution.
@@ -329,15 +364,18 @@ student m s k = do
 {-# INLINABLE student #-}
 
 -- | An isotropic or spherical Gaussian distribution with specified mean
--- vector and scalar standard deviation parameter.
+--   vector and scalar standard deviation parameter.
+--
+--   Note that `sd` should be positive.
 isoNormal
   :: (Traversable f, PrimMonad m) => f Double -> Double -> Prob m (f Double)
 isoNormal ms sd = traverse (`normal` sd) ms
 {-# INLINABLE isoNormal #-}
 
--- | The inverse Gaussian (also known as Wald) distribution.
+-- | The inverse Gaussian (also known as Wald) distribution with mean parameter
+--   `mu` and shape parameter `lambda`.
 --
--- Both the mean parameter 'mu' and the shape parameter 'lambda' must be positive.
+--   Note that both 'mu' and 'lambda' should be positive.
 inverseGaussian :: PrimMonad m => Double -> Double -> Prob m Double
 inverseGaussian lambda mu = do
   nu <- standardNormal
@@ -349,37 +387,37 @@ inverseGaussian lambda mu = do
   if z <= thresh
     then return x
     else return (mu ** 2 / x)
-{-# INLINABLE inverseGaussian #-}    
-  
+{-# INLINABLE inverseGaussian #-}
 
--- | The Poisson distribution.
+-- | The Poisson distribution with rate parameter `l`.
+--
+--   Note that `l` should be positive.
 poisson :: PrimMonad m => Double -> Prob m Int
 poisson l = Prob $ genFromTable table where
   table = tablePoisson l
 {-# INLINABLE poisson #-}
 
--- | A categorical distribution defined by the supplied list of probabilities.
+-- | A categorical distribution defined by the supplied probabilities.
+--
+--   Note that the supplied container of probabilities must sum to 1.
 categorical :: (Foldable f, PrimMonad m) => f Double -> Prob m Int
 categorical ps = do
   xs <- multinomial 1 ps
   case xs of
     [x] -> return x
-    _   -> error "categorical: invalid return value"
+    _   -> error "mwc-probability: invalid probability vector"
 {-# INLINABLE categorical #-}
 
-
--- | The Zipf-Mandelbrot distribution, generated with the rejection
--- sampling algorithm X.6.1 shown in [1].
+-- | The Zipf-Mandelbrot distribution.
+--
+--   Note that `a` should be positive, and that values close to 1 should be
+--   avoided as they are very computationally intensive.
+--
+--   >>> samples 10 (zipf 1.1) gen
+--   [11315371987423520,2746946,653,609,2,13,85,4,256184577853,50]
 --
--- The parameter should be positive, but values close to 1 should be
--- avoided as they are very computationally intensive. The following
--- code illustrates this behaviour.
--- 
--- >>> samples 10 (zipf 1.1) gen
--- [11315371987423520,2746946,653,609,2,13,85,4,256184577853,50]
--- 
--- >>> samples 10 (zipf 1.5) gen
--- [19,3,3,1,1,2,1,191,2,1]
+--   >>> samples 10 (zipf 1.5) gen
+--   [19,3,3,1,1,2,1,191,2,1]
 zipf :: (PrimMonad m, Integral b) => Double -> Prob m b
 zipf a = do
   let
@@ -387,11 +425,11 @@ zipf a = do
     go = do
         u <- uniform
         v <- uniform
-        let xInt = floor (u ** (- 1 / (a - 1))) 
+        let xInt = floor (u ** (- 1 / (a - 1)))
             x = fromIntegral xInt
             t = (1 + 1 / x) ** (a - 1)
         if v * x * (t - 1) / (b - 1) <= t / b
           then return xInt
           else go
   go
-{-# INLINABLE zipf #-}  
+{-# INLINABLE zipf #-}

	mwc-probability Sampling function-based probability distributions.
	Log \| Files \| Refs \| README \| LICENSE