sampling

Basic sampling functionality for Haskell.
git clone git://git.jtobin.io/sampling.git
Log | Files | Refs | README | LICENSE

Sampling.hs (4691B)


      1 {-# OPTIONS_GHC -Wall #-}
      2 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
      3 {-# LANGUAGE BangPatterns     #-}
      4 {-# LANGUAGE CPP #-}
      5 {-# LANGUAGE FlexibleContexts #-}
      6 
      7 module Numeric.Sampling (
      8     -- * Without replacement
      9     sample
     10   , sampleIO
     11 
     12     -- * With replacement
     13   , resample
     14   , resampleIO
     15 
     16     -- * Unequal probability, without replacement
     17   , psample
     18   , psampleIO
     19 
     20     -- * Unequal probability, with replacement
     21   , presample
     22   , presampleIO
     23 
     24     -- * Re-exported
     25   , module System.Random.MWC
     26   ) where
     27 
     28 import qualified Control.Foldl as F
     29 import Control.Monad.Primitive (PrimMonad, PrimState)
     30 import qualified Data.Foldable as Foldable
     31 #if __GLASGOW_HASKELL__ < 710
     32 import Data.Foldable (Foldable)
     33 #endif
     34 import Data.Function (on)
     35 import Data.List (sortBy)
     36 #if __GLASGOW_HASKELL__ < 800
     37 import Data.Monoid
     38 #endif
     39 import qualified Data.Sequence as S
     40 import qualified Data.Vector as V (toList)
     41 import Numeric.Sampling.Internal
     42 import System.Random.MWC
     43 
     44 -- | (/O(n)/) Sample uniformly, without replacement.
     45 --
     46 --   Returns Nothing if the desired sample size is larger than the collection
     47 --   being sampled from.
     48 sample
     49   :: (PrimMonad m, Foldable f)
     50   => Int -> f a -> Gen (PrimState m) -> m (Maybe [a])
     51 sample n xs gen
     52   | n < 0     = return Nothing
     53   | otherwise = do
     54       collected <- F.foldM (randomN n gen) xs
     55       return $ fmap V.toList collected
     56 {-# INLINABLE sample #-}
     57 
     58 -- | (/O(n)/) 'sample' specialized to IO.
     59 sampleIO :: Foldable f => Int -> f a -> IO (Maybe [a])
     60 sampleIO n xs = withSystemRandom . asGenIO $ sample n xs
     61 {-# INLINABLE sampleIO #-}
     62 
     63 -- | (/O(n log n)/) Sample uniformly with replacement (bootstrap).
     64 resample
     65   :: (PrimMonad m, Foldable f)
     66   => Int -> f a -> Gen (PrimState m) -> m [a]
     67 resample n xs = presample n weighted where
     68   weight   = recip (F.fold F.genericLength xs)
     69   weighted = zip (repeat weight) (Foldable.toList xs)
     70 {-# INLINABLE resample #-}
     71 
     72 -- | (/O(n log n)/) 'resample' specialized to IO.
     73 resampleIO :: Foldable f => Int -> f a -> IO [a]
     74 resampleIO n xs = withSystemRandom . asGenIO $ resample n xs
     75 {-# INLINABLE resampleIO #-}
     76 
     77 -- | (/O(n log n)/) Unequal probability sampling.
     78 --
     79 --   Returns Nothing if the desired sample size is larger than the collection
     80 --   being sampled from.
     81 psample
     82   :: (PrimMonad m, Foldable f)
     83   => Int -> f (Double, a) -> Gen (PrimState m) -> m (Maybe [a])
     84 psample n weighted gen = do
     85     let sorted = sortProbs weighted
     86     computeSample n sorted gen
     87   where
     88     computeSample
     89       :: PrimMonad m
     90       => Int -> [(Double, a)] -> Gen (PrimState m) -> m (Maybe [a])
     91     computeSample size xs g = go 1 [] size (S.fromList xs) where
     92       go !mass !acc j vs
     93         | j <  0    = return Nothing
     94         | j <= 0    = return (Just acc)
     95         | otherwise = do
     96             z <- fmap (* mass) (uniform g)
     97 
     98             let cumulative = S.drop 1 $ S.scanl (\s (pr, _) -> s + pr) 0 vs
     99                 midx       = S.findIndexL (>= z) cumulative
    100 
    101                 idx = case midx of
    102                   Nothing -> error "psample: no index found"
    103                   Just x  -> x
    104 
    105                 (p, val) = S.index vs idx
    106                 (l, r)   = S.splitAt idx vs
    107                 deleted = l <> S.drop 1 r
    108 
    109             go (mass - p) (val:acc) (pred j) deleted
    110 {-# INLINABLE psample #-}
    111 
    112 -- | (/O(n log n)/) 'psample' specialized to IO.
    113 psampleIO :: Foldable f => Int -> f (Double, a) -> IO (Maybe [a])
    114 psampleIO n weighted = withSystemRandom . asGenIO $ psample n weighted
    115 {-# INLINABLE psampleIO #-}
    116 
    117 -- | (/O(n log n)/) Unequal probability resampling.
    118 presample
    119   :: (PrimMonad m, Foldable f)
    120   => Int -> f (Double, a) -> Gen (PrimState m) -> m [a]
    121 presample n weighted gen
    122     | n <= 0    = return []
    123     | otherwise = do
    124         let (bprobs, vals) = unzip $ sortProbs weighted
    125             probs          = drop 1 (F.scan F.sum bprobs)
    126             cumulative     = zip probs vals
    127         computeSample n cumulative gen
    128   where
    129     computeSample
    130       :: PrimMonad m => Int -> [(Double, a)] -> Gen (PrimState m) -> m [a]
    131     computeSample size xs g = go [] size where
    132       go !acc s
    133         | s <= 0    = return acc
    134         | otherwise = do
    135             z <- uniform g
    136             case F.fold (F.find ((>= z) . fst)) xs of
    137               Just (_, val) -> go (val:acc) (pred s)
    138               Nothing       -> return acc
    139 {-# INLINABLE presample #-}
    140 
    141 -- | (/O(n log n)/) 'presample' specialized to IO.
    142 presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO [a]
    143 presampleIO n weighted = withSystemRandom . asGenIO $ presample n weighted
    144 {-# INLINABLE presampleIO #-}
    145 
    146 sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)]
    147 sortProbs = sortBy (flip compare `on` fst) . Foldable.toList
    148 {-# INLINABLE sortProbs #-}
    149