Sampling.hs (4691B)
1 {-# OPTIONS_GHC -Wall #-} 2 {-# OPTIONS_GHC -fno-warn-type-defaults #-} 3 {-# LANGUAGE BangPatterns #-} 4 {-# LANGUAGE CPP #-} 5 {-# LANGUAGE FlexibleContexts #-} 6 7 module Numeric.Sampling ( 8 -- * Without replacement 9 sample 10 , sampleIO 11 12 -- * With replacement 13 , resample 14 , resampleIO 15 16 -- * Unequal probability, without replacement 17 , psample 18 , psampleIO 19 20 -- * Unequal probability, with replacement 21 , presample 22 , presampleIO 23 24 -- * Re-exported 25 , module System.Random.MWC 26 ) where 27 28 import qualified Control.Foldl as F 29 import Control.Monad.Primitive (PrimMonad, PrimState) 30 import qualified Data.Foldable as Foldable 31 #if __GLASGOW_HASKELL__ < 710 32 import Data.Foldable (Foldable) 33 #endif 34 import Data.Function (on) 35 import Data.List (sortBy) 36 #if __GLASGOW_HASKELL__ < 800 37 import Data.Monoid 38 #endif 39 import qualified Data.Sequence as S 40 import qualified Data.Vector as V (toList) 41 import Numeric.Sampling.Internal 42 import System.Random.MWC 43 44 -- | (/O(n)/) Sample uniformly, without replacement. 45 -- 46 -- Returns Nothing if the desired sample size is larger than the collection 47 -- being sampled from. 48 sample 49 :: (PrimMonad m, Foldable f) 50 => Int -> f a -> Gen (PrimState m) -> m (Maybe [a]) 51 sample n xs gen 52 | n < 0 = return Nothing 53 | otherwise = do 54 collected <- F.foldM (randomN n gen) xs 55 return $ fmap V.toList collected 56 {-# INLINABLE sample #-} 57 58 -- | (/O(n)/) 'sample' specialized to IO. 59 sampleIO :: Foldable f => Int -> f a -> IO (Maybe [a]) 60 sampleIO n xs = withSystemRandom . asGenIO $ sample n xs 61 {-# INLINABLE sampleIO #-} 62 63 -- | (/O(n log n)/) Sample uniformly with replacement (bootstrap). 64 resample 65 :: (PrimMonad m, Foldable f) 66 => Int -> f a -> Gen (PrimState m) -> m [a] 67 resample n xs = presample n weighted where 68 weight = recip (F.fold F.genericLength xs) 69 weighted = zip (repeat weight) (Foldable.toList xs) 70 {-# INLINABLE resample #-} 71 72 -- | (/O(n log n)/) 'resample' specialized to IO. 73 resampleIO :: Foldable f => Int -> f a -> IO [a] 74 resampleIO n xs = withSystemRandom . asGenIO $ resample n xs 75 {-# INLINABLE resampleIO #-} 76 77 -- | (/O(n log n)/) Unequal probability sampling. 78 -- 79 -- Returns Nothing if the desired sample size is larger than the collection 80 -- being sampled from. 81 psample 82 :: (PrimMonad m, Foldable f) 83 => Int -> f (Double, a) -> Gen (PrimState m) -> m (Maybe [a]) 84 psample n weighted gen = do 85 let sorted = sortProbs weighted 86 computeSample n sorted gen 87 where 88 computeSample 89 :: PrimMonad m 90 => Int -> [(Double, a)] -> Gen (PrimState m) -> m (Maybe [a]) 91 computeSample size xs g = go 1 [] size (S.fromList xs) where 92 go !mass !acc j vs 93 | j < 0 = return Nothing 94 | j <= 0 = return (Just acc) 95 | otherwise = do 96 z <- fmap (* mass) (uniform g) 97 98 let cumulative = S.drop 1 $ S.scanl (\s (pr, _) -> s + pr) 0 vs 99 midx = S.findIndexL (>= z) cumulative 100 101 idx = case midx of 102 Nothing -> error "psample: no index found" 103 Just x -> x 104 105 (p, val) = S.index vs idx 106 (l, r) = S.splitAt idx vs 107 deleted = l <> S.drop 1 r 108 109 go (mass - p) (val:acc) (pred j) deleted 110 {-# INLINABLE psample #-} 111 112 -- | (/O(n log n)/) 'psample' specialized to IO. 113 psampleIO :: Foldable f => Int -> f (Double, a) -> IO (Maybe [a]) 114 psampleIO n weighted = withSystemRandom . asGenIO $ psample n weighted 115 {-# INLINABLE psampleIO #-} 116 117 -- | (/O(n log n)/) Unequal probability resampling. 118 presample 119 :: (PrimMonad m, Foldable f) 120 => Int -> f (Double, a) -> Gen (PrimState m) -> m [a] 121 presample n weighted gen 122 | n <= 0 = return [] 123 | otherwise = do 124 let (bprobs, vals) = unzip $ sortProbs weighted 125 probs = drop 1 (F.scan F.sum bprobs) 126 cumulative = zip probs vals 127 computeSample n cumulative gen 128 where 129 computeSample 130 :: PrimMonad m => Int -> [(Double, a)] -> Gen (PrimState m) -> m [a] 131 computeSample size xs g = go [] size where 132 go !acc s 133 | s <= 0 = return acc 134 | otherwise = do 135 z <- uniform g 136 case F.fold (F.find ((>= z) . fst)) xs of 137 Just (_, val) -> go (val:acc) (pred s) 138 Nothing -> return acc 139 {-# INLINABLE presample #-} 140 141 -- | (/O(n log n)/) 'presample' specialized to IO. 142 presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO [a] 143 presampleIO n weighted = withSystemRandom . asGenIO $ presample n weighted 144 {-# INLINABLE presampleIO #-} 145 146 sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)] 147 sortProbs = sortBy (flip compare `on` fst) . Foldable.toList 148 {-# INLINABLE sortProbs #-} 149