sampling

Basic sampling functionality for Haskell.
Log | Files | Refs | README | LICENSE

commit 6cbfa45b988907e94ca7826072ccced8b1317e42
parent a29f755f45537a863a114685e465791ecf4bcea7
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon,  8 Feb 2016 23:15:53 +1300

More functional approach.

Diffstat:
Mlib/Numeric/Sampling.hs | 40+++++++++++++++++++++-------------------
Mlib/Numeric/Sampling/Functional.hs | 40+++++-----------------------------------
Msampling.cabal | 1-
Msrc/Main.hs | 2+-
4 files changed, 27 insertions(+), 56 deletions(-)

diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs @@ -20,9 +20,10 @@ module Numeric.Sampling ( import qualified Control.Foldl as F import Control.Monad.Primitive (PrimMonad, PrimState) import qualified Data.Foldable as Foldable +import Data.Function (on) +import Data.List (sortBy) import Data.Maybe (fromJust) import Data.Vector (Vector) -import qualified Data.Vector as V import Numeric.Sampling.Internal import System.Random.MWC @@ -48,14 +49,14 @@ sampleIO n xs = do -- | (/O(n log n)/) Sample uniformly with replacement (bootstrap). resample :: (PrimMonad m, Foldable f) - => Int -> f a -> Gen (PrimState m) -> m (Vector a) + => Int -> f a -> Gen (PrimState m) -> m [a] resample n xs = presample n weighted where weight = recip (F.fold F.genericLength xs) weighted = zip (repeat weight) (Foldable.toList xs) {-# INLINABLE resample #-} -- | (/O(n log n)/) 'resample' specialized to IO. -resampleIO :: (Foldable f) => Int -> f a -> IO (Vector a) +resampleIO :: (Foldable f) => Int -> f a -> IO [a] resampleIO n xs = do gen <- createSystemRandom resample n xs gen @@ -64,31 +65,32 @@ resampleIO n xs = do -- | (/O(n log n)/) Unequal probability resampling. presample :: (PrimMonad m, Foldable f) - => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a) + => Int -> f (Double, a) -> Gen (PrimState m) -> m [a] presample n weighted gen - | n <= 0 = return V.empty + | n <= 0 = return [] | otherwise = do - let vweighted = V.fromList $ Foldable.toList weighted - sorted <- mutableSortByProbability vweighted - let probs = drop 1 (F.scan (F.premap fst F.sum) (V.toList sorted)) - cdf = V.zip (V.fromList probs) (V.map snd sorted) - accumulateSample n cdf gen + let (bprobs, vals) = unzip $ sortProbs weighted + probs = drop 1 (F.scan F.sum bprobs) + cumulative = zip probs vals + computeSample n cumulative gen where - accumulateSample - :: (PrimMonad m) - => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a) - accumulateSample size xs g = go V.empty size where + computeSample + :: PrimMonad m => Int -> [(Double, a)] -> Gen (PrimState m) -> m [a] + computeSample size xs g = go [] size where go !acc s - | s <= 0 = return $! acc + | s <= 0 = return acc | otherwise = do z <- uniform g - let pair = F.fold (F.find ((>= z) . fst)) xs - result = snd . fromJust $ pair -- FIXME - go (V.cons result acc) (pred s) + let (_, v) = fromJust $ F.fold (F.find ((>= z) . fst)) xs + go (v:acc) (pred s) + + sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)] + sortProbs = sortBy (compare `on` fst) . Foldable.toList + {-# INLINABLE presample #-} -- | (/O(n log n)/) 'presample' specialized to IO. -presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO (Vector a) +presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO [a] presampleIO n weighted = do gen <- createSystemRandom presample n weighted gen diff --git a/lib/Numeric/Sampling/Functional.hs b/lib/Numeric/Sampling/Functional.hs @@ -1,39 +1,18 @@ {-# OPTIONS_GHC -fno-warn-type-defaults #-} {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DeriveFunctor #-} -module Numeric.Sampling.Functional ( - resample - , resampleIO - ) where +module Numeric.Sampling.Functional where import qualified Control.Foldl as F import Control.Monad.Primitive (PrimMonad, PrimState) import qualified Data.Foldable as Foldable (toList) import Data.Function (on) -import Data.List.Ordered (mergeBy) +import Data.List (sortBy) import Data.Maybe (fromJust) import System.Random.MWC -data TreeF a r = - EmptyF - | LeafF a - | NodeF r r - deriving Functor - sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)] -sortProbs = hylo alg coalg . Foldable.toList where - alg EmptyF = [] - alg (LeafF x) = [x] - alg (NodeF l r) = mergeBy (compare `on` fst) l r - - coalg [] = EmptyF - coalg [x] = LeafF x - coalg xs = NodeF l r where - (l, r) = splitAt (length xs `div` 2) xs - - hylo :: Functor f => (f a -> a) -> (b -> f b) -> b -> a - hylo f g = h where h = f . fmap h . g +sortProbs = sortBy (compare `on` fst) . Foldable.toList presample :: (PrimMonad m, Foldable f) @@ -55,20 +34,11 @@ presample n weighted gen z <- uniform g let (_, v) = fromJust $ F.fold (F.find ((>= z) . fst)) xs go (v:acc) (pred s) +{-# INLINABLE presample #-} presampleIO :: Foldable f => Int -> f (Double, a) -> IO [a] presampleIO n weighted = do gen <- createSystemRandom presample n weighted gen +{-# INLINABLE presampleIO #-} -resample - :: (PrimMonad m, Foldable f) => Int -> f a -> Gen (PrimState m) -> m [a] -resample n xs gen = do - let len = F.fold F.genericLength xs - weighted = zip (repeat (1 / len)) (Foldable.toList xs) - presample n weighted gen - -resampleIO :: Foldable f => Int -> f a -> IO [a] -resampleIO n xs = do - gen <- createSystemRandom - resample n xs gen diff --git a/sampling.cabal b/sampling.cabal @@ -37,7 +37,6 @@ library base < 5 , foldl , mwc-random - , data-ordlist , primitive , vector , vector-algorithms diff --git a/src/Main.hs b/src/Main.hs @@ -2,7 +2,7 @@ module Main where -import Numeric.Sampling.Functional +import Numeric.Sampling main :: IO () main = do