sampling

Basic sampling functionality for Haskell.
Log | Files | Refs | README | LICENSE

commit f7b7423b55addfa72225bb599163d4032d2ef044
parent f4b23ca3a4b71d68ab4e0b9bf73b8f575b812e69
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon,  8 Feb 2016 18:43:35 +1300

Touch up basics.

Diffstat:
Mlib/Numeric/Sampling.hs | 107++++++++++++++++++++++---------------------------------------------------------
Mlib/Numeric/Sampling/Internal.hs | 18+++++++++++++++++-
2 files changed, 47 insertions(+), 78 deletions(-)

diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs @@ -1,7 +1,7 @@ {-# OPTIONS_GHC -Wall #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} module Numeric.Sampling ( -- * Without replacement @@ -12,25 +12,18 @@ module Numeric.Sampling ( , resample , resampleIO - -- * Unequal probability sampling without replacement - , probSample - , probSampleIO - - -- * Unequal probability sampling with replacement - , probResample - , probResampleIO + -- * Unequal probability, with replacement + , presample + , presampleIO ) where -import qualified Control.Foldl as F -import Control.Monad.Primitive (PrimMonad, PrimState) -import qualified Data.Foldable as Foldable -import Data.Function (on) -import Data.List.Ordered (mergeBy) -import Data.Maybe (fromJust) -- FIXME -import Data.Vector (Vector) -import qualified Data.Vector as V -import qualified Data.Vector.Algorithms.Merge as V -import Numeric.Sampling.Internal (randomN) +import qualified Control.Foldl as F +import Control.Monad.Primitive (PrimMonad, PrimState) +import qualified Data.Foldable as Foldable +import Data.Maybe (fromJust) +import Data.Vector (Vector) +import qualified Data.Vector as V +import Numeric.Sampling.Internal import System.Random.MWC -- | (/O(n)/) Sample uniformly, without replacement. @@ -50,88 +43,48 @@ sampleIO n xs = do gen <- createSystemRandom sample n xs gen --- | (/O(n)/) Sample uniformly with replacement (bootstrap). +-- | (/O(n log n)/) Sample uniformly with replacement (bootstrap). resample :: (PrimMonad m, Foldable f) => Int -> f a -> Gen (PrimState m) -> m (Vector a) -resample n xs = probResample n weighted where +resample n xs = presample n weighted where weight = recip (F.fold F.genericLength xs) weighted = zip (repeat weight) (Foldable.toList xs) --- | (/O(n)/) 'resample' specialized to IO. -resampleIO :: Foldable f => Int -> f a -> IO (Vector a) +-- | (/O(n log n)/) 'resample' specialized to IO. +resampleIO :: (Foldable f) => Int -> f a -> IO (Vector a) resampleIO n xs = do gen <- createSystemRandom resample n xs gen - - - - -probResample +-- | (/O(n log n)/) Unequal probability resampling. +presample :: (PrimMonad m, Foldable f) => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a) -probResample n weighted gen +presample n weighted gen | n <= 0 = return V.empty | otherwise = do - -- let vweighted = V.fromList $ Foldable.toList weighted - -- sorted <- mutableSortBy descendingOnFirst vweighted - let lweighted = Foldable.toList weighted - sorted = sortProbs lweighted - let probs = F.scan (F.premap fst F.sum) sorted - cdf = V.fromList $ zip probs (fmap snd sorted) + let vweighted = V.fromList $ Foldable.toList weighted + sorted <- mutableSortByProbability vweighted + let probs = drop 1 (F.scan (F.premap fst F.sum) (V.toList sorted)) + cdf = V.zip (V.fromList probs) (V.map snd sorted) accumulateSample n cdf gen where accumulateSample - :: PrimMonad m + :: (PrimMonad m) => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a) accumulateSample size xs g = go V.empty size where go !acc s | s <= 0 = return $! acc | otherwise = do z <- uniform g - let result = snd . fromJust . F.fold (F.find ((>= z) . fst)) $ xs -- FIXME fromJust + let pair = F.fold (F.find ((>= z) . fst)) xs + result = snd . fromJust $ pair -- FIXME go (V.cons result acc) (pred s) -probResampleIO :: Foldable f => Int -> f (Double, a) -> IO (Vector a) -probResampleIO n weighted = do +-- | (/O(n log n)/) 'presample' specialized to IO. +presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO (Vector a) +presampleIO n weighted = do gen <- createSystemRandom - probResample n weighted gen - - -sortProbs :: Ord a => [(a, b)] -> [(a, b)] -sortProbs = hylo alg coalg where - alg EmptyF = [] - alg (LeafF x) = [x] - alg (NodeF l r) = mergeBy (compare `on` fst) l r - - coalg [] = EmptyF - coalg [x] = LeafF x - coalg xs = NodeF l r where - (l, r) = splitAt (length xs `div` 2) xs - - hylo :: Functor f => (f a -> a) -> (b -> f b) -> b -> a - hylo f g = h where h = f . fmap h . g - -data TreeF a r = - EmptyF - | LeafF a - | NodeF r r - deriving Functor - --- | Wrapper over the mutable sort process. -mutableSortBy :: PrimMonad m => V.Comparison a -> Vector a -> m (Vector a) -mutableSortBy comparison xs = do - warm <- V.unsafeThaw xs - V.sortBy comparison warm - cool <- V.unsafeFreeze warm - return $! cool - -descendingOnFirst :: Ord a => (a, b) -> (a, b) -> Ordering -descendingOnFirst = flip compare `on` fst - - -probSample = undefined -probSampleIO = undefined - + presample n weighted gen diff --git a/lib/Numeric/Sampling/Internal.hs b/lib/Numeric/Sampling/Internal.hs @@ -28,11 +28,17 @@ -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -module Numeric.Sampling.Internal where +module Numeric.Sampling.Internal ( + randomN + + , mutableSortByProbability + ) where import Control.Foldl (FoldM (..)) import Control.Monad (when) import Control.Monad.Primitive +import Data.Function (on) +import qualified Data.Vector.Algorithms.Intro as V import Data.Vector.Generic (Mutable, Vector) import qualified Data.Vector.Generic as V import Data.Vector.Generic.Mutable (MVector) @@ -76,4 +82,14 @@ randomN n gen = FoldM step begin done where done (RandomNState Complete mv _ _) = do v <- V.freeze mv return (Just v) +{-# INLINABLE randomN #-} + +-- | Wrapper over the mutable sort process. +mutableSortByProbability + :: (Vector v (Double, a), PrimMonad m) => v (Double, a) -> m (v (Double, a)) +mutableSortByProbability xs = do + warm <- V.unsafeThaw xs + V.sortBy (flip compare `on` fst) warm + cool <- V.unsafeFreeze warm + return $! cool