sampling

Basic sampling functionality for Haskell.
Log | Files | Refs | README | LICENSE

commit b35b6311239ea155e54f144ed9bfd47897d64700
parent 3a58f8b5adb4b69d84f9c278a0e5cd179c629478
Author: Jared Tobin <jared@jtobin.ca>
Date:   Sun,  7 Feb 2016 22:52:14 +1300

Resampling skeleton.

Diffstat:
Mlib/Numeric/Sampling.hs | 108++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Msampling.cabal | 12++++++++++++
Asrc/Main.hs | 11+++++++++++
3 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs @@ -1,14 +1,31 @@ {-# OPTIONS_GHC -Wall #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DeriveFunctor #-} module Numeric.Sampling ( -- * Without replacement sample , sampleIO + + -- * With replacement + , resample + , resampleIO + + -- * Unequal probability sampling with replacement + , probResample + , probResampleIO ) where import qualified Control.Foldl as F import Control.Monad.Primitive (PrimMonad, PrimState) +import qualified Data.Foldable as Foldable +import Data.Function (on) +import Data.List.Ordered (mergeBy) +import Data.Maybe (fromJust) -- FIXME +import Data.Vector (Vector) import qualified Data.Vector as V +import qualified Data.Vector.Algorithms.Merge as V import Numeric.Sampling.Internal (randomN) import System.Random.MWC @@ -18,24 +35,97 @@ import System.Random.MWC -- being sampled from. sample :: (PrimMonad m, Foldable f) - => Int -> f a -> Gen (PrimState m) -> m (Maybe (V.Vector a)) + => Int -> f a -> Gen (PrimState m) -> m (Maybe (Vector a)) sample n xs gen | n < 0 = return Nothing | otherwise = F.foldM (randomN n gen) xs --- | (/O(n)/) Sample uniformly without replacement, specialized to IO. -sampleIO :: Foldable f => Int -> f a -> IO (Maybe (V.Vector a)) +-- | (/O(n)/) 'sample' specialized to IO. +sampleIO :: Foldable f => Int -> f a -> IO (Maybe (Vector a)) sampleIO n xs = do gen <- createSystemRandom sample n xs gen +probResample + :: (PrimMonad m, Foldable f) + => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a) +probResample n weighted gen + | n <= 0 = return V.empty + | otherwise = do + -- let vweighted = V.fromList $ Foldable.toList weighted + -- sorted <- mutableSortBy descendingOnFirst vweighted + let lweighted = Foldable.toList weighted + sorted = V.fromList $ sortProbs lweighted + let probs = V.scanl1' (+) $ fmap fst sorted + cdf = V.zip probs (fmap snd sorted) + accumulateSample n cdf gen + where + accumulateSample + :: PrimMonad m + => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a) + accumulateSample size xs g = go [] size where + go !acc s + | s <= 0 = return $! V.fromList acc + | otherwise = do + z <- uniform g + let result = snd . fromJust . V.find ((>= z) . fst) $ xs -- FIXME fromJust + go (result : acc) (pred s) + +probResampleIO :: Foldable f => Int -> f (Double, a) -> IO (Vector a) +probResampleIO n weighted = do + gen <- createSystemRandom + probResample n weighted gen + +resample :: (PrimMonad m, Foldable f) => Int -> f a -> Gen (PrimState m) -> m (Vector a) +resample n xs = probResample n weighted where + len = F.fold F.length xs + weighted = zip (repeat (1 / fromIntegral len)) (Foldable.toList xs) + +resampleIO :: Foldable f => Int -> f a -> IO (Vector a) +resampleIO n xs = do + gen <- createSystemRandom + resample n xs gen + + +sortProbs :: Ord a => [(a, b)] -> [(a, b)] +sortProbs = hylo alg coalg where + alg EmptyF = [] + alg (LeafF x) = [x] + alg (NodeF l r) = mergeBy (compare `on` fst) l r + + coalg [] = EmptyF + coalg [x] = LeafF x + coalg xs = NodeF l r where + (l, r) = splitAt (length xs `div` 2) xs + + hylo :: Functor f => (f a -> a) -> (b -> f b) -> b -> a + hylo f g = h where h = f . fmap h . g + +data TreeF a r = + EmptyF + | LeafF a + | NodeF r r + deriving Functor + +-- | Wrapper over the mutable sort process. +mutableSortBy :: PrimMonad m => V.Comparison a -> Vector a -> m (Vector a) +mutableSortBy comparison xs = do + warm <- V.unsafeThaw xs + V.sortBy comparison warm + cool <- V.unsafeFreeze warm + return $! cool + +descendingOnFirst :: Ord a => (a, b) -> (a, b) -> Ordering +descendingOnFirst = flip compare `on` fst + + -probSample = undefined -probSampleIO = undefined -resample = undefined -resampleIO = undefined -probResample = undefined -probResampleIO = undefined +-- probSample = undefined +-- probSampleIO = undefined +-- resample = undefined +-- resampleIO = undefined +-- probResample = undefined +-- probResampleIO = undefined -- streams? diff --git a/sampling.cabal b/sampling.cabal @@ -28,7 +28,19 @@ library Numeric.Sampling build-depends: base < 5 + , data-ordlist , foldl , mwc-random , primitive , vector + , vector-algorithms + +executable sample-test + hs-source-dirs: src + Main-is: Main.hs + default-language: Haskell2010 + ghc-options: + -Wall -O2 + build-depends: + base + , sampling diff --git a/src/Main.hs b/src/Main.hs @@ -0,0 +1,11 @@ +{-# OPTIONS_GHC -fno-warn-type-defaults #-} + +module Main where + +import Numeric.Sampling (resampleIO) + +main :: IO () +main = do + foo <- resampleIO 100 [1..1000000] + print foo +