commit b35b6311239ea155e54f144ed9bfd47897d64700
parent 3a58f8b5adb4b69d84f9c278a0e5cd179c629478
Author: Jared Tobin <jared@jtobin.ca>
Date: Sun, 7 Feb 2016 22:52:14 +1300
Resampling skeleton.
Diffstat:
3 files changed, 122 insertions(+), 9 deletions(-)
diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs
@@ -1,14 +1,31 @@
{-# OPTIONS_GHC -Wall #-}
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DeriveFunctor #-}
module Numeric.Sampling (
-- * Without replacement
sample
, sampleIO
+
+ -- * With replacement
+ , resample
+ , resampleIO
+
+ -- * Unequal probability sampling with replacement
+ , probResample
+ , probResampleIO
) where
import qualified Control.Foldl as F
import Control.Monad.Primitive (PrimMonad, PrimState)
+import qualified Data.Foldable as Foldable
+import Data.Function (on)
+import Data.List.Ordered (mergeBy)
+import Data.Maybe (fromJust) -- FIXME
+import Data.Vector (Vector)
import qualified Data.Vector as V
+import qualified Data.Vector.Algorithms.Merge as V
import Numeric.Sampling.Internal (randomN)
import System.Random.MWC
@@ -18,24 +35,97 @@ import System.Random.MWC
-- being sampled from.
sample
:: (PrimMonad m, Foldable f)
- => Int -> f a -> Gen (PrimState m) -> m (Maybe (V.Vector a))
+ => Int -> f a -> Gen (PrimState m) -> m (Maybe (Vector a))
sample n xs gen
| n < 0 = return Nothing
| otherwise = F.foldM (randomN n gen) xs
--- | (/O(n)/) Sample uniformly without replacement, specialized to IO.
-sampleIO :: Foldable f => Int -> f a -> IO (Maybe (V.Vector a))
+-- | (/O(n)/) 'sample' specialized to IO.
+sampleIO :: Foldable f => Int -> f a -> IO (Maybe (Vector a))
sampleIO n xs = do
gen <- createSystemRandom
sample n xs gen
+probResample
+ :: (PrimMonad m, Foldable f)
+ => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a)
+probResample n weighted gen
+ | n <= 0 = return V.empty
+ | otherwise = do
+ -- let vweighted = V.fromList $ Foldable.toList weighted
+ -- sorted <- mutableSortBy descendingOnFirst vweighted
+ let lweighted = Foldable.toList weighted
+ sorted = V.fromList $ sortProbs lweighted
+ let probs = V.scanl1' (+) $ fmap fst sorted
+ cdf = V.zip probs (fmap snd sorted)
+ accumulateSample n cdf gen
+ where
+ accumulateSample
+ :: PrimMonad m
+ => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a)
+ accumulateSample size xs g = go [] size where
+ go !acc s
+ | s <= 0 = return $! V.fromList acc
+ | otherwise = do
+ z <- uniform g
+ let result = snd . fromJust . V.find ((>= z) . fst) $ xs -- FIXME fromJust
+ go (result : acc) (pred s)
+
+probResampleIO :: Foldable f => Int -> f (Double, a) -> IO (Vector a)
+probResampleIO n weighted = do
+ gen <- createSystemRandom
+ probResample n weighted gen
+
+resample :: (PrimMonad m, Foldable f) => Int -> f a -> Gen (PrimState m) -> m (Vector a)
+resample n xs = probResample n weighted where
+ len = F.fold F.length xs
+ weighted = zip (repeat (1 / fromIntegral len)) (Foldable.toList xs)
+
+resampleIO :: Foldable f => Int -> f a -> IO (Vector a)
+resampleIO n xs = do
+ gen <- createSystemRandom
+ resample n xs gen
+
+
+sortProbs :: Ord a => [(a, b)] -> [(a, b)]
+sortProbs = hylo alg coalg where
+ alg EmptyF = []
+ alg (LeafF x) = [x]
+ alg (NodeF l r) = mergeBy (compare `on` fst) l r
+
+ coalg [] = EmptyF
+ coalg [x] = LeafF x
+ coalg xs = NodeF l r where
+ (l, r) = splitAt (length xs `div` 2) xs
+
+ hylo :: Functor f => (f a -> a) -> (b -> f b) -> b -> a
+ hylo f g = h where h = f . fmap h . g
+
+data TreeF a r =
+ EmptyF
+ | LeafF a
+ | NodeF r r
+ deriving Functor
+
+-- | Wrapper over the mutable sort process.
+mutableSortBy :: PrimMonad m => V.Comparison a -> Vector a -> m (Vector a)
+mutableSortBy comparison xs = do
+ warm <- V.unsafeThaw xs
+ V.sortBy comparison warm
+ cool <- V.unsafeFreeze warm
+ return $! cool
+
+descendingOnFirst :: Ord a => (a, b) -> (a, b) -> Ordering
+descendingOnFirst = flip compare `on` fst
+
+
-probSample = undefined
-probSampleIO = undefined
-resample = undefined
-resampleIO = undefined
-probResample = undefined
-probResampleIO = undefined
+-- probSample = undefined
+-- probSampleIO = undefined
+-- resample = undefined
+-- resampleIO = undefined
+-- probResample = undefined
+-- probResampleIO = undefined
-- streams?
diff --git a/sampling.cabal b/sampling.cabal
@@ -28,7 +28,19 @@ library
Numeric.Sampling
build-depends:
base < 5
+ , data-ordlist
, foldl
, mwc-random
, primitive
, vector
+ , vector-algorithms
+
+executable sample-test
+ hs-source-dirs: src
+ Main-is: Main.hs
+ default-language: Haskell2010
+ ghc-options:
+ -Wall -O2
+ build-depends:
+ base
+ , sampling
diff --git a/src/Main.hs b/src/Main.hs
@@ -0,0 +1,11 @@
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+
+module Main where
+
+import Numeric.Sampling (resampleIO)
+
+main :: IO ()
+main = do
+ foo <- resampleIO 100 [1..1000000]
+ print foo
+