commit b35b6311239ea155e54f144ed9bfd47897d64700
parent 3a58f8b5adb4b69d84f9c278a0e5cd179c629478
Author: Jared Tobin <jared@jtobin.ca>
Date:   Sun,  7 Feb 2016 22:52:14 +1300
Resampling skeleton.
Diffstat:
3 files changed, 122 insertions(+), 9 deletions(-)
diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs
@@ -1,14 +1,31 @@
 {-# OPTIONS_GHC -Wall #-}
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DeriveFunctor #-}
 
 module Numeric.Sampling (
     -- * Without replacement
     sample
   , sampleIO
+
+    -- * With replacement
+  , resample
+  , resampleIO
+
+    -- * Unequal probability sampling with replacement
+  , probResample
+  , probResampleIO
   ) where
 
 import qualified Control.Foldl             as F
 import           Control.Monad.Primitive   (PrimMonad, PrimState)
+import qualified Data.Foldable             as Foldable
+import           Data.Function             (on)
+import           Data.List.Ordered         (mergeBy)
+import           Data.Maybe                (fromJust) -- FIXME
+import           Data.Vector               (Vector)
 import qualified Data.Vector               as V
+import qualified Data.Vector.Algorithms.Merge as V
 import           Numeric.Sampling.Internal (randomN)
 import           System.Random.MWC
 
@@ -18,24 +35,97 @@ import           System.Random.MWC
 --   being sampled from.
 sample
   :: (PrimMonad m, Foldable f)
-  => Int -> f a -> Gen (PrimState m) -> m (Maybe (V.Vector a))
+  => Int -> f a -> Gen (PrimState m) -> m (Maybe (Vector a))
 sample n xs gen
   | n < 0     = return Nothing
   | otherwise = F.foldM (randomN n gen) xs
 
--- | (/O(n)/) Sample uniformly without replacement, specialized to IO.
-sampleIO :: Foldable f => Int -> f a -> IO (Maybe (V.Vector a))
+-- | (/O(n)/) 'sample' specialized to IO.
+sampleIO :: Foldable f => Int -> f a -> IO (Maybe (Vector a))
 sampleIO n xs = do
   gen <- createSystemRandom
   sample n xs gen
 
+probResample
+  :: (PrimMonad m, Foldable f)
+  => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a)
+probResample n weighted gen
+    | n <= 0    = return V.empty
+    | otherwise = do
+        -- let vweighted = V.fromList $ Foldable.toList weighted
+        -- sorted <- mutableSortBy descendingOnFirst vweighted
+        let lweighted = Foldable.toList weighted
+            sorted    = V.fromList $ sortProbs lweighted
+        let probs     = V.scanl1' (+) $ fmap fst sorted
+            cdf       = V.zip probs (fmap snd sorted)
+        accumulateSample n cdf gen
+  where
+    accumulateSample
+      :: PrimMonad m
+      => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a)
+    accumulateSample size xs g = go [] size where
+      go !acc s
+        | s <= 0    = return $! V.fromList acc
+        | otherwise = do
+            z <- uniform g
+            let result = snd . fromJust . V.find ((>= z) . fst) $ xs -- FIXME fromJust
+            go (result : acc) (pred s)
+
+probResampleIO :: Foldable f => Int -> f (Double, a) -> IO (Vector a)
+probResampleIO n weighted = do
+  gen <- createSystemRandom
+  probResample n weighted gen
+
+resample :: (PrimMonad m, Foldable f) => Int -> f a -> Gen (PrimState m) -> m (Vector a)
+resample n xs = probResample n weighted where
+  len      = F.fold F.length xs
+  weighted = zip (repeat (1 / fromIntegral len)) (Foldable.toList xs)
+
+resampleIO :: Foldable f => Int -> f a -> IO (Vector a)
+resampleIO n xs = do
+  gen <- createSystemRandom
+  resample n xs gen
+
+
+sortProbs :: Ord a => [(a, b)] -> [(a, b)]
+sortProbs = hylo alg coalg where
+  alg EmptyF      = []
+  alg (LeafF x)   = [x]
+  alg (NodeF l r) = mergeBy (compare `on` fst) l r
+
+  coalg []  = EmptyF
+  coalg [x] = LeafF x
+  coalg xs  = NodeF l r where
+    (l, r) = splitAt (length xs `div` 2) xs
+
+  hylo :: Functor f => (f a -> a) -> (b -> f b) -> b -> a
+  hylo f g = h where h = f . fmap h . g
+
+data TreeF a r =
+    EmptyF
+  | LeafF a
+  | NodeF r r
+  deriving Functor
+
+-- | Wrapper over the mutable sort process.
+mutableSortBy :: PrimMonad m => V.Comparison a -> Vector a -> m (Vector a)
+mutableSortBy comparison xs = do
+  warm <- V.unsafeThaw xs
+  V.sortBy comparison warm
+  cool <- V.unsafeFreeze warm
+  return $! cool
+
+descendingOnFirst :: Ord a => (a, b) -> (a, b) -> Ordering
+descendingOnFirst = flip compare `on` fst
+
+
 
-probSample = undefined
-probSampleIO = undefined
-resample = undefined
-resampleIO = undefined
-probResample = undefined
-probResampleIO = undefined
+-- probSample = undefined
+-- probSampleIO = undefined
+-- resample = undefined
+-- resampleIO = undefined
+-- probResample = undefined
+-- probResampleIO = undefined
 -- streams?
 
 
diff --git a/sampling.cabal b/sampling.cabal
@@ -28,7 +28,19 @@ library
     Numeric.Sampling
   build-depends:
       base        < 5
+    , data-ordlist
     , foldl
     , mwc-random
     , primitive
     , vector
+    , vector-algorithms
+
+executable sample-test
+  hs-source-dirs:    src
+  Main-is:           Main.hs
+  default-language:  Haskell2010
+  ghc-options:
+    -Wall -O2
+  build-depends:
+      base
+    , sampling
diff --git a/src/Main.hs b/src/Main.hs
@@ -0,0 +1,11 @@
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+
+module Main where
+
+import Numeric.Sampling (resampleIO)
+
+main :: IO ()
+main = do
+  foo <- resampleIO 100 [1..1000000]
+  print foo
+