commit 61f1ccabe76270583dc679c1cb9adbf21ee156bb
parent 3a58f8b5adb4b69d84f9c278a0e5cd179c629478
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon,  8 Feb 2016 18:44:49 +1300
Merge pull request #1 from jtobin/jt-resample
Add basic functionality.
Diffstat:
4 files changed, 102 insertions(+), 16 deletions(-)
diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs
@@ -1,15 +1,29 @@
 {-# OPTIONS_GHC -Wall #-}
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+{-# LANGUAGE BangPatterns     #-}
+{-# LANGUAGE FlexibleContexts #-}
 
 module Numeric.Sampling (
     -- * Without replacement
     sample
   , sampleIO
+
+    -- * With replacement
+  , resample
+  , resampleIO
+
+    -- * Unequal probability, with replacement
+  , presample
+  , presampleIO
   ) where
 
-import qualified Control.Foldl             as F
-import           Control.Monad.Primitive   (PrimMonad, PrimState)
-import qualified Data.Vector               as V
-import           Numeric.Sampling.Internal (randomN)
+import qualified Control.Foldl               as F
+import           Control.Monad.Primitive     (PrimMonad, PrimState)
+import qualified Data.Foldable               as Foldable
+import           Data.Maybe                  (fromJust)
+import           Data.Vector                 (Vector)
+import qualified Data.Vector                 as V
+import           Numeric.Sampling.Internal
 import           System.Random.MWC
 
 -- | (/O(n)/) Sample uniformly, without replacement.
@@ -18,24 +32,59 @@ import           System.Random.MWC
 --   being sampled from.
 sample
   :: (PrimMonad m, Foldable f)
-  => Int -> f a -> Gen (PrimState m) -> m (Maybe (V.Vector a))
+  => Int -> f a -> Gen (PrimState m) -> m (Maybe (Vector a))
 sample n xs gen
   | n < 0     = return Nothing
   | otherwise = F.foldM (randomN n gen) xs
 
--- | (/O(n)/) Sample uniformly without replacement, specialized to IO.
-sampleIO :: Foldable f => Int -> f a -> IO (Maybe (V.Vector a))
+-- | (/O(n)/) 'sample' specialized to IO.
+sampleIO :: Foldable f => Int -> f a -> IO (Maybe (Vector a))
 sampleIO n xs = do
   gen <- createSystemRandom
   sample n xs gen
 
+-- | (/O(n log n)/) Sample uniformly with replacement (bootstrap).
+resample
+  :: (PrimMonad m, Foldable f)
+  => Int -> f a -> Gen (PrimState m) -> m (Vector a)
+resample n xs = presample n weighted where
+  weight   = recip (F.fold F.genericLength xs)
+  weighted = zip (repeat weight) (Foldable.toList xs)
 
-probSample = undefined
-probSampleIO = undefined
-resample = undefined
-resampleIO = undefined
-probResample = undefined
-probResampleIO = undefined
--- streams?
+-- | (/O(n log n)/) 'resample' specialized to IO.
+resampleIO :: (Foldable f) => Int -> f a -> IO (Vector a)
+resampleIO n xs = do
+  gen <- createSystemRandom
+  resample n xs gen
 
+-- | (/O(n log n)/) Unequal probability resampling.
+presample
+  :: (PrimMonad m, Foldable f)
+  => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a)
+presample n weighted gen
+    | n <= 0    = return V.empty
+    | otherwise = do
+        let vweighted = V.fromList $ Foldable.toList weighted
+        sorted <- mutableSortByProbability vweighted
+        let probs     = drop 1 (F.scan (F.premap fst F.sum) (V.toList sorted))
+            cdf       = V.zip (V.fromList probs) (V.map snd sorted)
+        accumulateSample n cdf gen
+  where
+    accumulateSample
+      :: (PrimMonad m)
+      => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a)
+    accumulateSample size xs g = go V.empty size where
+      go !acc s
+        | s <= 0    = return $! acc
+        | otherwise = do
+            z <- uniform g
+            let pair   = F.fold (F.find ((>= z) . fst)) xs
+                result = snd . fromJust $ pair -- FIXME
+            go (V.cons result acc) (pred s)
+
+-- | (/O(n log n)/) 'presample' specialized to IO.
+presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO (Vector a)
+presampleIO n weighted = do
+  gen <- createSystemRandom
+  presample n weighted gen
 
diff --git a/lib/Numeric/Sampling/Internal.hs b/lib/Numeric/Sampling/Internal.hs
@@ -28,11 +28,17 @@
 -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-module Numeric.Sampling.Internal where
+module Numeric.Sampling.Internal (
+    randomN
+
+  , mutableSortByProbability
+  ) where
 
 import           Control.Foldl               (FoldM (..))
 import           Control.Monad               (when)
 import           Control.Monad.Primitive
+import           Data.Function                (on)
+import qualified Data.Vector.Algorithms.Intro as V
 import           Data.Vector.Generic         (Mutable, Vector)
 import qualified Data.Vector.Generic         as V
 import           Data.Vector.Generic.Mutable (MVector)
@@ -76,4 +82,14 @@ randomN n gen = FoldM step begin done where
   done (RandomNState  Complete      mv _ _) = do
       v <- V.freeze mv
       return (Just v)
+{-# INLINABLE randomN #-}
+
+-- | Wrapper over the mutable sort process.
+mutableSortByProbability
+  :: (Vector v (Double, a), PrimMonad m) => v (Double, a) -> m (v (Double, a))
+mutableSortByProbability xs = do
+  warm <- V.unsafeThaw xs
+  V.sortBy (flip compare `on` fst) warm
+  cool <- V.unsafeFreeze warm
+  return $! cool
 
diff --git a/sampling.cabal b/sampling.cabal
@@ -21,7 +21,7 @@ Source-repository head
 library
   default-language:    Haskell2010
   hs-source-dirs:      lib
-  ghc-options:         -Wall
+  ghc-options:         -O2
   other-modules:
     Numeric.Sampling.Internal
   exposed-modules:
@@ -32,3 +32,13 @@ library
     , mwc-random
     , primitive
     , vector
+    , vector-algorithms
+
+executable sample-test
+  hs-source-dirs:    src
+  Main-is:           Main.hs
+  default-language:  Haskell2010
+  ghc-options: -O2
+  build-depends:
+      base
+    , sampling
diff --git a/src/Main.hs b/src/Main.hs
@@ -0,0 +1,11 @@
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+
+module Main where
+
+import Numeric.Sampling (sampleIO, resampleIO)
+
+main :: IO ()
+main = do
+  foo <- resampleIO 100 [1..1000000]
+  print foo
+