commit 3a58f8b5adb4b69d84f9c278a0e5cd179c629478
parent bd4744c7b051b9b1e973a099da5788ef16025cc4
Author: Jared Tobin <jared@jtobin.ca>
Date: Sun, 7 Feb 2016 21:08:07 +1300
Add basic support for sampling w/o replacement.
Diffstat:
3 files changed, 125 insertions(+), 2 deletions(-)
diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs
@@ -1,4 +1,41 @@
+{-# OPTIONS_GHC -Wall #-}
-module Numeric.Sampling where
+module Numeric.Sampling (
+ -- * Without replacement
+ sample
+ , sampleIO
+ ) where
+
+import qualified Control.Foldl as F
+import Control.Monad.Primitive (PrimMonad, PrimState)
+import qualified Data.Vector as V
+import Numeric.Sampling.Internal (randomN)
+import System.Random.MWC
+
+-- | (/O(n)/) Sample uniformly, without replacement.
+--
+-- Returns Nothing if the desired sample size is larger than the collection
+-- being sampled from.
+sample
+ :: (PrimMonad m, Foldable f)
+ => Int -> f a -> Gen (PrimState m) -> m (Maybe (V.Vector a))
+sample n xs gen
+ | n < 0 = return Nothing
+ | otherwise = F.foldM (randomN n gen) xs
+
+-- | (/O(n)/) Sample uniformly without replacement, specialized to IO.
+sampleIO :: Foldable f => Int -> f a -> IO (Maybe (V.Vector a))
+sampleIO n xs = do
+ gen <- createSystemRandom
+ sample n xs gen
+
+
+probSample = undefined
+probSampleIO = undefined
+resample = undefined
+resampleIO = undefined
+probResample = undefined
+probResampleIO = undefined
+-- streams?
diff --git a/lib/Numeric/Sampling/Internal.hs b/lib/Numeric/Sampling/Internal.hs
@@ -0,0 +1,79 @@
+{-# LANGUAGE FlexibleContexts #-}
+
+-- Much of the code in this module is a modification of that found in the
+-- 'foldl' library by Gabriel Gonzalez. Its license is reproduced below.
+
+-- Copyright (c) 2013 Gabriel Gonzalez
+-- All rights reserved.
+--
+-- Redistribution and use in source and binary forms, with or without modification,
+-- are permitted provided that the following conditions are met:
+-- * Redistributions of source code must retain the above copyright notice,
+-- this list of conditions and the following disclaimer.
+-- * Redistributions in binary form must reproduce the above copyright notice,
+-- this list of conditions and the following disclaimer in the documentation
+-- and/or other materials provided with the distribution.
+-- * Neither the name of Gabriel Gonzalez nor the names of other contributors
+-- may be used to endorse or promote products derived from this software
+-- without specific prior written permission.
+--
+-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+-- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+module Numeric.Sampling.Internal where
+
+import Control.Foldl (FoldM (..))
+import Control.Monad (when)
+import Control.Monad.Primitive
+import Data.Vector.Generic (Mutable, Vector)
+import qualified Data.Vector.Generic as V
+import Data.Vector.Generic.Mutable (MVector)
+import qualified Data.Vector.Generic.Mutable as M
+import System.Random.MWC
+
+data VectorState = Incomplete {-# UNPACK #-} !Int | Complete
+
+data RandomNState m v a = RandomNState
+ { _size :: !VectorState
+ , _reservoir :: !(Mutable v (PrimState m) a)
+ , _position :: {-# UNPACK #-} !Int
+ , _gen :: !(Gen (PrimState m))
+ }
+
+randomN
+ :: (PrimMonad m, Vector v a)
+ => Int -> Gen (PrimState m) -> FoldM m a (Maybe (v a))
+randomN n gen = FoldM step begin done where
+ step
+ :: (PrimMonad m, MVector (Mutable v) a)
+ => RandomNState m v a -> a -> m (RandomNState m v a)
+ step (RandomNState (Incomplete m) mv i g) a = do
+ M.write mv m a
+ let m' = m + 1
+ let s = if n <= m' then Complete else Incomplete m'
+ return $! RandomNState s mv (i + 1) g
+
+ step (RandomNState Complete mv i g) a = do
+ r <- uniformR (0, i - 1) g
+ when (r < n) (M.unsafeWrite mv r a)
+ return (RandomNState Complete mv (i + 1) g)
+
+ begin = do
+ mv <- M.new n
+ let s = if n <= 0 then Complete else Incomplete 0
+ return (RandomNState s mv 1 gen)
+
+ done :: (PrimMonad m, Vector v a) => RandomNState m v a -> m (Maybe (v a))
+ done (RandomNState (Incomplete _) _ _ _) = return Nothing
+ done (RandomNState Complete mv _ _) = do
+ v <- V.freeze mv
+ return (Just v)
+
diff --git a/sampling.cabal b/sampling.cabal
@@ -21,7 +21,14 @@ Source-repository head
library
default-language: Haskell2010
hs-source-dirs: lib
+ ghc-options: -Wall
+ other-modules:
+ Numeric.Sampling.Internal
exposed-modules:
Numeric.Sampling
build-depends:
- base < 5
+ base < 5
+ , foldl
+ , mwc-random
+ , primitive
+ , vector