sampling

Basic sampling functionality for Haskell.
Log | Files | Refs | README | LICENSE

commit 61f1ccabe76270583dc679c1cb9adbf21ee156bb
parent 3a58f8b5adb4b69d84f9c278a0e5cd179c629478
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon,  8 Feb 2016 18:44:49 +1300

Merge pull request #1 from jtobin/jt-resample

Add basic functionality.
Diffstat:
Mlib/Numeric/Sampling.hs | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mlib/Numeric/Sampling/Internal.hs | 18+++++++++++++++++-
Msampling.cabal | 12+++++++++++-
Asrc/Main.hs | 11+++++++++++
4 files changed, 102 insertions(+), 16 deletions(-)

diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs @@ -1,15 +1,29 @@ {-# OPTIONS_GHC -Wall #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} module Numeric.Sampling ( -- * Without replacement sample , sampleIO + + -- * With replacement + , resample + , resampleIO + + -- * Unequal probability, with replacement + , presample + , presampleIO ) where -import qualified Control.Foldl as F -import Control.Monad.Primitive (PrimMonad, PrimState) -import qualified Data.Vector as V -import Numeric.Sampling.Internal (randomN) +import qualified Control.Foldl as F +import Control.Monad.Primitive (PrimMonad, PrimState) +import qualified Data.Foldable as Foldable +import Data.Maybe (fromJust) +import Data.Vector (Vector) +import qualified Data.Vector as V +import Numeric.Sampling.Internal import System.Random.MWC -- | (/O(n)/) Sample uniformly, without replacement. @@ -18,24 +32,59 @@ import System.Random.MWC -- being sampled from. sample :: (PrimMonad m, Foldable f) - => Int -> f a -> Gen (PrimState m) -> m (Maybe (V.Vector a)) + => Int -> f a -> Gen (PrimState m) -> m (Maybe (Vector a)) sample n xs gen | n < 0 = return Nothing | otherwise = F.foldM (randomN n gen) xs --- | (/O(n)/) Sample uniformly without replacement, specialized to IO. -sampleIO :: Foldable f => Int -> f a -> IO (Maybe (V.Vector a)) +-- | (/O(n)/) 'sample' specialized to IO. +sampleIO :: Foldable f => Int -> f a -> IO (Maybe (Vector a)) sampleIO n xs = do gen <- createSystemRandom sample n xs gen +-- | (/O(n log n)/) Sample uniformly with replacement (bootstrap). +resample + :: (PrimMonad m, Foldable f) + => Int -> f a -> Gen (PrimState m) -> m (Vector a) +resample n xs = presample n weighted where + weight = recip (F.fold F.genericLength xs) + weighted = zip (repeat weight) (Foldable.toList xs) -probSample = undefined -probSampleIO = undefined -resample = undefined -resampleIO = undefined -probResample = undefined -probResampleIO = undefined --- streams? +-- | (/O(n log n)/) 'resample' specialized to IO. +resampleIO :: (Foldable f) => Int -> f a -> IO (Vector a) +resampleIO n xs = do + gen <- createSystemRandom + resample n xs gen +-- | (/O(n log n)/) Unequal probability resampling. +presample + :: (PrimMonad m, Foldable f) + => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a) +presample n weighted gen + | n <= 0 = return V.empty + | otherwise = do + let vweighted = V.fromList $ Foldable.toList weighted + sorted <- mutableSortByProbability vweighted + let probs = drop 1 (F.scan (F.premap fst F.sum) (V.toList sorted)) + cdf = V.zip (V.fromList probs) (V.map snd sorted) + accumulateSample n cdf gen + where + accumulateSample + :: (PrimMonad m) + => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a) + accumulateSample size xs g = go V.empty size where + go !acc s + | s <= 0 = return $! acc + | otherwise = do + z <- uniform g + let pair = F.fold (F.find ((>= z) . fst)) xs + result = snd . fromJust $ pair -- FIXME + go (V.cons result acc) (pred s) + +-- | (/O(n log n)/) 'presample' specialized to IO. +presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO (Vector a) +presampleIO n weighted = do + gen <- createSystemRandom + presample n weighted gen diff --git a/lib/Numeric/Sampling/Internal.hs b/lib/Numeric/Sampling/Internal.hs @@ -28,11 +28,17 @@ -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -module Numeric.Sampling.Internal where +module Numeric.Sampling.Internal ( + randomN + + , mutableSortByProbability + ) where import Control.Foldl (FoldM (..)) import Control.Monad (when) import Control.Monad.Primitive +import Data.Function (on) +import qualified Data.Vector.Algorithms.Intro as V import Data.Vector.Generic (Mutable, Vector) import qualified Data.Vector.Generic as V import Data.Vector.Generic.Mutable (MVector) @@ -76,4 +82,14 @@ randomN n gen = FoldM step begin done where done (RandomNState Complete mv _ _) = do v <- V.freeze mv return (Just v) +{-# INLINABLE randomN #-} + +-- | Wrapper over the mutable sort process. +mutableSortByProbability + :: (Vector v (Double, a), PrimMonad m) => v (Double, a) -> m (v (Double, a)) +mutableSortByProbability xs = do + warm <- V.unsafeThaw xs + V.sortBy (flip compare `on` fst) warm + cool <- V.unsafeFreeze warm + return $! cool diff --git a/sampling.cabal b/sampling.cabal @@ -21,7 +21,7 @@ Source-repository head library default-language: Haskell2010 hs-source-dirs: lib - ghc-options: -Wall + ghc-options: -O2 other-modules: Numeric.Sampling.Internal exposed-modules: @@ -32,3 +32,13 @@ library , mwc-random , primitive , vector + , vector-algorithms + +executable sample-test + hs-source-dirs: src + Main-is: Main.hs + default-language: Haskell2010 + ghc-options: -O2 + build-depends: + base + , sampling diff --git a/src/Main.hs b/src/Main.hs @@ -0,0 +1,11 @@ +{-# OPTIONS_GHC -fno-warn-type-defaults #-} + +module Main where + +import Numeric.Sampling (sampleIO, resampleIO) + +main :: IO () +main = do + foo <- resampleIO 100 [1..1000000] + print foo +