sampling

Basic sampling functionality for Haskell.
Log | Files | Refs | README | LICENSE

commit bb995926c2952eb9d9f3d6d3aaac3c43ef8df777
parent 6cbfa45b988907e94ca7826072ccced8b1317e42
Author: Jared Tobin <jared@jtobin.ca>
Date:   Tue,  9 Feb 2016 21:24:28 +1300

Prep for release.

Diffstat:
A.travis.yml | 11+++++++++++
MREADME.md | 41++++++++++++++++++++++++++++++++++++++---
Mbench/Main.hs | 9+++------
Mlib/Numeric/Sampling.hs | 10++++++----
Dlib/Numeric/Sampling/Functional.hs | 44--------------------------------------------
Mlib/Numeric/Sampling/Internal.hs | 13-------------
Msampling.cabal | 19+++++++++----------
Msrc/Main.hs | 5+++--
8 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/.travis.yml b/.travis.yml @@ -0,0 +1,11 @@ +sudo: false +env: + STACK_YAML: stack.yml + +install: + - stack --no-terminal --skip-ghc-check setup + - stack --no-terminal --skip-ghc-check build --copy-bins + +script: + - stack --no-terminal --skip-ghc-check test --flag "*":release + diff --git a/README.md b/README.md @@ -1,11 +1,46 @@ # sampling -Basic sampling tools. +[![Build Status](https://secure.travis-ci.org/jtobin/sampling.png)](http://travis-ci.org/jtobin/sampling) +[![Hackage Version](https://img.shields.io/hackage/v/sampling.svg)](http://hackage.haskell.org/package/sampling) + +Basic sampling functionality. Exports variations on two simple functions for sampling from arbitrary 'Foldable' collections: -* 'sample', for sampling without replacement -* 'resample', for sampling with replacement (i.e. a bootstrap) +* *sample*, for sampling without replacement +* *resample*, for sampling with replacement (i.e. a bootstrap) + +## Usage + +*sampling* uses the PRNG provided by +[mwc-random](https://hackage.haskell.org/package/mwc-random) for randomness. +You can either provide a generator for functions that require one, e.g.: + + > import Numeric.Sampling + > gen <- createSystemRandom + > resample 100 [1..1000] gen + +Or simply use the `IO`-specialized versions that will use the system's source +of randomness: + + > resampleIO 100 [1..1000] + +The non-`IO` specialized functions can be used with any `PrimMonad`. + +## Examples + +Sample ten elements from a list, with replacement: + + > resampleIO 10 ['a'..'g'] + "ddgaefbgef" + +Sample five elements from a Map, without replacement: + + > import qualified Data.Map.Strict as Map + > sampleIO 5 (Map.fromList (zip [1..1000] (scanl1 (/) [1..]))) + Just [0.0,1.0126536951759521e-203,2.9893108271424046e-50,0.0,0.0] +## Etc. +PRs and issues welcome. diff --git a/bench/Main.hs b/bench/Main.hs @@ -5,14 +5,11 @@ module Main where import Criterion.Main import Numeric.Sampling (sampleIO, resampleIO) -setupEnv :: IO [Int] -setupEnv = return [1..1000000] - main :: IO () main = defaultMain [ - env setupEnv $ \x -> - bgroup "small" [ - bench "sample" $ nfIO (sampleIO 100 x) + env (return ([1..10000] :: [Int])) $ \x -> + bgroup "benchmarks" [ + bench "sample" $ nfIO (sampleIO 100 x) , bench "resample" $ nfIO (resampleIO 100 x) ] ] diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs @@ -15,6 +15,9 @@ module Numeric.Sampling ( -- * Unequal probability, with replacement , presample , presampleIO + + -- * Re-exported + , module System.Random.MWC ) where import qualified Control.Foldl as F @@ -22,7 +25,6 @@ import Control.Monad.Primitive (PrimMonad, PrimState) import qualified Data.Foldable as Foldable import Data.Function (on) import Data.List (sortBy) -import Data.Maybe (fromJust) import Data.Vector (Vector) import Numeric.Sampling.Internal import System.Random.MWC @@ -81,12 +83,12 @@ presample n weighted gen | s <= 0 = return acc | otherwise = do z <- uniform g - let (_, v) = fromJust $ F.fold (F.find ((>= z) . fst)) xs - go (v:acc) (pred s) + case F.fold (F.find ((>= z) . fst)) xs of + Just (_, val) -> go (val:acc) (pred s) + Nothing -> return acc sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)] sortProbs = sortBy (compare `on` fst) . Foldable.toList - {-# INLINABLE presample #-} -- | (/O(n log n)/) 'presample' specialized to IO. diff --git a/lib/Numeric/Sampling/Functional.hs b/lib/Numeric/Sampling/Functional.hs @@ -1,44 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-type-defaults #-} -{-# LANGUAGE BangPatterns #-} - -module Numeric.Sampling.Functional where - -import qualified Control.Foldl as F -import Control.Monad.Primitive (PrimMonad, PrimState) -import qualified Data.Foldable as Foldable (toList) -import Data.Function (on) -import Data.List (sortBy) -import Data.Maybe (fromJust) -import System.Random.MWC - -sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)] -sortProbs = sortBy (compare `on` fst) . Foldable.toList - -presample - :: (PrimMonad m, Foldable f) - => Int -> f (Double, a) -> Gen (PrimState m) -> m [a] -presample n weighted gen - | n <= 0 = return [] - | otherwise = do - let (bprobs, vals) = unzip $ sortProbs weighted - probs = drop 1 (F.scan F.sum bprobs) - cumulative = zip probs vals - computeSample n cumulative gen - where - computeSample - :: PrimMonad m => Int -> [(Double, a)] -> Gen (PrimState m) -> m [a] - computeSample size xs g = go [] size where - go !acc s - | s <= 0 = return acc - | otherwise = do - z <- uniform g - let (_, v) = fromJust $ F.fold (F.find ((>= z) . fst)) xs - go (v:acc) (pred s) -{-# INLINABLE presample #-} - -presampleIO :: Foldable f => Int -> f (Double, a) -> IO [a] -presampleIO n weighted = do - gen <- createSystemRandom - presample n weighted gen -{-# INLINABLE presampleIO #-} - diff --git a/lib/Numeric/Sampling/Internal.hs b/lib/Numeric/Sampling/Internal.hs @@ -30,15 +30,11 @@ module Numeric.Sampling.Internal ( randomN - - , mutableSortByProbability ) where import Control.Foldl (FoldM (..)) import Control.Monad (when) import Control.Monad.Primitive -import Data.Function (on) -import qualified Data.Vector.Algorithms.Intro as V import Data.Vector.Generic (Mutable, Vector) import qualified Data.Vector.Generic as V import Data.Vector.Generic.Mutable (MVector) @@ -84,12 +80,3 @@ randomN n gen = FoldM step begin done where return (Just v) {-# INLINABLE randomN #-} --- | Wrapper over the mutable sort process. -mutableSortByProbability - :: (Vector v (Double, a), PrimMonad m) => v (Double, a) -> m (v (Double, a)) -mutableSortByProbability xs = do - warm <- V.unsafeThaw xs - V.sortBy (flip compare `on` fst) warm - cool <- V.unsafeFreeze warm - return $! cool - diff --git a/sampling.cabal b/sampling.cabal @@ -32,34 +32,33 @@ library Numeric.Sampling.Internal exposed-modules: Numeric.Sampling - , Numeric.Sampling.Functional build-depends: base < 5 , foldl , mwc-random , primitive , vector - , vector-algorithms -benchmark bench-sampling - type: exitcode-stdio-1.0 - hs-source-dirs: bench - Main-is: Main.hs +executable sampling-test + hs-source-dirs: src + Main-is: Main.hs default-language: Haskell2010 ghc-options: -Wall -O2 build-depends: base - , criterion + , containers , sampling -executable sample-test - hs-source-dirs: src - Main-is: Main.hs +benchmark bench-sampling + type: exitcode-stdio-1.0 + hs-source-dirs: bench + Main-is: Main.hs default-language: Haskell2010 ghc-options: -Wall -O2 build-depends: base + , criterion , sampling diff --git a/src/Main.hs b/src/Main.hs @@ -3,9 +3,10 @@ module Main where import Numeric.Sampling +import qualified Data.Map.Strict as Map main :: IO () main = do - foo <- resampleIO 100 ([1..1000000] :: [Int]) - -- foo <- resampleIO 100 [1..1000000] + -- foo <- resampleIO 100 ([1..1000000] :: [Int]) + foo <- resampleIO 100 (Map.fromList $ zip [1..1000] (repeat 'a')) print foo