commit bb995926c2952eb9d9f3d6d3aaac3c43ef8df777
parent 6cbfa45b988907e94ca7826072ccced8b1317e42
Author: Jared Tobin <jared@jtobin.ca>
Date: Tue, 9 Feb 2016 21:24:28 +1300
Prep for release.
Diffstat:
8 files changed, 70 insertions(+), 82 deletions(-)
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,11 @@
+sudo: false
+env:
+ STACK_YAML: stack.yml
+
+install:
+ - stack --no-terminal --skip-ghc-check setup
+ - stack --no-terminal --skip-ghc-check build --copy-bins
+
+script:
+ - stack --no-terminal --skip-ghc-check test --flag "*":release
+
diff --git a/README.md b/README.md
@@ -1,11 +1,46 @@
# sampling
-Basic sampling tools.
+[![Build Status](https://secure.travis-ci.org/jtobin/sampling.png)](http://travis-ci.org/jtobin/sampling)
+[![Hackage Version](https://img.shields.io/hackage/v/sampling.svg)](http://hackage.haskell.org/package/sampling)
+
+Basic sampling functionality.
Exports variations on two simple functions for sampling from arbitrary
'Foldable' collections:
-* 'sample', for sampling without replacement
-* 'resample', for sampling with replacement (i.e. a bootstrap)
+* *sample*, for sampling without replacement
+* *resample*, for sampling with replacement (i.e. a bootstrap)
+
+## Usage
+
+*sampling* uses the PRNG provided by
+[mwc-random](https://hackage.haskell.org/package/mwc-random) for randomness.
+You can either provide a generator for functions that require one, e.g.:
+
+ > import Numeric.Sampling
+ > gen <- createSystemRandom
+ > resample 100 [1..1000] gen
+
+Or simply use the `IO`-specialized versions that will use the system's source
+of randomness:
+
+ > resampleIO 100 [1..1000]
+
+The non-`IO` specialized functions can be used with any `PrimMonad`.
+
+## Examples
+
+Sample ten elements from a list, with replacement:
+
+ > resampleIO 10 ['a'..'g']
+ "ddgaefbgef"
+
+Sample five elements from a Map, without replacement:
+
+ > import qualified Data.Map.Strict as Map
+ > sampleIO 5 (Map.fromList (zip [1..1000] (scanl1 (/) [1..])))
+ Just [0.0,1.0126536951759521e-203,2.9893108271424046e-50,0.0,0.0]
+## Etc.
+PRs and issues welcome.
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -5,14 +5,11 @@ module Main where
import Criterion.Main
import Numeric.Sampling (sampleIO, resampleIO)
-setupEnv :: IO [Int]
-setupEnv = return [1..1000000]
-
main :: IO ()
main = defaultMain [
- env setupEnv $ \x ->
- bgroup "small" [
- bench "sample" $ nfIO (sampleIO 100 x)
+ env (return ([1..10000] :: [Int])) $ \x ->
+ bgroup "benchmarks" [
+ bench "sample" $ nfIO (sampleIO 100 x)
, bench "resample" $ nfIO (resampleIO 100 x)
]
]
diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs
@@ -15,6 +15,9 @@ module Numeric.Sampling (
-- * Unequal probability, with replacement
, presample
, presampleIO
+
+ -- * Re-exported
+ , module System.Random.MWC
) where
import qualified Control.Foldl as F
@@ -22,7 +25,6 @@ import Control.Monad.Primitive (PrimMonad, PrimState)
import qualified Data.Foldable as Foldable
import Data.Function (on)
import Data.List (sortBy)
-import Data.Maybe (fromJust)
import Data.Vector (Vector)
import Numeric.Sampling.Internal
import System.Random.MWC
@@ -81,12 +83,12 @@ presample n weighted gen
| s <= 0 = return acc
| otherwise = do
z <- uniform g
- let (_, v) = fromJust $ F.fold (F.find ((>= z) . fst)) xs
- go (v:acc) (pred s)
+ case F.fold (F.find ((>= z) . fst)) xs of
+ Just (_, val) -> go (val:acc) (pred s)
+ Nothing -> return acc
sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)]
sortProbs = sortBy (compare `on` fst) . Foldable.toList
-
{-# INLINABLE presample #-}
-- | (/O(n log n)/) 'presample' specialized to IO.
diff --git a/lib/Numeric/Sampling/Functional.hs b/lib/Numeric/Sampling/Functional.hs
@@ -1,44 +0,0 @@
-{-# OPTIONS_GHC -fno-warn-type-defaults #-}
-{-# LANGUAGE BangPatterns #-}
-
-module Numeric.Sampling.Functional where
-
-import qualified Control.Foldl as F
-import Control.Monad.Primitive (PrimMonad, PrimState)
-import qualified Data.Foldable as Foldable (toList)
-import Data.Function (on)
-import Data.List (sortBy)
-import Data.Maybe (fromJust)
-import System.Random.MWC
-
-sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)]
-sortProbs = sortBy (compare `on` fst) . Foldable.toList
-
-presample
- :: (PrimMonad m, Foldable f)
- => Int -> f (Double, a) -> Gen (PrimState m) -> m [a]
-presample n weighted gen
- | n <= 0 = return []
- | otherwise = do
- let (bprobs, vals) = unzip $ sortProbs weighted
- probs = drop 1 (F.scan F.sum bprobs)
- cumulative = zip probs vals
- computeSample n cumulative gen
- where
- computeSample
- :: PrimMonad m => Int -> [(Double, a)] -> Gen (PrimState m) -> m [a]
- computeSample size xs g = go [] size where
- go !acc s
- | s <= 0 = return acc
- | otherwise = do
- z <- uniform g
- let (_, v) = fromJust $ F.fold (F.find ((>= z) . fst)) xs
- go (v:acc) (pred s)
-{-# INLINABLE presample #-}
-
-presampleIO :: Foldable f => Int -> f (Double, a) -> IO [a]
-presampleIO n weighted = do
- gen <- createSystemRandom
- presample n weighted gen
-{-# INLINABLE presampleIO #-}
-
diff --git a/lib/Numeric/Sampling/Internal.hs b/lib/Numeric/Sampling/Internal.hs
@@ -30,15 +30,11 @@
module Numeric.Sampling.Internal (
randomN
-
- , mutableSortByProbability
) where
import Control.Foldl (FoldM (..))
import Control.Monad (when)
import Control.Monad.Primitive
-import Data.Function (on)
-import qualified Data.Vector.Algorithms.Intro as V
import Data.Vector.Generic (Mutable, Vector)
import qualified Data.Vector.Generic as V
import Data.Vector.Generic.Mutable (MVector)
@@ -84,12 +80,3 @@ randomN n gen = FoldM step begin done where
return (Just v)
{-# INLINABLE randomN #-}
--- | Wrapper over the mutable sort process.
-mutableSortByProbability
- :: (Vector v (Double, a), PrimMonad m) => v (Double, a) -> m (v (Double, a))
-mutableSortByProbability xs = do
- warm <- V.unsafeThaw xs
- V.sortBy (flip compare `on` fst) warm
- cool <- V.unsafeFreeze warm
- return $! cool
-
diff --git a/sampling.cabal b/sampling.cabal
@@ -32,34 +32,33 @@ library
Numeric.Sampling.Internal
exposed-modules:
Numeric.Sampling
- , Numeric.Sampling.Functional
build-depends:
base < 5
, foldl
, mwc-random
, primitive
, vector
- , vector-algorithms
-benchmark bench-sampling
- type: exitcode-stdio-1.0
- hs-source-dirs: bench
- Main-is: Main.hs
+executable sampling-test
+ hs-source-dirs: src
+ Main-is: Main.hs
default-language: Haskell2010
ghc-options:
-Wall -O2
build-depends:
base
- , criterion
+ , containers
, sampling
-executable sample-test
- hs-source-dirs: src
- Main-is: Main.hs
+benchmark bench-sampling
+ type: exitcode-stdio-1.0
+ hs-source-dirs: bench
+ Main-is: Main.hs
default-language: Haskell2010
ghc-options:
-Wall -O2
build-depends:
base
+ , criterion
, sampling
diff --git a/src/Main.hs b/src/Main.hs
@@ -3,9 +3,10 @@
module Main where
import Numeric.Sampling
+import qualified Data.Map.Strict as Map
main :: IO ()
main = do
- foo <- resampleIO 100 ([1..1000000] :: [Int])
- -- foo <- resampleIO 100 [1..1000000]
+ -- foo <- resampleIO 100 ([1..1000000] :: [Int])
+ foo <- resampleIO 100 (Map.fromList $ zip [1..1000] (repeat 'a'))
print foo