sampling

Basic sampling functionality for Haskell.
Log | Files | Refs | README | LICENSE

commit a29f755f45537a863a114685e465791ecf4bcea7
parent 7ce815a74a5a4933859dd2bc5b9d4b4a85b9537d
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon,  8 Feb 2016 22:57:33 +1300

Add functional module.

* Mergesort on lists beats sorting w/a mutable vector on time.

Diffstat:
Alib/Numeric/Sampling/Functional.hs | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msampling.cabal | 16++++++++++++++--
Asrc/Main.hs | 11+++++++++++
3 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/lib/Numeric/Sampling/Functional.hs b/lib/Numeric/Sampling/Functional.hs @@ -0,0 +1,74 @@ +{-# OPTIONS_GHC -fno-warn-type-defaults #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DeriveFunctor #-} + +module Numeric.Sampling.Functional ( + resample + , resampleIO + ) where + +import qualified Control.Foldl as F +import Control.Monad.Primitive (PrimMonad, PrimState) +import qualified Data.Foldable as Foldable (toList) +import Data.Function (on) +import Data.List.Ordered (mergeBy) +import Data.Maybe (fromJust) +import System.Random.MWC + +data TreeF a r = + EmptyF + | LeafF a + | NodeF r r + deriving Functor + +sortProbs :: (Foldable f, Ord a) => f (a, b) -> [(a, b)] +sortProbs = hylo alg coalg . Foldable.toList where + alg EmptyF = [] + alg (LeafF x) = [x] + alg (NodeF l r) = mergeBy (compare `on` fst) l r + + coalg [] = EmptyF + coalg [x] = LeafF x + coalg xs = NodeF l r where + (l, r) = splitAt (length xs `div` 2) xs + + hylo :: Functor f => (f a -> a) -> (b -> f b) -> b -> a + hylo f g = h where h = f . fmap h . g + +presample + :: (PrimMonad m, Foldable f) + => Int -> f (Double, a) -> Gen (PrimState m) -> m [a] +presample n weighted gen + | n <= 0 = return [] + | otherwise = do + let (bprobs, vals) = unzip $ sortProbs weighted + probs = drop 1 (F.scan F.sum bprobs) + cumulative = zip probs vals + computeSample n cumulative gen + where + computeSample + :: PrimMonad m => Int -> [(Double, a)] -> Gen (PrimState m) -> m [a] + computeSample size xs g = go [] size where + go !acc s + | s <= 0 = return acc + | otherwise = do + z <- uniform g + let (_, v) = fromJust $ F.fold (F.find ((>= z) . fst)) xs + go (v:acc) (pred s) + +presampleIO :: Foldable f => Int -> f (Double, a) -> IO [a] +presampleIO n weighted = do + gen <- createSystemRandom + presample n weighted gen + +resample + :: (PrimMonad m, Foldable f) => Int -> f a -> Gen (PrimState m) -> m [a] +resample n xs gen = do + let len = F.fold F.genericLength xs + weighted = zip (repeat (1 / len)) (Foldable.toList xs) + presample n weighted gen + +resampleIO :: Foldable f => Int -> f a -> IO [a] +resampleIO n xs = do + gen <- createSystemRandom + resample n xs gen diff --git a/sampling.cabal b/sampling.cabal @@ -29,13 +29,15 @@ library ghc-options: -Wall -O2 other-modules: - Numeric.Sampling.Internal + Numeric.Sampling.Internal exposed-modules: - Numeric.Sampling + Numeric.Sampling + , Numeric.Sampling.Functional build-depends: base < 5 , foldl , mwc-random + , data-ordlist , primitive , vector , vector-algorithms @@ -52,3 +54,13 @@ benchmark bench-sampling , criterion , sampling +executable sample-test + hs-source-dirs: src + Main-is: Main.hs + default-language: Haskell2010 + ghc-options: + -Wall -O2 + build-depends: + base + , sampling + diff --git a/src/Main.hs b/src/Main.hs @@ -0,0 +1,11 @@ +{-# OPTIONS_GHC -fno-warn-type-defaults #-} + +module Main where + +import Numeric.Sampling.Functional + +main :: IO () +main = do + foo <- resampleIO 100 ([1..1000000] :: [Int]) + -- foo <- resampleIO 100 [1..1000000] + print foo