commit f7b7423b55addfa72225bb599163d4032d2ef044
parent f4b23ca3a4b71d68ab4e0b9bf73b8f575b812e69
Author: Jared Tobin <jared@jtobin.ca>
Date:   Mon,  8 Feb 2016 18:43:35 +1300
Touch up basics.
Diffstat:
2 files changed, 47 insertions(+), 78 deletions(-)
diff --git a/lib/Numeric/Sampling.hs b/lib/Numeric/Sampling.hs
@@ -1,7 +1,7 @@
 {-# OPTIONS_GHC -Wall #-}
 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE BangPatterns     #-}
+{-# LANGUAGE FlexibleContexts #-}
 
 module Numeric.Sampling (
     -- * Without replacement
@@ -12,25 +12,18 @@ module Numeric.Sampling (
   , resample
   , resampleIO
 
-    -- * Unequal probability sampling without replacement
-  , probSample
-  , probSampleIO
-
-    -- * Unequal probability sampling with replacement
-  , probResample
-  , probResampleIO
+    -- * Unequal probability, with replacement
+  , presample
+  , presampleIO
   ) where
 
-import qualified Control.Foldl             as F
-import           Control.Monad.Primitive   (PrimMonad, PrimState)
-import qualified Data.Foldable             as Foldable
-import           Data.Function             (on)
-import           Data.List.Ordered         (mergeBy)
-import           Data.Maybe                (fromJust) -- FIXME
-import           Data.Vector               (Vector)
-import qualified Data.Vector               as V
-import qualified Data.Vector.Algorithms.Merge as V
-import           Numeric.Sampling.Internal (randomN)
+import qualified Control.Foldl               as F
+import           Control.Monad.Primitive     (PrimMonad, PrimState)
+import qualified Data.Foldable               as Foldable
+import           Data.Maybe                  (fromJust)
+import           Data.Vector                 (Vector)
+import qualified Data.Vector                 as V
+import           Numeric.Sampling.Internal
 import           System.Random.MWC
 
 -- | (/O(n)/) Sample uniformly, without replacement.
@@ -50,88 +43,48 @@ sampleIO n xs = do
   gen <- createSystemRandom
   sample n xs gen
 
--- | (/O(n)/) Sample uniformly with replacement (bootstrap).
+-- | (/O(n log n)/) Sample uniformly with replacement (bootstrap).
 resample
   :: (PrimMonad m, Foldable f)
   => Int -> f a -> Gen (PrimState m) -> m (Vector a)
-resample n xs = probResample n weighted where
+resample n xs = presample n weighted where
   weight   = recip (F.fold F.genericLength xs)
   weighted = zip (repeat weight) (Foldable.toList xs)
 
--- | (/O(n)/) 'resample' specialized to IO.
-resampleIO :: Foldable f => Int -> f a -> IO (Vector a)
+-- | (/O(n log n)/) 'resample' specialized to IO.
+resampleIO :: (Foldable f) => Int -> f a -> IO (Vector a)
 resampleIO n xs = do
   gen <- createSystemRandom
   resample n xs gen
 
-
-
-
-
-probResample
+-- | (/O(n log n)/) Unequal probability resampling.
+presample
   :: (PrimMonad m, Foldable f)
   => Int -> f (Double, a) -> Gen (PrimState m) -> m (Vector a)
-probResample n weighted gen
+presample n weighted gen
     | n <= 0    = return V.empty
     | otherwise = do
-        -- let vweighted = V.fromList $ Foldable.toList weighted
-        -- sorted <- mutableSortBy descendingOnFirst vweighted
-        let lweighted = Foldable.toList weighted
-            sorted    = sortProbs lweighted
-        let probs     = F.scan (F.premap fst F.sum) sorted
-            cdf       = V.fromList $ zip probs (fmap snd sorted)
+        let vweighted = V.fromList $ Foldable.toList weighted
+        sorted <- mutableSortByProbability vweighted
+        let probs     = drop 1 (F.scan (F.premap fst F.sum) (V.toList sorted))
+            cdf       = V.zip (V.fromList probs) (V.map snd sorted)
         accumulateSample n cdf gen
   where
     accumulateSample
-      :: PrimMonad m
+      :: (PrimMonad m)
       => Int -> Vector (Double, a) -> Gen (PrimState m) -> m (Vector a)
     accumulateSample size xs g = go V.empty size where
       go !acc s
         | s <= 0    = return $! acc
         | otherwise = do
             z <- uniform g
-            let result = snd . fromJust . F.fold (F.find ((>= z) . fst)) $ xs -- FIXME fromJust
+            let pair   = F.fold (F.find ((>= z) . fst)) xs
+                result = snd . fromJust $ pair -- FIXME
             go (V.cons result acc) (pred s)
 
-probResampleIO :: Foldable f => Int -> f (Double, a) -> IO (Vector a)
-probResampleIO n weighted = do
+-- | (/O(n log n)/) 'presample' specialized to IO.
+presampleIO :: (Foldable f) => Int -> f (Double, a) -> IO (Vector a)
+presampleIO n weighted = do
   gen <- createSystemRandom
-  probResample n weighted gen
-
-
-sortProbs :: Ord a => [(a, b)] -> [(a, b)]
-sortProbs = hylo alg coalg where
-  alg EmptyF      = []
-  alg (LeafF x)   = [x]
-  alg (NodeF l r) = mergeBy (compare `on` fst) l r
-
-  coalg []  = EmptyF
-  coalg [x] = LeafF x
-  coalg xs  = NodeF l r where
-    (l, r) = splitAt (length xs `div` 2) xs
-
-  hylo :: Functor f => (f a -> a) -> (b -> f b) -> b -> a
-  hylo f g = h where h = f . fmap h . g
-
-data TreeF a r =
-    EmptyF
-  | LeafF a
-  | NodeF r r
-  deriving Functor
-
--- | Wrapper over the mutable sort process.
-mutableSortBy :: PrimMonad m => V.Comparison a -> Vector a -> m (Vector a)
-mutableSortBy comparison xs = do
-  warm <- V.unsafeThaw xs
-  V.sortBy comparison warm
-  cool <- V.unsafeFreeze warm
-  return $! cool
-
-descendingOnFirst :: Ord a => (a, b) -> (a, b) -> Ordering
-descendingOnFirst = flip compare `on` fst
-
-
-probSample = undefined
-probSampleIO = undefined
-
+  presample n weighted gen
 
diff --git a/lib/Numeric/Sampling/Internal.hs b/lib/Numeric/Sampling/Internal.hs
@@ -28,11 +28,17 @@
 -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-module Numeric.Sampling.Internal where
+module Numeric.Sampling.Internal (
+    randomN
+
+  , mutableSortByProbability
+  ) where
 
 import           Control.Foldl               (FoldM (..))
 import           Control.Monad               (when)
 import           Control.Monad.Primitive
+import           Data.Function                (on)
+import qualified Data.Vector.Algorithms.Intro as V
 import           Data.Vector.Generic         (Mutable, Vector)
 import qualified Data.Vector.Generic         as V
 import           Data.Vector.Generic.Mutable (MVector)
@@ -76,4 +82,14 @@ randomN n gen = FoldM step begin done where
   done (RandomNState  Complete      mv _ _) = do
       v <- V.freeze mv
       return (Just v)
+{-# INLINABLE randomN #-}
+
+-- | Wrapper over the mutable sort process.
+mutableSortByProbability
+  :: (Vector v (Double, a), PrimMonad m) => v (Double, a) -> m (v (Double, a))
+mutableSortByProbability xs = do
+  warm <- V.unsafeThaw xs
+  V.sortBy (flip compare `on` fst) warm
+  cool <- V.unsafeFreeze warm
+  return $! cool