commit 4511e1328992439d6496a8f14e5f95b4a96c9acc
parent b7cdef4899fe9bedbcc919fb8a33452d62d7d640
Author: Jared Tobin <jared@jtobin.ca>
Date: Sun, 24 Jan 2016 21:59:31 +1300
Update entropy.
Diffstat:
2 files changed, 22 insertions(+), 30 deletions(-)
diff --git a/20160123_entropy/Entropy b/20160123_entropy/Entropy
Binary files differ.
diff --git a/20160123_entropy/Entropy.hs b/20160123_entropy/Entropy.hs
@@ -1,40 +1,32 @@
--- streaming entropy calculation; memory usage is linear in the number of
--- unique symbols
+{-# OPTIONS_GHC -Wall #-}
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+{-# OPTIONS_GHC -fno-warn-unused-binds #-}
-import Control.Foldl (Fold (..))
-import qualified Control.Foldl as L
-import qualified Control.Foldl.ByteString as LB
-import Data.Map.Strict (Map)
-import qualified Data.Map.Strict as Map
-import Data.Monoid (mempty, (<>))
-import Pipes
-import qualified Pipes.ByteString as P
-import qualified Pipes.Prelude as PP
+import Control.Foldl (Fold (..))
+import qualified Control.Foldl as L
+import Data.Map.Strict (Map)
+import qualified Data.Map.Strict as Map
+import Pipes ((>->))
+import qualified Pipes.ByteString as P
-count :: (Ord k, Num a) => Fold k (Map k a)
-count = Fold step mempty id where
- step m k
- | Map.member k m = Map.update (pure . (+ 1)) k m
- | otherwise = Map.insert k 1 m
+group :: (Ord k, Num a) => Fold k (Map k a)
+group = Fold step mempty id where
+ step m k = Map.insertWith (+) k 1 m
-divide :: (Fractional b, Functor f) => f b -> b -> f b
-divide m n = fmap (/ n) m
+distribution :: (Ord k, Fractional a) => Fold k (Map k a)
+distribution = divide <$> group <*> L.genericLength where
+ divide m n = fmap (/ n) m
-average :: (Ord k, Fractional a) => Fold k (Map k a)
-average = divide <$> count <*> L.genericLength
+entropize :: Floating a => a -> a
+entropize m = negate (m * logBase 2 m)
--- in-memory entropy
entropy :: (Foldable f, Ord k, Floating a) => f k -> a
-entropy xs = L.fold (L.premap (negate . entropize) L.sum) folded where
- folded = L.fold average xs
-
-entropize :: Floating a => a -> a
-entropize m = m * logBase 2 m
+entropy = L.fold (L.premap entropize L.sum) . L.fold distribution
--- streaming byte content-based entropy
main :: IO ()
main = do
- m <- case average of
- Fold step start f -> P.foldBytes step start f P.stdin
- print $ L.fold (L.premap (negate . entropize) L.sum) m
+ let source = P.stdin >-> P.filter (/= 10)
+ dist <- case distribution of
+ Fold step start extract -> P.foldBytes step start extract source
+ print $ L.fold L.sum (fmap entropize dist)