praxis

Various programming exercises.
Log | Files | Refs

commit 4511e1328992439d6496a8f14e5f95b4a96c9acc
parent b7cdef4899fe9bedbcc919fb8a33452d62d7d640
Author: Jared Tobin <jared@jtobin.ca>
Date:   Sun, 24 Jan 2016 21:59:31 +1300

Update entropy.

Diffstat:
M20160123_entropy/Entropy | 0
M20160123_entropy/Entropy.hs | 52++++++++++++++++++++++------------------------------
2 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/20160123_entropy/Entropy b/20160123_entropy/Entropy Binary files differ. diff --git a/20160123_entropy/Entropy.hs b/20160123_entropy/Entropy.hs @@ -1,40 +1,32 @@ --- streaming entropy calculation; memory usage is linear in the number of --- unique symbols +{-# OPTIONS_GHC -Wall #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} +{-# OPTIONS_GHC -fno-warn-unused-binds #-} -import Control.Foldl (Fold (..)) -import qualified Control.Foldl as L -import qualified Control.Foldl.ByteString as LB -import Data.Map.Strict (Map) -import qualified Data.Map.Strict as Map -import Data.Monoid (mempty, (<>)) -import Pipes -import qualified Pipes.ByteString as P -import qualified Pipes.Prelude as PP +import Control.Foldl (Fold (..)) +import qualified Control.Foldl as L +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Pipes ((>->)) +import qualified Pipes.ByteString as P -count :: (Ord k, Num a) => Fold k (Map k a) -count = Fold step mempty id where - step m k - | Map.member k m = Map.update (pure . (+ 1)) k m - | otherwise = Map.insert k 1 m +group :: (Ord k, Num a) => Fold k (Map k a) +group = Fold step mempty id where + step m k = Map.insertWith (+) k 1 m -divide :: (Fractional b, Functor f) => f b -> b -> f b -divide m n = fmap (/ n) m +distribution :: (Ord k, Fractional a) => Fold k (Map k a) +distribution = divide <$> group <*> L.genericLength where + divide m n = fmap (/ n) m -average :: (Ord k, Fractional a) => Fold k (Map k a) -average = divide <$> count <*> L.genericLength +entropize :: Floating a => a -> a +entropize m = negate (m * logBase 2 m) --- in-memory entropy entropy :: (Foldable f, Ord k, Floating a) => f k -> a -entropy xs = L.fold (L.premap (negate . entropize) L.sum) folded where - folded = L.fold average xs - -entropize :: Floating a => a -> a -entropize m = m * logBase 2 m +entropy = L.fold (L.premap entropize L.sum) . L.fold distribution --- streaming byte content-based entropy main :: IO () main = do - m <- case average of - Fold step start f -> P.foldBytes step start f P.stdin - print $ L.fold (L.premap (negate . entropize) L.sum) m + let source = P.stdin >-> P.filter (/= 10) + dist <- case distribution of + Fold step start extract -> P.foldBytes step start extract source + print $ L.fold L.sum (fmap entropize dist)