praxis

Various programming exercises.
Log | Files | Refs

commit b7cdef4899fe9bedbcc919fb8a33452d62d7d640
parent 895c2c80df9f3152ea361670da82b6e8a0b51934
Author: Jared Tobin <jared@jtobin.ca>
Date:   Sun, 24 Jan 2016 01:34:49 +1300

Add entropy.

Diffstat:
A20160123_entropy/Entropy | 0
A20160123_entropy/Entropy.hs | 40++++++++++++++++++++++++++++++++++++++++
2 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/20160123_entropy/Entropy b/20160123_entropy/Entropy Binary files differ. diff --git a/20160123_entropy/Entropy.hs b/20160123_entropy/Entropy.hs @@ -0,0 +1,40 @@ +-- streaming entropy calculation; memory usage is linear in the number of +-- unique symbols + +import Control.Foldl (Fold (..)) +import qualified Control.Foldl as L +import qualified Control.Foldl.ByteString as LB +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Monoid (mempty, (<>)) +import Pipes +import qualified Pipes.ByteString as P +import qualified Pipes.Prelude as PP + +count :: (Ord k, Num a) => Fold k (Map k a) +count = Fold step mempty id where + step m k + | Map.member k m = Map.update (pure . (+ 1)) k m + | otherwise = Map.insert k 1 m + +divide :: (Fractional b, Functor f) => f b -> b -> f b +divide m n = fmap (/ n) m + +average :: (Ord k, Fractional a) => Fold k (Map k a) +average = divide <$> count <*> L.genericLength + +-- in-memory entropy +entropy :: (Foldable f, Ord k, Floating a) => f k -> a +entropy xs = L.fold (L.premap (negate . entropize) L.sum) folded where + folded = L.fold average xs + +entropize :: Floating a => a -> a +entropize m = m * logBase 2 m + +-- streaming byte content-based entropy +main :: IO () +main = do + m <- case average of + Fold step start f -> P.foldBytes step start f P.stdin + print $ L.fold (L.premap (negate . entropize) L.sum) m +