cryptopals

Matasano's cryptopals challenges (cryptopals.com).
git clone git://git.jtobin.io/cryptopals.git
Log | Files | Refs | README | LICENSE

Similarity.hs (4194B)


      1 module Cryptopals.Util.Similarity (
      2     score
      3   , scoreAlt
      4   , tally
      5   , gtally
      6   , often
      7   , mse
      8   , mseAlt
      9   , english
     10   , dist
     11   ) where
     12 
     13 import qualified Data.ByteString as BS
     14 import qualified Data.Foldable as F
     15 import Data.Function (on)
     16 import qualified Data.IntMap.Strict as IMS
     17 import qualified Data.List as L
     18 import qualified Data.Map.Strict as MS
     19 
     20 -- | Distance of the encoding from expected English plaintext.
     21 score :: BS.ByteString -> Double
     22 score = mse english . dist
     23 
     24 scoreAlt :: BS.ByteString -> Maybe Double
     25 scoreAlt = mseAlt english . dist
     26 
     27 mse :: IMS.IntMap Double -> IMS.IntMap Double -> Double
     28 mse ref tar =
     29     let res = IMS.foldlWithKey' alg mempty ref
     30         siz = IMS.size res
     31     in  IMS.foldl' (\acc val -> acc + val / fromIntegral siz) 0 res
     32   where
     33     alg acc key val = case IMS.lookup key tar of
     34       Nothing  -> acc
     35       Just tal -> IMS.insert key ((tal - val) ^ 2) acc
     36 
     37 mseAlt :: IMS.IntMap Double -> IMS.IntMap Double -> Maybe Double
     38 mseAlt ref tar = case F.foldlM alg mempty (IMS.toList tar) of
     39   Nothing  -> Nothing
     40   Just res -> pure $
     41     let siz = IMS.size res
     42     in  IMS.foldl' (\acc val -> acc + val / fromIntegral siz) 0 res
     43   where
     44     alg acc (key, val) = case IMS.lookup key ref of
     45       Nothing  -> Nothing
     46       Just tal -> pure (IMS.insert key ((tal - val) ^ 2) acc)
     47 
     48 dist :: BS.ByteString -> IMS.IntMap Double
     49 dist = normalize . tally
     50 
     51 often :: BS.ByteString -> [(Int, Int)]
     52 often = L.sortBy (flip compare `on` snd) . IMS.toList . tally
     53 
     54 tally :: BS.ByteString -> IMS.IntMap Int
     55 tally = BS.foldl' alg mempty where
     56   alg acc (fromIntegral -> byt)
     57     | IMS.member byt acc = IMS.adjust succ byt acc
     58     | otherwise          = IMS.insert byt 1 acc
     59 
     60 gtally :: Ord a => [a] -> MS.Map a Int
     61 gtally = L.foldl' alg mempty where
     62   alg acc val
     63     | MS.member val acc = MS.adjust succ val acc
     64     | otherwise         = MS.insert val 1 acc
     65 
     66 normalize :: IMS.IntMap Int -> IMS.IntMap Double
     67 normalize m =
     68   let siz = fromIntegral $ IMS.foldl' (+) 0 m
     69   in  fmap (\val -> fromIntegral val / siz) m
     70 
     71 -- | Observed frequency distribution of bytes in English corpora.
     72 english :: IMS.IntMap Double
     73 english = IMS.fromAscList [
     74     (9, 0.000057)
     75   , (23, 0.000000)
     76   , (32, 0.171662)
     77   , (33, 0.000072)
     78   , (34, 0.002442)
     79   , (35, 0.000179)
     80   , (36, 0.000561)
     81   , (37, 0.000160)
     82   , (38, 0.000226)
     83   , (39, 0.002447)
     84   , (40, 0.002178)
     85   , (41, 0.002233)
     86   , (42, 0.000628)
     87   , (43, 0.000215)
     88   , (44, 0.007384)
     89   , (45, 0.013734)
     90   , (46, 0.015124)
     91   , (47, 0.001549)
     92   , (48, 0.005516)
     93   , (49, 0.004594)
     94   , (50, 0.003322)
     95   , (51, 0.001847)
     96   , (52, 0.001348)
     97   , (53, 0.001663)
     98   , (54, 0.001153)
     99   , (55, 0.001030)
    100   , (56, 0.001054)
    101   , (57, 0.001024)
    102   , (58, 0.004354)
    103   , (59, 0.001214)
    104   , (60, 0.001225)
    105   , (61, 0.000227)
    106   , (62, 0.001242)
    107   , (63, 0.001474)
    108   , (64, 0.000073)
    109   , (65, 0.003132)
    110   , (66, 0.002163)
    111   , (67, 0.003906)
    112   , (68, 0.003151)
    113   , (69, 0.002673)
    114   , (70, 0.001416)
    115   , (71, 0.001876)
    116   , (72, 0.002321)
    117   , (73, 0.003211)
    118   , (74, 0.001726)
    119   , (75, 0.000687)
    120   , (76, 0.001884)
    121   , (77, 0.003529)
    122   , (78, 0.002085)
    123   , (79, 0.001842)
    124   , (80, 0.002614)
    125   , (81, 0.000316)
    126   , (82, 0.002519)
    127   , (83, 0.004003)
    128   , (84, 0.003322)
    129   , (85, 0.000814)
    130   , (86, 0.000892)
    131   , (87, 0.002527)
    132   , (88, 0.000343)
    133   , (89, 0.000304)
    134   , (90, 0.000076)
    135   , (91, 0.000086)
    136   , (92, 0.000016)
    137   , (93, 0.000088)
    138   , (94, 0.000003)
    139   , (95, 0.001159)
    140   , (96, 0.000009)
    141   , (97, 0.051880)
    142   , (98, 0.010195)
    143   , (99, 0.021129)
    144   , (100, 0.025071)
    145   , (101, 0.085771)
    146   , (102, 0.013725)
    147   , (103, 0.015597)
    148   , (104, 0.027444)
    149   , (105, 0.049019)
    150   , (106, 0.000867)
    151   , (107, 0.006753)
    152   , (108, 0.031750)
    153   , (109, 0.016437)
    154   , (110, 0.049701)
    155   , (111, 0.057701)
    156   , (112, 0.015482)
    157   , (113, 0.000747)
    158   , (114, 0.042586)
    159   , (115, 0.043686)
    160   , (116, 0.063700)
    161   , (117, 0.020999)
    162   , (118, 0.008462)
    163   , (119, 0.013034)
    164   , (120, 0.001950)
    165   , (121, 0.011330)
    166   , (122, 0.000596)
    167   , (123, 0.000026)
    168   , (124, 0.000007)
    169   , (125, 0.000026)
    170   , (126, 0.000003)
    171   , (131, 0.000000)
    172   , (149, 0.006410)
    173   , (183, 0.000010)
    174   , (223, 0.000000)
    175   , (226, 0.000000)
    176   , (229, 0.000000)
    177   , (230, 0.000000)
    178   , (237, 0.000000)
    179   ]
    180