Similarity.hs (4194B)
1 module Cryptopals.Util.Similarity ( 2 score 3 , scoreAlt 4 , tally 5 , gtally 6 , often 7 , mse 8 , mseAlt 9 , english 10 , dist 11 ) where 12 13 import qualified Data.ByteString as BS 14 import qualified Data.Foldable as F 15 import Data.Function (on) 16 import qualified Data.IntMap.Strict as IMS 17 import qualified Data.List as L 18 import qualified Data.Map.Strict as MS 19 20 -- | Distance of the encoding from expected English plaintext. 21 score :: BS.ByteString -> Double 22 score = mse english . dist 23 24 scoreAlt :: BS.ByteString -> Maybe Double 25 scoreAlt = mseAlt english . dist 26 27 mse :: IMS.IntMap Double -> IMS.IntMap Double -> Double 28 mse ref tar = 29 let res = IMS.foldlWithKey' alg mempty ref 30 siz = IMS.size res 31 in IMS.foldl' (\acc val -> acc + val / fromIntegral siz) 0 res 32 where 33 alg acc key val = case IMS.lookup key tar of 34 Nothing -> acc 35 Just tal -> IMS.insert key ((tal - val) ^ 2) acc 36 37 mseAlt :: IMS.IntMap Double -> IMS.IntMap Double -> Maybe Double 38 mseAlt ref tar = case F.foldlM alg mempty (IMS.toList tar) of 39 Nothing -> Nothing 40 Just res -> pure $ 41 let siz = IMS.size res 42 in IMS.foldl' (\acc val -> acc + val / fromIntegral siz) 0 res 43 where 44 alg acc (key, val) = case IMS.lookup key ref of 45 Nothing -> Nothing 46 Just tal -> pure (IMS.insert key ((tal - val) ^ 2) acc) 47 48 dist :: BS.ByteString -> IMS.IntMap Double 49 dist = normalize . tally 50 51 often :: BS.ByteString -> [(Int, Int)] 52 often = L.sortBy (flip compare `on` snd) . IMS.toList . tally 53 54 tally :: BS.ByteString -> IMS.IntMap Int 55 tally = BS.foldl' alg mempty where 56 alg acc (fromIntegral -> byt) 57 | IMS.member byt acc = IMS.adjust succ byt acc 58 | otherwise = IMS.insert byt 1 acc 59 60 gtally :: Ord a => [a] -> MS.Map a Int 61 gtally = L.foldl' alg mempty where 62 alg acc val 63 | MS.member val acc = MS.adjust succ val acc 64 | otherwise = MS.insert val 1 acc 65 66 normalize :: IMS.IntMap Int -> IMS.IntMap Double 67 normalize m = 68 let siz = fromIntegral $ IMS.foldl' (+) 0 m 69 in fmap (\val -> fromIntegral val / siz) m 70 71 -- | Observed frequency distribution of bytes in English corpora. 72 english :: IMS.IntMap Double 73 english = IMS.fromAscList [ 74 (9, 0.000057) 75 , (23, 0.000000) 76 , (32, 0.171662) 77 , (33, 0.000072) 78 , (34, 0.002442) 79 , (35, 0.000179) 80 , (36, 0.000561) 81 , (37, 0.000160) 82 , (38, 0.000226) 83 , (39, 0.002447) 84 , (40, 0.002178) 85 , (41, 0.002233) 86 , (42, 0.000628) 87 , (43, 0.000215) 88 , (44, 0.007384) 89 , (45, 0.013734) 90 , (46, 0.015124) 91 , (47, 0.001549) 92 , (48, 0.005516) 93 , (49, 0.004594) 94 , (50, 0.003322) 95 , (51, 0.001847) 96 , (52, 0.001348) 97 , (53, 0.001663) 98 , (54, 0.001153) 99 , (55, 0.001030) 100 , (56, 0.001054) 101 , (57, 0.001024) 102 , (58, 0.004354) 103 , (59, 0.001214) 104 , (60, 0.001225) 105 , (61, 0.000227) 106 , (62, 0.001242) 107 , (63, 0.001474) 108 , (64, 0.000073) 109 , (65, 0.003132) 110 , (66, 0.002163) 111 , (67, 0.003906) 112 , (68, 0.003151) 113 , (69, 0.002673) 114 , (70, 0.001416) 115 , (71, 0.001876) 116 , (72, 0.002321) 117 , (73, 0.003211) 118 , (74, 0.001726) 119 , (75, 0.000687) 120 , (76, 0.001884) 121 , (77, 0.003529) 122 , (78, 0.002085) 123 , (79, 0.001842) 124 , (80, 0.002614) 125 , (81, 0.000316) 126 , (82, 0.002519) 127 , (83, 0.004003) 128 , (84, 0.003322) 129 , (85, 0.000814) 130 , (86, 0.000892) 131 , (87, 0.002527) 132 , (88, 0.000343) 133 , (89, 0.000304) 134 , (90, 0.000076) 135 , (91, 0.000086) 136 , (92, 0.000016) 137 , (93, 0.000088) 138 , (94, 0.000003) 139 , (95, 0.001159) 140 , (96, 0.000009) 141 , (97, 0.051880) 142 , (98, 0.010195) 143 , (99, 0.021129) 144 , (100, 0.025071) 145 , (101, 0.085771) 146 , (102, 0.013725) 147 , (103, 0.015597) 148 , (104, 0.027444) 149 , (105, 0.049019) 150 , (106, 0.000867) 151 , (107, 0.006753) 152 , (108, 0.031750) 153 , (109, 0.016437) 154 , (110, 0.049701) 155 , (111, 0.057701) 156 , (112, 0.015482) 157 , (113, 0.000747) 158 , (114, 0.042586) 159 , (115, 0.043686) 160 , (116, 0.063700) 161 , (117, 0.020999) 162 , (118, 0.008462) 163 , (119, 0.013034) 164 , (120, 0.001950) 165 , (121, 0.011330) 166 , (122, 0.000596) 167 , (123, 0.000026) 168 , (124, 0.000007) 169 , (125, 0.000026) 170 , (126, 0.000003) 171 , (131, 0.000000) 172 , (149, 0.006410) 173 , (183, 0.000010) 174 , (223, 0.000000) 175 , (226, 0.000000) 176 , (229, 0.000000) 177 , (230, 0.000000) 178 , (237, 0.000000) 179 ] 180