cryptopals

Matasano's cryptopals challenges (cryptopals.com).
Log | Files | Refs | README | LICENSE

commit 46476d6f71955fc525cd73e1d529d7569e498977
parent dc9d806977074e3a8dffed3df9b0785ace902264
Author: Jared Tobin <jared@jtobin.io>
Date:   Thu,  1 Jun 2023 19:07:10 +0400

Misc work.

Diffstat:
Mcryptopals.cabal | 17+++++++++++++++++
Mdocs/s1.md | 12++++++------
Mlib/Cryptopals/Util.hs | 18+++++++++++-------
Alib/Cryptopals/Util/Similarity.hs | 145+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/BreakSingleByteXor.hs | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/FixedXor.hs | 30+++++++++++++++++++++---------
6 files changed, 265 insertions(+), 22 deletions(-)

diff --git a/cryptopals.cabal b/cryptopals.cabal @@ -15,15 +15,18 @@ library LambdaCase OverloadedStrings RecordWildCards + ViewPatterns hs-source-dirs: lib exposed-modules: Cryptopals.Util + , Cryptopals.Util.Similarity build-depends: base , base16 , base64 , bytestring + , containers , text executable fixed-xor @@ -33,7 +36,21 @@ executable fixed-xor hs-source-dirs: src build-depends: base + , base16 + , bytestring + , cryptopals + , optparse-applicative + , text + +executable break-single-byte-xor + main-is: BreakSingleByteXor.hs + ghc-options: -Wall -O2 + default-language: Haskell2010 + hs-source-dirs: src + build-depends: + base , bytestring , cryptopals , optparse-applicative , text + diff --git a/docs/s1.md b/docs/s1.md @@ -18,7 +18,7 @@ for some (decimal-equivalent) coefficients {a0, a1, .. } in the alphabet for other coefficients {b0, b1, .. } in the alphabet A-Za-z0-9\+/. `xxd` is a hexdump utility; one can use `xxd -r` to go from hex to binary, or -`xxd -r -p` to go from hex to ASCII: +`xxd -r -p` to go from hex to UTF8: $ echo $(xxd -r -p data/s1/q1_input.txt) I'm killing your brain like a poisonous mushroom @@ -62,7 +62,7 @@ and running `fixed_xor` on the question input yields the following: $ SOLUTION=$(fixed_xor $(< data/s1/q2_input0.txt) $(< data/s1/q2_input1.txt)) 746865206b696420646f6e277420706c6179 -The ASCII encoding is fun: +The UTF8 encoding is fun: $ echo $SOLUTION | xxd -r -p the kid don't play @@ -71,7 +71,7 @@ The ASCII encoding is fun: Fun fact: it's easy to memorize the (approximate) ranking of the most commonly used characters in English. ETAOIN SHRDLU CMFWYP etc. etc. Here we can grab a -table of ASCII character frequencies on the interwebs; I used [this +table of UTF8 character frequencies on the interwebs; I used [this guy's](http://www.fitaly.com/board/domper3/posts/136.html). You can calculate the MSE between the observed frequency distribution and the @@ -111,7 +111,7 @@ Repeating '?' as many times as is necessary and manually xoring bitwise yields: (N.b. it's worth noting that bitwise xor is simply addition modulo 2 -- i.e., addition in GF(2), the Galois field of two elements.) -The result in ASCII, going through hex, is: +The result in UTF8, going through hex, is: $ echo 'obase=16; ibase=2; 0101011101011010010100110101001101010000' | \ bc | xxd -r -p @@ -119,7 +119,7 @@ The result in ASCII, going through hex, is: Since xor is its own inverse, going backwards will get us 'hello' again. -For the actual question here: given an input, one can iterate over the ASCII +For the actual question here: given an input, one can iterate over the UTF8 printable bytes (in decimal, 32-126), compute the single-byte xor against it, and calculate its MSE. The result with the smallest MSE is probably the byte that was used to encrypt it. @@ -134,7 +134,7 @@ Using a binary that does that, we get: $ parallel -a data/s1/q4_input.txt ./bin/charfreq | less Look for strings w/high-frequency bytes and you'll find the following -w/five hits of ASCII-encoded 21. There's another input in which ']' gets five +w/five hits of UTF8-encoded 21. There's another input in which ']' gets five hits, but it doesn't seem to decrypt to anything. $ INPUT=7b5a4215415d544115415d5015455447414c155c46155f4058455c5b523f diff --git a/lib/Cryptopals/Util.hs b/lib/Cryptopals/Util.hs @@ -4,13 +4,17 @@ module Cryptopals.Util ( , hexToB64 , fixedXor + , CUS.score + , singleByteXor ) where +import qualified Cryptopals.Util.Similarity as CUS import qualified Data.Bits as B import qualified Data.ByteString as BS import qualified Data.ByteString.Base16 as B16 import qualified Data.ByteString.Base64 as B64 import qualified Data.Text as T +import GHC.Word (Word8) newtype Hex = Hex BS.ByteString deriving (Eq, Show) @@ -23,11 +27,11 @@ hexToB64 (Hex b) = do b16 <- B16.decodeBase16 b pure $ Base64 (B64.encodeBase64' b16) -fixedXor :: Hex -> Hex -> Either T.Text Hex -fixedXor (Hex a) (Hex b) = do - l <- B16.decodeBase16 a - r <- B16.decodeBase16 b - if BS.length l /= BS.length r - then Left "fixedXor: unequal-length buffers" - else pure $ Hex (B16.encodeBase16' . BS.pack $ BS.zipWith B.xor l r) +fixedXor :: BS.ByteString -> BS.ByteString -> BS.ByteString +fixedXor l r = BS.pack $ BS.zipWith B.xor l r + +singleByteXor :: Word8 -> Hex -> Either T.Text Hex +singleByteXor byt (Hex bs) = do + s <- B16.decodeBase16 bs + pure $ Hex (B16.encodeBase16' . BS.map (B.xor byt) $ s) diff --git a/lib/Cryptopals/Util/Similarity.hs b/lib/Cryptopals/Util/Similarity.hs @@ -0,0 +1,145 @@ +module Cryptopals.Util.Similarity ( + score + ) where + +import qualified Data.ByteString as BS +import qualified Data.IntMap.Strict as IMS + +-- | Similarity of the byte encoding to English plaintext. Smaller is better. +score :: BS.ByteString -> Double +score = mse english . dist + +mse :: IMS.IntMap Double -> IMS.IntMap Double -> Double +mse ref tar = + let res = IMS.foldlWithKey' alg mempty ref + siz = IMS.size res + in IMS.foldl' (\acc val -> (acc + val) / fromIntegral siz) 0 res + where + alg acc key val = case IMS.lookup key tar of + Nothing -> acc + Just tal -> IMS.insert key ((tal - val) ^ 2) acc + +dist :: BS.ByteString -> IMS.IntMap Double +dist = normalize . tally + +tally :: BS.ByteString -> IMS.IntMap Int +tally = BS.foldl' alg mempty where + alg acc (fromIntegral -> byt) + | IMS.member byt acc = IMS.adjust succ byt acc + | otherwise = IMS.insert byt 1 acc + +normalize :: IMS.IntMap Int -> IMS.IntMap Double +normalize m = + let siz = fromIntegral $ IMS.foldl' (+) 0 m + in fmap (\val -> fromIntegral val / siz) m + +-- | Observed frequency distribution of bytes in English corpora. +english :: IMS.IntMap Double +english = IMS.fromList [ + (9, 0.000057) + , (23, 0.000000) + , (32, 0.171662) + , (33, 0.000072) + , (34, 0.002442) + , (35, 0.000179) + , (36, 0.000561) + , (37, 0.000160) + , (38, 0.000226) + , (39, 0.002447) + , (40, 0.002178) + , (41, 0.002233) + , (42, 0.000628) + , (43, 0.000215) + , (44, 0.007384) + , (45, 0.013734) + , (46, 0.015124) + , (47, 0.001549) + , (48, 0.005516) + , (49, 0.004594) + , (50, 0.003322) + , (51, 0.001847) + , (52, 0.001348) + , (53, 0.001663) + , (54, 0.001153) + , (55, 0.001030) + , (56, 0.001054) + , (57, 0.001024) + , (58, 0.004354) + , (59, 0.001214) + , (60, 0.001225) + , (61, 0.000227) + , (62, 0.001242) + , (63, 0.001474) + , (64, 0.000073) + , (65, 0.003132) + , (66, 0.002163) + , (67, 0.003906) + , (68, 0.003151) + , (69, 0.002673) + , (70, 0.001416) + , (71, 0.001876) + , (72, 0.002321) + , (73, 0.003211) + , (74, 0.001726) + , (75, 0.000687) + , (76, 0.001884) + , (77, 0.003529) + , (78, 0.002085) + , (79, 0.001842) + , (80, 0.002614) + , (81, 0.000316) + , (82, 0.002519) + , (83, 0.004003) + , (84, 0.003322) + , (85, 0.000814) + , (86, 0.000892) + , (87, 0.002527) + , (88, 0.000343) + , (89, 0.000304) + , (90, 0.000076) + , (91, 0.000086) + , (92, 0.000016) + , (93, 0.000088) + , (94, 0.000003) + , (95, 0.001159) + , (96, 0.000009) + , (97, 0.051880) + , (98, 0.010195) + , (99, 0.021129) + , (100, 0.025071) + , (101, 0.085771) + , (102, 0.013725) + , (103, 0.015597) + , (104, 0.027444) + , (105, 0.049019) + , (106, 0.000867) + , (107, 0.006753) + , (108, 0.031750) + , (109, 0.016437) + , (110, 0.049701) + , (111, 0.057701) + , (112, 0.015482) + , (113, 0.000747) + , (114, 0.042586) + , (115, 0.043686) + , (116, 0.063700) + , (117, 0.020999) + , (118, 0.008462) + , (119, 0.013034) + , (120, 0.001950) + , (121, 0.011330) + , (122, 0.000596) + , (123, 0.000026) + , (124, 0.000007) + , (125, 0.000026) + , (126, 0.000003) + , (131, 0.000000) + , (149, 0.006410) + , (183, 0.000010) + , (223, 0.000000) + , (226, 0.000000) + , (229, 0.000000) + , (230, 0.000000) + , (237, 0.000000) + ] + diff --git a/src/BreakSingleByteXor.hs b/src/BreakSingleByteXor.hs @@ -0,0 +1,65 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} + +module Main where + +import Control.Monad (unless) +import qualified Cryptopals.Util as CU +import qualified Data.ByteString as BS +import Data.List (foldl') +import qualified Data.Text as T +import qualified Data.Text.IO as TIO +import qualified Data.Text.Encoding as TE +import GHC.Word (Word8) +import qualified Options.Applicative as O +import qualified System.IO as SIO + +data Args = Args { argsInp :: T.Text } + +ops :: O.Parser Args +ops = Args <$> O.argument O.str (O.metavar "INPUT") + +best :: BS.ByteString -> (Word8, Double, BS.ByteString) +best s = foldl' alg (0, CU.score s, s) [32..126] where + alg acc@(_, asc, _) b = + let xo = CU.singleByteXor b s + sc = CU.score xo + in if sc < asc + then (b, sc, xo) + else acc + +render :: Show a => a -> T.Text +render = T.pack. show + +decipher :: Args -> IO () +decipher Args {..} = do + let s = TE.encodeUtf8 argsInp + + TIO.hPutStrLn SIO.stderr $ + "cryptopals: input similarity score is " <> render (CU.score s) + + let (byt, bsc, b) = best s + + unless (b == s) $ do + + TIO.hPutStrLn SIO.stderr ( + "cryptopals: xor-ing with " <> render byt <> + " yields score " <> render bsc + ) + + TIO.hPutStrLn SIO.stderr $ + "cryptopals: result" + + TIO.hPutStrLn SIO.stdout $ TE.decodeUtf8 b + +main :: IO () +main = do + let pars = O.info (O.helper <*> ops) $ + O.fullDesc + <> O.progDesc "attempt to break single-byte xor'd ciphertext" + <> O.header "break-single-byte-xor" + + args <- O.execParser pars + + decipher args + diff --git a/src/FixedXor.hs b/src/FixedXor.hs @@ -1,13 +1,17 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} module Main where -import Cryptopals.Util (Hex(..)) import qualified Cryptopals.Util as CU +import qualified Data.ByteString as BS +import qualified Data.ByteString.Base16 as B16 import qualified Data.Text as T -import qualified Data.Text.IO as TIO import qualified Data.Text.Encoding as TE +import qualified Data.Text.IO as TIO import qualified Options.Applicative as O +import qualified System.Exit as SE +import qualified System.IO as SIO data Args = Args { argsKey :: T.Text @@ -21,13 +25,21 @@ ops = Args fxor :: Args -> IO () fxor Args {..} = do - let k = Hex (TE.encodeUtf8 argsKey) - v = Hex (TE.encodeUtf8 argsInp) - r = CU.fixedXor k v - - case r of - Left e -> TIO.putStrLn e - Right (Hex b) -> TIO.putStrLn (TE.decodeUtf8 b) + let args = do + k <- B16.decodeBase16 $ TE.encodeUtf8 argsKey + v <- B16.decodeBase16 $ TE.encodeUtf8 argsInp + if BS.length k /= BS.length v + then Left "fixed-xor: unequal-length inputs" + else pure (k, v) + + case args of + Left e -> do + TIO.hPutStrLn SIO.stderr ("cryptopals: " <> e) + SE.exitFailure + + Right (k, v) -> do + let res = CU.fixedXor k v + TIO.putStrLn . TE.decodeUtf8 . B16.encodeBase16' $ res main :: IO () main = do