commit 6cbe4b901a02775ac0d39426cbd61c44eb86f271
parent 769f94dcd712b2f1903c322f1544dd0bcb7d05ec
Author: Jared Tobin <jared@jtobin.io>
Date: Fri, 2 Jun 2023 09:10:43 +0400
Add more stuff from set 1.
Diffstat:
7 files changed, 260 insertions(+), 47 deletions(-)
diff --git a/cryptopals.cabal b/cryptopals.cabal
@@ -55,3 +55,42 @@ executable break-single-byte-xor
, optparse-applicative
, text
+executable byte-frequency
+ main-is: ByteFrequency.hs
+ ghc-options: -Wall -O2
+ default-language: Haskell2010
+ hs-source-dirs: src
+ build-depends:
+ base
+ , base16
+ , bytestring
+ , cryptopals
+ , optparse-applicative
+ , text
+
+executable detect-single-byte-xor
+ main-is: DetectSingleByteXor.hs
+ ghc-options: -Wall -O2
+ default-language: Haskell2010
+ hs-source-dirs: src
+ build-depends:
+ base
+ , base16
+ , bytestring
+ , cryptopals
+ , optparse-applicative
+ , text
+
+executable repeating-key-xor
+ main-is: RepeatingKeyXor.hs
+ ghc-options: -Wall -O2
+ default-language: Haskell2010
+ hs-source-dirs: src
+ build-depends:
+ base
+ , base16
+ , bytestring
+ , cryptopals
+ , optparse-applicative
+ , text
+
diff --git a/docs/s1.md b/docs/s1.md
@@ -51,15 +51,15 @@ zero-padded:
$ echo 'obase=16; ibase=2; 000011110000001000000111' | bc
F0207
-The `fixed_xor` executable included will perform the reverse task on the
-zero-padded string:
+The `fixed-xor` executable included will perform the reverse task on the
+(zero-padded) hex string:
- $ fixed_xor '0f0207' $(echo -n ICE | xxd -p) | xxd -r -p
+ $ fixed-xor '0f0207' $(echo -n ICE | xxd -p) | xxd -r -p
FAB
-and running `fixed_xor` on the question input yields the following:
+and running `fixed-xor` on the question input yields the following:
- $ SOLUTION=$(fixed_xor $(< data/s1/q2_input0.txt) $(< data/s1/q2_input1.txt))
+ $ SOLUTION=$(fixed-xor $(< data/s1/q2_input0.txt) $(< data/s1/q2_input1.txt))
746865206b696420646f6e277420706c6179
The UTF8 encoding is fun:
@@ -69,33 +69,23 @@ The UTF8 encoding is fun:
#### 1.3
-Fun fact: it's easy to memorize the (approximate) ranking of the most commonly
-used characters in English. ETAOIN SHRDLU CMFWYP etc. etc. Here we can grab a
-table of UTF8 character frequencies on the interwebs; I used [this
-guy's](http://www.fitaly.com/board/domper3/posts/136.html).
+(N.b., it's easy to memorize the (approximate) ranking of the most
+commonly used characters in English. ETAOIN SHRDLU CMFWYP etc.)
-You can calculate the MSE between the observed frequency distribution and the
-expected one. Just tally and normalise the bytes in the input to get the
-observed distribution, and then:
+Here we want to determine which byte has been used to produce a known
+single-byte xor'd ciphertext. One wants to score bytes by their
+"closeness" to what might be expected given typical English character
+frequencies, then check the scores of the ciphertext single-byte-xor'd
+against various bytes.
- fn mse(expected: HashMap<u8, f32>, observed: HashMap<u8, f32>) -> f32 {
- let mut result = HashMap::new();
+Here we can grab a table of UTF8 character frequencies
+found in English corpora on the interwebs; I used [this
+guy's](http://www.fitaly.com/board/domper3/posts/136.html). One can then
+"score" bytestrings by e.g. calculating the mean squared error between
+an observed frequency distribution and this expected one.
- for (key, val) in expected.iter() {
- if observed.contains_key(key) {
- let tval = observed.get(key).unwrap();
- let sqdiff = (tval - val).powf(2.0);
- result.insert(key, sqdiff);
- }
- }
-
- let size = result.len();
-
- result.iter().fold(0.0, |sum, (_, val)| sum + val / size as f32)
- }
-
-In single-byte xor, the entire input has simply been xor'd by.. uh, a single
-byte. Take plaintext 'hello' and '?', for example:
+In single-byte xor, the entire input has simply been xor'd by.. a single
+byte. Take plaintext 'hello' and '?', for example:
$ parallel 'echo -n {} | xxd -b' ::: hello ?
00000000: 01101000 01100101 01101100 01101100 01101111 hello
@@ -126,35 +116,54 @@ that was used to encrypt it.
Using a binary that does that, we get:
- $ cat data/s1/q3_input.txt | tr -d '\n' | ./bin/break_single_byte_xor
+ $ break-single-byte-xor $(cat data/s1/q3_input.txt | tr -d '\n') | \
+ xxd -r -p
+ cryptopals: input similarity score is 4.760245723733781e-3
+ cryptopals: xor-ing with 88 yields 1.1213430401648154e-3
+ cryptopals: result
Cooking MC's like a pound of bacon
#### 1.4
- $ parallel -a data/s1/q4_input.txt ./bin/charfreq | less
-
-Look for strings w/high-frequency bytes and you'll find the following
-w/five hits of UTF8-encoded 21. There's another input in which ']' gets five
-hits, but it doesn't seem to decrypt to anything.
-
- $ INPUT=7b5a4215415d544115415d5015455447414c155c46155f4058455c5b523f
- $ echo $INPUT | ./bin/break_single_byte_xor
+The idea in detecting single-byte XOR is to look for an unusually high
+frequency of particular bytes. Natural English sentences will produce
+some characters (e.g. ETAOIN.., whitespace) with high frequency, and
+these when XOR'd qgainst a single byte will always produce the same
+result. Inputs with a large number of repeated bytes should thus be
+considered suspect.
+
+The `detect-single-byte-xor` executable will prune out the most suspect
+inputs included in a file. Piping the results to `break-single-byte-xor`
+exposes the enciphered text; some relevant output is highlighted below:
+
+ ```
+ $ detect-single-byte-xor data/s1/q4_input.txt | \
+ parallel --keep-order 'break-single-byte-xor {} | xxd -r -p'
+ cryptopals: suspect inputs
+ cryptopals: 7b5a4215415d544115415d5015455447414c155c46155f4058455c5b523f
+ cryptopals: input similarity score is 3.016788883590278e-3
+ cryptopals: xor-ing with 76 yields 7.568718315873016e-4
+ cryptopals: result
Now that the party is jumping
+ ```
#### 1.5
- $ echo -n $(< data/s1/q5_input.txt) | ./bin/repeating_key_xor ICE | fold -w 74
- original:
- Burning 'em, if you ain't quick and nimble I go crazy when I hear a cymbal
- xored with: ICE
- result:
+Repeating-key XOR just cyclically XOR's bytes in the plaintext against
+bytes in the key:
+
+ $ sec=$(repeating-key-xor ICE "$(< data/s1/q5_input.txt)")
+ $ echo $sec | fold -w 74
0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a2622632427276527
- 2a282b2f20690a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f
+ 2a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f
-Can check this by grabbing the result under 'INPUT' and xor-ing it again:
+Note the `repeating-key-xor` binary assumes its input isn't hex-encoded,
+so if we want to decrypt the ciphertext we'll need to pipe it through
+`xxd` when going in reverse:
- $ xxd -r -p <<< $INPUT | ./bin/repeating_key_xor ICE | xxd -r -p | tail -c +2
- Burning 'em, if you ain't quick and nimble I go crazy when I hear a cymbal
+ $ repeating-key-xor ICE "$(echo $sec | xxd -r -p)" | xxd -r -p
+ Burning 'em, if you ain't quick and nimble
+ I go crazy when I hear a cymbal
#### 1.6
diff --git a/lib/Cryptopals/Util.hs b/lib/Cryptopals/Util.hs
@@ -4,8 +4,11 @@ module Cryptopals.Util (
, hexToB64
, fixedXor
+ , CUS.often
, CUS.score
+ , repeatingKeyXor
, singleByteXor
+ , CUS.tally
) where
import qualified Cryptopals.Util.Similarity as CUS
@@ -33,3 +36,9 @@ fixedXor l r = BS.pack $ BS.zipWith B.xor l r
singleByteXor :: Word8 -> BS.ByteString -> BS.ByteString
singleByteXor byt = BS.map (B.xor byt)
+repeatingKeyXor :: BS.ByteString -> BS.ByteString -> BS.ByteString
+repeatingKeyXor key pla =
+ let pl = BS.length pla
+ ks = BS.pack $ take pl (cycle (BS.unpack key))
+ in BS.pack $ BS.zipWith B.xor ks pla
+
diff --git a/lib/Cryptopals/Util/Similarity.hs b/lib/Cryptopals/Util/Similarity.hs
@@ -1,9 +1,13 @@
module Cryptopals.Util.Similarity (
score
+ , tally
+ , often
) where
import qualified Data.ByteString as BS
+import Data.Function (on)
import qualified Data.IntMap.Strict as IMS
+import qualified Data.List as L
-- | Similarity of the byte encoding to English plaintext. Smaller is better.
score :: BS.ByteString -> Double
@@ -22,6 +26,9 @@ mse ref tar =
dist :: BS.ByteString -> IMS.IntMap Double
dist = normalize . tally
+often :: BS.ByteString -> [(Int, Int)]
+often = L.sortBy (flip compare `on` snd) . IMS.toList . tally
+
tally :: BS.ByteString -> IMS.IntMap Int
tally = BS.foldl' alg mempty where
alg acc (fromIntegral -> byt)
diff --git a/src/ByteFrequency.hs b/src/ByteFrequency.hs
@@ -0,0 +1,50 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Main where
+
+import qualified Cryptopals.Util as CU
+import qualified Data.ByteString.Base16 as B16
+import qualified Data.Text as T
+import qualified Data.Text.IO as TIO
+import qualified Data.Text.Encoding as TE
+import qualified Options.Applicative as O
+import qualified System.Exit as SE
+import qualified System.IO as SIO
+
+data Args = Args { argsInp :: T.Text }
+
+ops :: O.Parser Args
+ops = Args <$> O.argument O.str (O.metavar "INPUT")
+
+freq :: Args -> IO ()
+freq Args {..} = do
+ let render :: Show a => a -> T.Text
+ render = T.pack . show
+
+ err = TIO.hPutStrLn SIO.stderr
+
+ args = B16.decodeBase16 $ TE.encodeUtf8 argsInp
+
+ case args of
+ Left e -> do
+ err $ "cryptopals: " <> e
+ SE.exitFailure
+
+ Right s -> do
+ let freqs = take 3 $ CU.often s
+
+ err $ "cryptopals: common bytes (" <> argsInp <> ")"
+ err $ render freqs
+
+main :: IO ()
+main = do
+ let pars = O.info (O.helper <*> ops) $
+ O.fullDesc
+ <> O.progDesc "produce byte frequencies"
+ <> O.header "byte-frequency"
+
+ args <- O.execParser pars
+
+ freq args
+
diff --git a/src/DetectSingleByteXor.hs b/src/DetectSingleByteXor.hs
@@ -0,0 +1,59 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Main where
+
+import qualified Cryptopals.Util as CU
+import qualified Data.ByteString.Base16 as B16
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.Foldable as F
+import Data.Function (on)
+import qualified Data.List as L
+-- import qualified Data.Text as T
+import qualified Data.Text.IO as TIO
+import qualified Data.Text.Encoding as TE
+import qualified Options.Applicative as O
+import qualified System.Exit as SE
+import qualified System.IO as SIO
+
+data Args = Args { argsFil :: SIO.FilePath }
+
+ops :: O.Parser Args
+ops = Args <$> O.argument O.str (O.metavar "FILE")
+
+detect :: Args -> IO ()
+detect Args {..} = do
+ let err = TIO.hPutStrLn SIO.stderr
+ out = TIO.hPutStrLn SIO.stdout
+
+ contents <- B8.readFile argsFil
+
+ let ls = B8.lines contents
+ es = traverse B16.decodeBase16 ls
+
+ case es of
+ Left e -> do
+ err $ "cryptopals: " <> e
+ SE.exitFailure
+
+ Right bs -> do
+ let fs = concatMap (\s -> [(head . CU.often $ s, s)]) bs -- XX hack
+ sorted = L.sortBy (flip compare `on` (snd . fst)) fs
+ most = take 3 sorted
+
+ err "cryptopals: suspect inputs"
+ F.for_ most $ \((_, _), s) -> do
+ err $ "cryptopals: " <> (TE.decodeUtf8 . B16.encodeBase16' $ s)
+ out . TE.decodeUtf8 . B16.encodeBase16' $ s
+
+main :: IO ()
+main = do
+ let pars = O.info (O.helper <*> ops) $
+ O.fullDesc
+ <> O.progDesc "produce byte frequencies"
+ <> O.header "detect-single-byte-xor"
+
+ args <- O.execParser pars
+
+ detect args
+
diff --git a/src/RepeatingKeyXor.hs b/src/RepeatingKeyXor.hs
@@ -0,0 +1,40 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Main where
+
+import qualified Cryptopals.Util as CU
+import qualified Data.ByteString.Base16 as B16
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
+import qualified Data.Text.IO as TIO
+import qualified Options.Applicative as O
+
+data Args = Args {
+ argsKey :: T.Text
+ , argsInp :: T.Text
+ }
+
+ops :: O.Parser Args
+ops = Args
+ <$> O.argument O.str (O.metavar "KEY")
+ <*> O.argument O.str (O.metavar "INPUT")
+
+rxor :: Args -> IO ()
+rxor Args {..} = do
+ let (k, v) = (TE.encodeUtf8 argsKey, TE.encodeUtf8 argsInp)
+ res = CU.repeatingKeyXor k v
+
+ TIO.putStrLn . TE.decodeUtf8 . B16.encodeBase16' $ res
+
+main :: IO ()
+main = do
+ let pars = O.info (O.helper <*> ops) $
+ O.fullDesc
+ <> O.progDesc "compute repeating-key-xor KEY on INPUT"
+ <> O.header "repeating-key-xor"
+
+ args <- O.execParser pars
+
+ rxor args
+