commit 46476d6f71955fc525cd73e1d529d7569e498977
parent dc9d806977074e3a8dffed3df9b0785ace902264
Author: Jared Tobin <jared@jtobin.io>
Date: Thu, 1 Jun 2023 19:07:10 +0400
Misc work.
Diffstat:
6 files changed, 265 insertions(+), 22 deletions(-)
diff --git a/cryptopals.cabal b/cryptopals.cabal
@@ -15,15 +15,18 @@ library
LambdaCase
OverloadedStrings
RecordWildCards
+ ViewPatterns
hs-source-dirs:
lib
exposed-modules:
Cryptopals.Util
+ , Cryptopals.Util.Similarity
build-depends:
base
, base16
, base64
, bytestring
+ , containers
, text
executable fixed-xor
@@ -33,7 +36,21 @@ executable fixed-xor
hs-source-dirs: src
build-depends:
base
+ , base16
+ , bytestring
+ , cryptopals
+ , optparse-applicative
+ , text
+
+executable break-single-byte-xor
+ main-is: BreakSingleByteXor.hs
+ ghc-options: -Wall -O2
+ default-language: Haskell2010
+ hs-source-dirs: src
+ build-depends:
+ base
, bytestring
, cryptopals
, optparse-applicative
, text
+
diff --git a/docs/s1.md b/docs/s1.md
@@ -18,7 +18,7 @@ for some (decimal-equivalent) coefficients {a0, a1, .. } in the alphabet
for other coefficients {b0, b1, .. } in the alphabet A-Za-z0-9\+/.
`xxd` is a hexdump utility; one can use `xxd -r` to go from hex to binary, or
-`xxd -r -p` to go from hex to ASCII:
+`xxd -r -p` to go from hex to UTF8:
$ echo $(xxd -r -p data/s1/q1_input.txt)
I'm killing your brain like a poisonous mushroom
@@ -62,7 +62,7 @@ and running `fixed_xor` on the question input yields the following:
$ SOLUTION=$(fixed_xor $(< data/s1/q2_input0.txt) $(< data/s1/q2_input1.txt))
746865206b696420646f6e277420706c6179
-The ASCII encoding is fun:
+The UTF8 encoding is fun:
$ echo $SOLUTION | xxd -r -p
the kid don't play
@@ -71,7 +71,7 @@ The ASCII encoding is fun:
Fun fact: it's easy to memorize the (approximate) ranking of the most commonly
used characters in English. ETAOIN SHRDLU CMFWYP etc. etc. Here we can grab a
-table of ASCII character frequencies on the interwebs; I used [this
+table of UTF8 character frequencies on the interwebs; I used [this
guy's](http://www.fitaly.com/board/domper3/posts/136.html).
You can calculate the MSE between the observed frequency distribution and the
@@ -111,7 +111,7 @@ Repeating '?' as many times as is necessary and manually xoring bitwise yields:
(N.b. it's worth noting that bitwise xor is simply addition modulo 2 -- i.e.,
addition in GF(2), the Galois field of two elements.)
-The result in ASCII, going through hex, is:
+The result in UTF8, going through hex, is:
$ echo 'obase=16; ibase=2; 0101011101011010010100110101001101010000' | \
bc | xxd -r -p
@@ -119,7 +119,7 @@ The result in ASCII, going through hex, is:
Since xor is its own inverse, going backwards will get us 'hello' again.
-For the actual question here: given an input, one can iterate over the ASCII
+For the actual question here: given an input, one can iterate over the UTF8
printable bytes (in decimal, 32-126), compute the single-byte xor against it,
and calculate its MSE. The result with the smallest MSE is probably the byte
that was used to encrypt it.
@@ -134,7 +134,7 @@ Using a binary that does that, we get:
$ parallel -a data/s1/q4_input.txt ./bin/charfreq | less
Look for strings w/high-frequency bytes and you'll find the following
-w/five hits of ASCII-encoded 21. There's another input in which ']' gets five
+w/five hits of UTF8-encoded 21. There's another input in which ']' gets five
hits, but it doesn't seem to decrypt to anything.
$ INPUT=7b5a4215415d544115415d5015455447414c155c46155f4058455c5b523f
diff --git a/lib/Cryptopals/Util.hs b/lib/Cryptopals/Util.hs
@@ -4,13 +4,17 @@ module Cryptopals.Util (
, hexToB64
, fixedXor
+ , CUS.score
+ , singleByteXor
) where
+import qualified Cryptopals.Util.Similarity as CUS
import qualified Data.Bits as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Base64 as B64
import qualified Data.Text as T
+import GHC.Word (Word8)
newtype Hex = Hex BS.ByteString
deriving (Eq, Show)
@@ -23,11 +27,11 @@ hexToB64 (Hex b) = do
b16 <- B16.decodeBase16 b
pure $ Base64 (B64.encodeBase64' b16)
-fixedXor :: Hex -> Hex -> Either T.Text Hex
-fixedXor (Hex a) (Hex b) = do
- l <- B16.decodeBase16 a
- r <- B16.decodeBase16 b
- if BS.length l /= BS.length r
- then Left "fixedXor: unequal-length buffers"
- else pure $ Hex (B16.encodeBase16' . BS.pack $ BS.zipWith B.xor l r)
+fixedXor :: BS.ByteString -> BS.ByteString -> BS.ByteString
+fixedXor l r = BS.pack $ BS.zipWith B.xor l r
+
+singleByteXor :: Word8 -> Hex -> Either T.Text Hex
+singleByteXor byt (Hex bs) = do
+ s <- B16.decodeBase16 bs
+ pure $ Hex (B16.encodeBase16' . BS.map (B.xor byt) $ s)
diff --git a/lib/Cryptopals/Util/Similarity.hs b/lib/Cryptopals/Util/Similarity.hs
@@ -0,0 +1,145 @@
+module Cryptopals.Util.Similarity (
+ score
+ ) where
+
+import qualified Data.ByteString as BS
+import qualified Data.IntMap.Strict as IMS
+
+-- | Similarity of the byte encoding to English plaintext. Smaller is better.
+score :: BS.ByteString -> Double
+score = mse english . dist
+
+mse :: IMS.IntMap Double -> IMS.IntMap Double -> Double
+mse ref tar =
+ let res = IMS.foldlWithKey' alg mempty ref
+ siz = IMS.size res
+ in IMS.foldl' (\acc val -> (acc + val) / fromIntegral siz) 0 res
+ where
+ alg acc key val = case IMS.lookup key tar of
+ Nothing -> acc
+ Just tal -> IMS.insert key ((tal - val) ^ 2) acc
+
+dist :: BS.ByteString -> IMS.IntMap Double
+dist = normalize . tally
+
+tally :: BS.ByteString -> IMS.IntMap Int
+tally = BS.foldl' alg mempty where
+ alg acc (fromIntegral -> byt)
+ | IMS.member byt acc = IMS.adjust succ byt acc
+ | otherwise = IMS.insert byt 1 acc
+
+normalize :: IMS.IntMap Int -> IMS.IntMap Double
+normalize m =
+ let siz = fromIntegral $ IMS.foldl' (+) 0 m
+ in fmap (\val -> fromIntegral val / siz) m
+
+-- | Observed frequency distribution of bytes in English corpora.
+english :: IMS.IntMap Double
+english = IMS.fromList [
+ (9, 0.000057)
+ , (23, 0.000000)
+ , (32, 0.171662)
+ , (33, 0.000072)
+ , (34, 0.002442)
+ , (35, 0.000179)
+ , (36, 0.000561)
+ , (37, 0.000160)
+ , (38, 0.000226)
+ , (39, 0.002447)
+ , (40, 0.002178)
+ , (41, 0.002233)
+ , (42, 0.000628)
+ , (43, 0.000215)
+ , (44, 0.007384)
+ , (45, 0.013734)
+ , (46, 0.015124)
+ , (47, 0.001549)
+ , (48, 0.005516)
+ , (49, 0.004594)
+ , (50, 0.003322)
+ , (51, 0.001847)
+ , (52, 0.001348)
+ , (53, 0.001663)
+ , (54, 0.001153)
+ , (55, 0.001030)
+ , (56, 0.001054)
+ , (57, 0.001024)
+ , (58, 0.004354)
+ , (59, 0.001214)
+ , (60, 0.001225)
+ , (61, 0.000227)
+ , (62, 0.001242)
+ , (63, 0.001474)
+ , (64, 0.000073)
+ , (65, 0.003132)
+ , (66, 0.002163)
+ , (67, 0.003906)
+ , (68, 0.003151)
+ , (69, 0.002673)
+ , (70, 0.001416)
+ , (71, 0.001876)
+ , (72, 0.002321)
+ , (73, 0.003211)
+ , (74, 0.001726)
+ , (75, 0.000687)
+ , (76, 0.001884)
+ , (77, 0.003529)
+ , (78, 0.002085)
+ , (79, 0.001842)
+ , (80, 0.002614)
+ , (81, 0.000316)
+ , (82, 0.002519)
+ , (83, 0.004003)
+ , (84, 0.003322)
+ , (85, 0.000814)
+ , (86, 0.000892)
+ , (87, 0.002527)
+ , (88, 0.000343)
+ , (89, 0.000304)
+ , (90, 0.000076)
+ , (91, 0.000086)
+ , (92, 0.000016)
+ , (93, 0.000088)
+ , (94, 0.000003)
+ , (95, 0.001159)
+ , (96, 0.000009)
+ , (97, 0.051880)
+ , (98, 0.010195)
+ , (99, 0.021129)
+ , (100, 0.025071)
+ , (101, 0.085771)
+ , (102, 0.013725)
+ , (103, 0.015597)
+ , (104, 0.027444)
+ , (105, 0.049019)
+ , (106, 0.000867)
+ , (107, 0.006753)
+ , (108, 0.031750)
+ , (109, 0.016437)
+ , (110, 0.049701)
+ , (111, 0.057701)
+ , (112, 0.015482)
+ , (113, 0.000747)
+ , (114, 0.042586)
+ , (115, 0.043686)
+ , (116, 0.063700)
+ , (117, 0.020999)
+ , (118, 0.008462)
+ , (119, 0.013034)
+ , (120, 0.001950)
+ , (121, 0.011330)
+ , (122, 0.000596)
+ , (123, 0.000026)
+ , (124, 0.000007)
+ , (125, 0.000026)
+ , (126, 0.000003)
+ , (131, 0.000000)
+ , (149, 0.006410)
+ , (183, 0.000010)
+ , (223, 0.000000)
+ , (226, 0.000000)
+ , (229, 0.000000)
+ , (230, 0.000000)
+ , (237, 0.000000)
+ ]
+
diff --git a/src/BreakSingleByteXor.hs b/src/BreakSingleByteXor.hs
@@ -0,0 +1,65 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Main where
+
+import Control.Monad (unless)
+import qualified Cryptopals.Util as CU
+import qualified Data.ByteString as BS
+import Data.List (foldl')
+import qualified Data.Text as T
+import qualified Data.Text.IO as TIO
+import qualified Data.Text.Encoding as TE
+import GHC.Word (Word8)
+import qualified Options.Applicative as O
+import qualified System.IO as SIO
+
+data Args = Args { argsInp :: T.Text }
+
+ops :: O.Parser Args
+ops = Args <$> O.argument O.str (O.metavar "INPUT")
+
+best :: BS.ByteString -> (Word8, Double, BS.ByteString)
+best s = foldl' alg (0, CU.score s, s) [32..126] where
+ alg acc@(_, asc, _) b =
+ let xo = CU.singleByteXor b s
+ sc = CU.score xo
+ in if sc < asc
+ then (b, sc, xo)
+ else acc
+
+render :: Show a => a -> T.Text
+render = T.pack. show
+
+decipher :: Args -> IO ()
+decipher Args {..} = do
+ let s = TE.encodeUtf8 argsInp
+
+ TIO.hPutStrLn SIO.stderr $
+ "cryptopals: input similarity score is " <> render (CU.score s)
+
+ let (byt, bsc, b) = best s
+
+ unless (b == s) $ do
+
+ TIO.hPutStrLn SIO.stderr (
+ "cryptopals: xor-ing with " <> render byt <>
+ " yields score " <> render bsc
+ )
+
+ TIO.hPutStrLn SIO.stderr $
+ "cryptopals: result"
+
+ TIO.hPutStrLn SIO.stdout $ TE.decodeUtf8 b
+
+main :: IO ()
+main = do
+ let pars = O.info (O.helper <*> ops) $
+ O.fullDesc
+ <> O.progDesc "attempt to break single-byte xor'd ciphertext"
+ <> O.header "break-single-byte-xor"
+
+ args <- O.execParser pars
+
+ decipher args
+
diff --git a/src/FixedXor.hs b/src/FixedXor.hs
@@ -1,13 +1,17 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Main where
-import Cryptopals.Util (Hex(..))
import qualified Cryptopals.Util as CU
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Base16 as B16
import qualified Data.Text as T
-import qualified Data.Text.IO as TIO
import qualified Data.Text.Encoding as TE
+import qualified Data.Text.IO as TIO
import qualified Options.Applicative as O
+import qualified System.Exit as SE
+import qualified System.IO as SIO
data Args = Args {
argsKey :: T.Text
@@ -21,13 +25,21 @@ ops = Args
fxor :: Args -> IO ()
fxor Args {..} = do
- let k = Hex (TE.encodeUtf8 argsKey)
- v = Hex (TE.encodeUtf8 argsInp)
- r = CU.fixedXor k v
-
- case r of
- Left e -> TIO.putStrLn e
- Right (Hex b) -> TIO.putStrLn (TE.decodeUtf8 b)
+ let args = do
+ k <- B16.decodeBase16 $ TE.encodeUtf8 argsKey
+ v <- B16.decodeBase16 $ TE.encodeUtf8 argsInp
+ if BS.length k /= BS.length v
+ then Left "fixed-xor: unequal-length inputs"
+ else pure (k, v)
+
+ case args of
+ Left e -> do
+ TIO.hPutStrLn SIO.stderr ("cryptopals: " <> e)
+ SE.exitFailure
+
+ Right (k, v) -> do
+ let res = CU.fixedXor k v
+ TIO.putStrLn . TE.decodeUtf8 . B16.encodeBase16' $ res
main :: IO ()
main = do