commit cf916d5e5d5165d3bc3f343e35a2655859a129ae
parent d54dbabe2b0294e471ff9b390aeeea8df64c9563
Author: Jared Tobin <jared@jtobin.io>
Date: Thu, 10 Aug 2023 20:57:39 -0230
Add 4.29.
Diffstat:
6 files changed, 315 insertions(+), 12 deletions(-)
diff --git a/.ghci b/.ghci
@@ -4,6 +4,8 @@ import qualified Data.Binary.Get as BG
import qualified Data.Binary.Put as BP
import qualified Data.Bits as B
import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Base64 as B64
diff --git a/cryptopals.cabal b/cryptopals.cabal
@@ -25,6 +25,7 @@ library
, Cryptopals.Block.Tools
, Cryptopals.Digest.Pure.SHA
, Cryptopals.MAC
+ , Cryptopals.MAC.Attacks
, Cryptopals.Stream.Attacks
, Cryptopals.Stream.RNG
, Cryptopals.Stream.RNG.MT19937
@@ -43,6 +44,7 @@ library
, primitive
, text
, time
+ , transformers
, unordered-containers
, vector
diff --git a/docs/s4.md b/docs/s4.md
@@ -125,13 +125,145 @@ such that, trivially:
#### 4.28
-Using the SHA1 implementation from the 'sha' package under the hood,
-Cryptopals.MAC.sha1mac implements the desired MAC (i.e. message
-authentication code):
+Using the SHA1 (Secure Hashing Algorithm) implementation from the 'sha'
+package under the hood, Cryptopals.MAC.sha1mac implements the desired
+MAC (i.e. message authentication code):
- > sha1mac "YELLOW SUBMARINE" "question 4.28"
+ > let mac = sha1mac "YELLOW SUBMARINE" "question 4.28"
+ > B16.encodeBase16 . BSL.toStrict $ mac
"45b5bb1ab02988df4609ff1227c90fe997236719"
+verifysha1mac verifies a MAC given a key and message:
+
+ > verifysha1mac "YELLOW SUBMARINE" mac "question 4.28"
+ True
+
+and we obviously can't tamper with anything without the MAC failing to
+verify:
+
+ > verifysha1mac "YELLOW SUBMARINE" mac "question 4.29"
+ False
+
#### 4.29
+So, length extension on SHA-1. A preliminary note: "MD
+padding" refers to Merkle-Damgård compliant padding, described
+[here](https://en.wikipedia.org/wiki/Merkle%E2%80%93Damg%C3%A5rd_constru
+ction) and concretely elaborated on in
+[RFC1321](http://www.faqs.org/rfcs/rfc1321.html), section 3.1, and
+[RFC3174](http://www.faqs.org/rfcs/rfc3174.html), section 4. The latter
+contains the TLDR summary:
+
+> The purpose of message padding is to make the total length of a padded
+> message a multiple of 512. SHA-1 sequentially processes blocks of
+> 512 bits when computing the message digest. The following specifies
+> how this padding shall be performed. As a summary, a "1" followed by
+> m "0"s followed by a 64- bit integer are appended to the end of the
+> message to produce a padded message of length 512 * n. The 64-bit
+> integer is the length of the original message.
+
+The crux of this attack is that, like the Mersenne Twister, the SHA1
+algorithm (indeed, all SHA algorithms) maintains internal state.
+In the Mersenne Twister we use the state and temper it ("twisting"
+occasionally) to produce outputs; in SHA1, the output on termination
+of the (padded) input *is* the internal state, i.e. five 32-bit words,
+glued together. Any hash output thus corresponds to the internal state
+of the algorithm at the end of some (padded) input.
+
+Here, as the attacker, we know the message -- but not the key -- used
+to produce the MAC, and we know the MAC itself. The MAC corresponds
+to the internal state of SHA1 when we finished hashing the original
+`key <> message` input, padded to a multiple of 512 bits. So, we can
+reinitialise SHA1 with that state and continue hashing further input,
+the goal being to produce a MAC that verifies for an arbitrary extension
+of the original message without knowledge of the secret key.
+
+(N.b., two things in this challenge drove me crazy. The first was
+getting the padding right everywhere, something which broke my brain
+for awhile until things eventually slotted into place. The second was a
+really, really stupid error, in which I accidentally treated a 40-byte
+UTF8-encoded hex string as a 20-byte bytestring, messing up the internal
+state of SHA1 whenever I'd restore it from an output. That was annoying
+to track down.)
+
+*Alors*. Various functions in Cryptopals.MAC.Attacks will be employed to
+forge a MAC from another MAC. First, as recommended, let's hack together
+something to grab a key from /usr/share/dict:
+
+ key :: IO BSL.ByteString
+ key = do
+ gen <- MWC.createSystemRandom
+ idx <- MWC.uniformR (0, 235885) gen
+ dict <- BL8.readFile "/usr/share/dict/words"
+ let ls = BL8.lines dict
+ pure $ ls !! idx
+
+and then grab it and produce a mac ('raw' is the given input text):
+
+ > k <- key
+ > let mac = CM.sha1mac k raw
+
+Now, the evil message for which we will forge a MAC. This evil message
+must include the *original* padding of the 'key + message' input used to
+produce the MAC, since SHA1 stopped hashing on completion of pad(key +
+message). All we know is that the message length is at least the same as
+'raw', and, for nontrivial keys, is strictly more.
+
+Similarly, to verify integrity, one computes sha1(key + message) and
+checks that it equals the provided MAC. I.e., for an evil message and
+forged MAC, one checks that:
+
+ sha1(key + evil) == forged
+
+SHA1 will terminate at pad(key + evil), which includes the total
+message length of 'key + evil'. So we must ensure that our resumed
+length-extension hash uses this padding.
+
+As best I can tell, in order to guess productively at the key length
+used to construct the original MAC, we need access to an oracle that can
+validate message/MAC pairs for us. A type-y way to simulate this is via
+the following 'forge' procedure that, while interacting with something
+that needs a key, doesn't make use of a key itself:
+
+ forge
+ :: BSL.ByteString
+ -> BSL.ByteString
+ -> BSL.ByteString
+ -> R.Reader BSL.ByteString (BSL.ByteString, BSL.ByteString)
+ forge input mac addl = loop 0 where
+ loop j = do
+ let len = fromIntegral $ BSL.length input
+ evil = pad (len + j) input <> addl
+ rs = inject mac
+ p = fromIntegral (BSL.length evil) + j
+ forged = sha1 rs p addl
+ validates <- oracleValidates evil forged
+ if validates
+ then pure (evil, forged)
+ else loop (succ j)
+
+ oracleValidates
+ :: BSL.ByteString
+ -> BSL.ByteString
+ -> R.Reader BSL.ByteString Bool
+ oracleValidates msg mac = do
+ k <- R.ask
+ pure $ CM.verifysha1mac k mac msg
+
+'sha1' here calls the modified SHA1 allowing us to 1) initialize its
+internal state from the provided registers, and 2) use the specified
+message length for padding, instead of calculating it from the provided
+bytestring.
+
+So, with all that, let's *cackles* forge the evil message and a MAC that
+will validate for it. 'mal' is the malicious text ";admin=true":
+
+ > let (evil, forged) = R.runReader (forge raw mac mal) k
+ > evil
+ "comment1=cooking%20MCs;userdata=foo;comment2=%20like%20a%20pound%20of%20ba
+ con\128\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL
+ \NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NU
+ L\NUL\NUL\NUL\STX\176;admin=true"
+ > B16.encodeBase16 . BSL.toStrict $ forged
+ "0ad748ec8eef1a0b510b01b8f9ff692cf050bd15"
diff --git a/lib/Cryptopals/Digest/Pure/SHA.hs b/lib/Cryptopals/Digest/Pure/SHA.hs
@@ -1,4 +1,4 @@
--- NB (jtobin): this entire module is a copy-paste of
+-- NB (jtobin): this entire module is a modified copy-paste of
--
-- https://hackage.haskell.org/package/SHA-1.6.4.4/docs/src/Data.Digest.Pure.SHA.html
@@ -9,12 +9,13 @@
module Cryptopals.Digest.Pure.SHA
( -- * 'Digest' and related functions
Digest
- , SHA1State, SHA256State, SHA512State
+ , SHA1State(..), SHA256State, SHA512State
, showDigest
, integerDigest
, bytestringDigest
-- * Calculating hashes
, sha1
+ , sha1', sha1''
, sha224
, sha256
, sha384
@@ -40,6 +41,8 @@ module Cryptopals.Digest.Pure.SHA
, calc_k
, padSHA1, padSHA512
, padSHA1Chunks, padSHA512Chunks
+ -- etc (jtobin)
+ , getSHA1
)
where
@@ -78,6 +81,7 @@ instance Binary (Digest SHA512State) where
-- --------------------------------------------------------------------------
data SHA1State = SHA1S !Word32 !Word32 !Word32 !Word32 !Word32
+ deriving Show
initialSHA1State :: SHA1State
initialSHA1State = SHA1S 0x67452301 0xefcdab89 0x98badcfe 0x10325476 0xc3d2e1f0
@@ -997,6 +1001,50 @@ sha1 bs_in = Digest bs_out
fstate = runSHA initialSHA1State processSHA1Block bs_pad
bs_out = runPut $! synthesizeSHA1 fstate
+-- required padding bytes
+pbytes :: Integral a => a -> a
+pbytes ((\k -> 64 - k `mod` 64) -> l)
+ | l == 0 = l + 56
+ | otherwise = l - 8
+
+-- padding for a supplied message, using arbitrary bytelength n
+evilpadding :: Word64 -> BS.ByteString -> BS.ByteString
+evilpadding n bs = runPut $ do
+ putWord8 128
+ loop (pred (pbytes (BS.length bs)))
+ where
+ loop l
+ | l == 0 = putWord64be (n * 8)
+ | otherwise = do
+ putWord8 0
+ loop (pred l)
+
+-- sha1 with specified internal state and manual padding
+sha1'
+ :: Word32
+ -> Word32
+ -> Word32
+ -> Word32
+ -> Word32
+ -> Word64
+ -> ByteString
+ -> Digest SHA1State
+sha1' a b c d e n bs_in = Digest bs_out
+ where
+ bs_pad = bs_in <> evilpadding n bs_in
+ fstate = runSHA init processSHA1Block bs_pad
+ bs_out = runPut $! synthesizeSHA1 fstate
+ init :: SHA1State
+ init = SHA1S a b c d e
+
+-- sha1 with manual padding
+sha1'' :: Word64 -> ByteString -> Digest SHA1State
+sha1'' n bs_in = Digest bs_out
+ where
+ bs_pad = bs_in <> evilpadding n bs_in
+ fstate = runSHA initialSHA1State processSHA1Block bs_pad
+ bs_out = runPut $! synthesizeSHA1 fstate
+
-- |Similar to `sha1` but use an incremental interface. When the decoder has
-- been completely fed, `completeSha1Incremental` must be used so it can
-- finish successfully.
diff --git a/lib/Cryptopals/MAC.hs b/lib/Cryptopals/MAC.hs
@@ -1,15 +1,22 @@
module Cryptopals.MAC (
- sha1mac
+ sha1
+ , sha1mac
+ , verifysha1mac
) where
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.ByteString.Lazy as BSL
-import qualified Cryptopals.Digest.Pure.SHA as S (Digest, SHA1State, sha1)
+import qualified Cryptopals.Digest.Pure.SHA as S
-sha1mac :: BS.ByteString -> BS.ByteString -> BS.ByteString
-sha1mac k m = B8.pack . show . hash . BSL.fromStrict $ k <> m where
+sha1 :: BSL.ByteString -> BSL.ByteString
+sha1 = S.bytestringDigest . S.sha1
+
+sha1mac :: BSL.ByteString -> BSL.ByteString -> BSL.ByteString
+sha1mac k m = S.bytestringDigest . hash $ k <> m where
hash :: BSL.ByteString -> S.Digest S.SHA1State
hash = S.sha1
+verifysha1mac :: BSL.ByteString -> BSL.ByteString -> BSL.ByteString -> Bool
+verifysha1mac key mac message = sha1mac key message == mac
+
diff --git a/lib/Cryptopals/MAC/Attacks.hs b/lib/Cryptopals/MAC/Attacks.hs
@@ -0,0 +1,112 @@
+{-# LANGUAGE ApplicativeDo #-}
+
+module Cryptopals.MAC.Attacks where
+
+import qualified Control.Monad.Trans.Reader as R
+import qualified Data.ByteString.Lazy as BSL
+import qualified Data.ByteString.Lazy.Char8 as BL8
+import qualified Data.Binary.Get as BG
+import qualified Data.Binary.Put as BP
+import qualified Data.Bits as B
+import qualified Data.ByteString as BS
+import qualified Cryptopals.MAC as CM
+import qualified Cryptopals.Digest.Pure.SHA as S
+import GHC.Word (Word8, Word32, Word64)
+import qualified System.Random.MWC as MWC
+
+-- sha1-keyed MAC via length extension
+
+-- FIXME maybe move some of this to a digest module or something
+
+data SHA1Registers = SHA1Registers !Word32 !Word32 !Word32 !Word32 !Word32
+ deriving (Eq, Show)
+
+sha1 :: SHA1Registers -> Word64 -> BSL.ByteString -> BSL.ByteString
+sha1 (SHA1Registers a b c d e) n s =
+ S.bytestringDigest $ S.sha1' a b c d e n s
+
+-- pad a message using the specified message length
+pad :: Word64 -> BSL.ByteString -> BSL.ByteString
+pad n bs = bs <> padding n
+
+padding :: Word64 -> BSL.ByteString
+padding n = BP.runPut $ do
+ BP.putWord8 128
+ loop (pred (pbytes n))
+ where
+ loop l
+ | l == 0 = BP.putWord64be (n * 8)
+ | otherwise = do
+ BP.putWord8 0
+ loop (pred l)
+
+ pbytes :: Integral a => a -> a
+ pbytes ((\k -> 64 - k `mod` 64) -> l)
+ | l == 0 = l + 56
+ | otherwise = l - 8
+
+inject :: BSL.ByteString -> SHA1Registers
+inject = BG.runGet $ do
+ a <- BG.getWord32be
+ b <- BG.getWord32be
+ c <- BG.getWord32be
+ d <- BG.getWord32be
+ e <- BG.getWord32be
+ pure $ SHA1Registers a b c d e
+
+extract :: SHA1Registers -> BSL.ByteString
+extract (SHA1Registers a b c d e) = BP.runPut $ do
+ BP.putWord32be a
+ BP.putWord32be b
+ BP.putWord32be c
+ BP.putWord32be d
+ BP.putWord32be e
+
+raw :: BSL.ByteString
+raw = mconcat [
+ "comment1=cooking%20MCs;userdata=foo;"
+ , "comment2=%20like%20a%20pound%20of%20bacon"
+ ]
+
+mal :: BSL.ByteString
+mal = ";admin=true"
+
+-- procedure
+--
+-- k <- key
+-- let mac = CM.sha1mac k raw
+-- let (evil, forged) = R.runReader (forge raw mac mal) k
+
+key :: IO BSL.ByteString
+key = do
+ gen <- MWC.createSystemRandom
+ idx <- MWC.uniformR (0, 235885) gen
+ dict <- BL8.readFile "/usr/share/dict/words"
+ let ls = BL8.lines dict
+ pure $ ls !! idx
+
+forge
+ :: BSL.ByteString
+ -> BSL.ByteString
+ -> BSL.ByteString
+ -> R.Reader BSL.ByteString (BSL.ByteString, BSL.ByteString)
+forge input mac addl = loop 0 where
+ loop j = do
+ let len = fromIntegral $ BSL.length input
+ evil = pad (len + j) input <> addl
+ rs = inject mac
+ p = fromIntegral (BSL.length evil) + j
+ forged = sha1 rs p addl
+ validates <- oracleValidates evil forged
+ if validates
+ then pure (evil, forged)
+ else loop (succ j)
+
+oracleValidates
+ :: BSL.ByteString
+ -> BSL.ByteString
+ -> R.Reader BSL.ByteString Bool
+oracleValidates msg mac = do
+ k <- R.ask
+ pure $ CM.verifysha1mac k mac msg
+