commit e193dc3c3c4facfc2ce0f4a3e314836529330c3a
parent 764abb1eac66fed75c70cd50bb98c976968c6c30
Author: Jared Tobin <jared@jtobin.io>
Date: Thu, 22 Nov 2018 20:51:09 +1300
Update.
Diffstat:
2 files changed, 59 insertions(+), 12 deletions(-)
diff --git a/docs/s1.md b/docs/s1.md
@@ -33,15 +33,15 @@ In Rust it's easy enough to just use the appropriate functionality from the
#### 1.2
-Fixed-xor just encrypts by XOR'ing every bit with some corresponding bit.
+Fixed-xor just encrypts by xoring every bit with some corresponding bit.
An example, using `xxd -b` to output bits this time:
$ parallel -k 'echo -n {} | xxd -b' ::: FAB ICE
00000000: 01000110 01000001 01000010 FAB
00000000: 01001001 01000011 01000101 ICE
-So XOR-ing 'FAB' with 'ICE' can be done manually -- one just computes the
-bitwise XOR:
+So xoring 'FAB' with 'ICE' can be done manually -- one just computes the
+bitwise xor:
0100 0110 0100 0001 0100 0010
0100 1001 0100 0011 0100 0101
@@ -86,6 +86,58 @@ used characters in English. ETAOIN SHRDLU CMFWYP etc. etc. Here we can grab a
table of ASCII character frequencies on the interwebs; I used [this
guy's](http://www.fitaly.com/board/domper3/posts/136.html).
+You can calculate the MSE between the observed frequency distribution and the
+expected one. Just tally and normalise the bytes in the input to get the
+observed distribution, and then:
+
+ fn mse(expected: HashMap<u8, f32>, observed: HashMap<u8, f32>) -> f32 {
+ let mut result = HashMap::new();
+
+ for (key, val) in expected.iter() {
+ if observed.contains_key(key) {
+ let tval = observed.get(key).unwrap();
+ let sqdiff = (tval - val).powf(2.0);
+ result.insert(key, sqdiff);
+ }
+ }
+
+ let size = result.len();
+
+ result.iter().fold(0.0, |sum, (_, val)| sum + val / size as f32)
+ }
+
+In single-byte xor, the entire input has simply been xor'd by.. uh, a single
+byte. Take plaintext 'hello' and '?', for example:
+
+ $ parallel 'echo -n {} | xxd -b' ::: hello ?
+ 00000000: 01101000 01100101 01101100 01101100 01101111 hello
+ 00000000: 00111111 ?
+
+Repeating '?' as many times as is necessary and manually xoring bitwise yields:
+
+ 01101000 01100101 01101100 01101100 01101111
+ 00111111 00111111 00111111 00111111 00111111
+
+ 01010111 01011010 01010011 01010011 01010000
+
+(N.b. it's worth noting that bitwise xor is simply addition modulo 2 -- i.e.,
+addition in GF(2), the Galois field of two elements.)
+
+The result in ASCII, going through hex, is:
+
+ $ echo 'obase=16; ibase=2; 0101011101011010010100110101001101010000' | \
+ bc | xxd -r -p
+ WZSSP
+
+Since xor is its own inverse, going backwards will get us 'hello' again.
+
+For the actual question here: given an input, one can iterate over the ASCII
+printable bytes (in decimal, 32-126), compute the single-byte xor against it,
+and calculate its MSE. The result with the smallest MSE is probably the byte
+that was used to encrypt it.
+
+Using a binary that does that, we get:
+
$ cat data/s1/q3_input.txt | tr -d '\n' | ./bin/break_single_byte_xor
Cooking MC's like a pound of bacon
diff --git a/src/s1c03.rs b/src/s1c03.rs
@@ -138,17 +138,12 @@ pub fn freqs_ascii() -> HashMap<u8, f32> {
].iter().cloned().collect()
}
-fn mse(reference: HashMap<u8, f32>, target: HashMap<u8, f32>) -> f32 {
+fn mse(expected: HashMap<u8, f32>, observed: HashMap<u8, f32>) -> f32 {
let mut result = HashMap::new();
- for (key, val) in reference.iter() {
- if target.contains_key(key) {
-
- // NB. (jtobin)
- //
- // Branch is only entered if 'target' contains 'key', so 'unwrap'
- // can't be called.
- let tval = target.get(key).unwrap();
+ for (key, val) in expected.iter() {
+ if observed.contains_key(key) {
+ let tval = observed.get(key).unwrap();
let sqdiff = (tval - val).powf(2.0);
result.insert(key, sqdiff);
}