diff --git a/src/letter_frequencies.zig b/src/letter_frequencies.zig
new file mode 100644
index 0000000..e87014d
--- /dev/null
+++ b/src/letter_frequencies.zig
@@ -0,0 +1,57 @@
+const std = @import("std");
+const Freq = struct { letter: u8, frequency: f32 };
+
+// Relative frequencies of the letters a-z in English text, scaled so that the
+// rarest letter has a frequency of 1.0. Taken from an unattributed website.
+const en = [_]f32{
+    43.3,
+    10.5,
+    23.1,
+    17.2,
+    56.8,
+    9.2,
+    12.5,
+    15.3,
+    38.4,
+    1.0,
+    5.6,
+    27.9,
+    15.3,
+    33.9,
+    36.5,
+    16.1,
+    1.0,
+    38.6,
+    29.2,
+    35.4,
+    18.5,
+    5.1,
+    6.5,
+    1.4,
+    9.0,
+    1.3,
+};
+
+/// Score one byte by how plausible it is in English text.
+///
+/// Lowercase letters score half their relative frequency from `en`.
+/// Uppercase letters, other printable ASCII and ASCII whitespace score 0.0;
+/// uppercase is deliberately not rewarded because it should be rare in
+/// ordinary text. This weighting is a heuristic that worked in practice.
+/// Any other byte scores `-penalty()`.
+pub fn score_en(ascii: u8) f32 {
+    if (std.ascii.isLower(ascii)) return en[ascii - 'a'] / 2;
+    if (std.ascii.isPrint(ascii) or std.ascii.isWhitespace(ascii)) return 0.0;
+
+    return -penalty(); // unscoreable
+}
+
+/// Mean of the `en` table; its negation is the score of an unscoreable byte.
+/// Public because xor_crack.zig imports it.
+pub fn penalty() comptime_float {
+    var sum: comptime_float = 0.0;
+    inline for (en) |frequency| sum += frequency;
+
+    const count: comptime_float = @floatFromInt(en.len);
+    return sum / count;
+}
diff --git a/src/main.zig b/src/main.zig
index a830a36..f029fbc 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const b64 = @import("base64.zig");
 const hex = @import("hex.zig");
 const xor = @import("xor.zig");
+const xor_crack = @import("xor_crack.zig");
 
 pub fn main() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
@@ -48,4 +49,27 @@ pub fn main() !void {
 
         try stdout.print("{s}", .{out_hex});
     }
+
+    if (std.mem.eql(u8, args[1], "crack-xor")) {
+        const in_a = args[2];
+
+        const buf_a = try hex.decode(allocator, in_a);
+        defer allocator.free(buf_a);
+
+        const oracle = xor_crack.Oracle{
+            .decrypt = &xor.xor_byte_noalloc,
+        };
+
+        const res = try xor_crack.single(allocator, buf_a, oracle) orelse {
+            try stdout.print("Did not find a solution\n", .{});
+            return;
+        };
+
+        try stdout.print("Found solution key={c}, score={d}\n", .{ res[0], res[1] });
+
+        const out = try xor.xor_byte(allocator, buf_a, res[0]);
+        defer allocator.free(out);
+
+        try stdout.print("{s}\n", .{out});
+    }
 }
diff --git a/src/xor.zig b/src/xor.zig
index 762a095..d52c89a 100644
--- a/src/xor.zig
+++ b/src/xor.zig
@@ -12,3 +12,20 @@ pub fn xor_buffers(allocator: std.mem.Allocator, buf_a: []u8, buf_b: []u8) ![]u8
 
     return out;
 }
+
+/// buf_a ^ byte_b. Caller must free result.
+pub fn xor_byte(allocator: std.mem.Allocator, buf_a: []const u8, byte_b: u8) ![]u8 {
+    const out = try allocator.alloc(u8, buf_a.len);
+    xor_byte_noalloc(buf_a, byte_b, out);
+
+    return out;
+}
+
+/// buf_a ^ byte_b, written to the start of out. Caller must provide an out
+/// buffer long enough to hold the result (out.len >= buf_a.len); bytes of
+/// out past buf_a.len are left untouched.
+pub fn xor_byte_noalloc(buf_a: []const u8, byte_b: u8, out: []u8) void {
+    std.debug.assert(out.len >= buf_a.len);
+
+    for (buf_a, out[0..buf_a.len]) |a, *o| o.* = a ^ byte_b;
+}
diff --git a/src/xor_crack.zig b/src/xor_crack.zig
new file mode 100644
index 0000000..f757327
--- /dev/null
+++ b/src/xor_crack.zig
@@ -0,0 +1,50 @@
+const std = @import("std");
+
+const score_en = @import("letter_frequencies.zig").score_en;
+const penalty = @import("letter_frequencies.zig").penalty();
+
+/// Decryption primitive used by the crackers in this file.
+pub const Oracle = struct {
+    /// Decrypts in_buf with key into out_buf. A function pointer, so that
+    /// an Oracle is an ordinary runtime value.
+    decrypt: *const fn (in_buf: []const u8, key: u8, out_buf: []u8) void,
+};
+
+/// Crack a buffer encrypted with single-byte XOR.
+///
+/// Tries every printable ASCII key (32..126), decrypts with the oracle and
+/// sums score_en over the plaintext. Returns the best key and its score, or
+/// null when cipher is empty and there is nothing to score. On equal
+/// scores the lowest key wins and a warning is logged.
+pub fn single(allocator: std.mem.Allocator, cipher: []const u8, oracle: Oracle) !?struct { u8, f32 } {
+    if (cipher.len == 0) return null;
+
+    const out = try allocator.alloc(u8, cipher.len);
+    defer allocator.free(out);
+
+    // Start below every reachable score so the first key always becomes
+    // the initial maximum.
+    var max_key: u8 = 32;
+    var max_score: f32 = -std.math.inf(f32);
+
+    for (32..127) |c| {
+        const key: u8 = @intCast(c);
+        oracle.decrypt(cipher, key, out);
+
+        var buf_score: f32 = 0.0;
+        for (out) |o| buf_score += score_en(o);
+
+        if (buf_score > max_score) {
+            max_key = key;
+            max_score = buf_score;
+        } else if (buf_score == max_score) {
+            std.log.warn("Found equal max scores ({c}, {d}), ({c}, {d})", .{
+                max_key,
+                max_score,
+                key,
+                buf_score,
+            });
+        }
+    }
+
+    return .{ max_key, max_score };
+}