// matasano/src/base64.zig

const std = @import("std");
const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();

const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encode a byte buffer as padded base64 (standard alphabet, `=` padding).
///
/// Every group of up to 3 input bytes becomes exactly 4 output characters,
/// so the result length is `ceil(buf.len / 3) * 4` and is allocated once.
///
/// Returns a slice allocated with `alloc`; the caller must free it.
/// Fails only with `error.OutOfMemory`.
pub fn encode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // The output size is known up front: 4 characters per started 3-byte group.
    const out = try alloc.alloc(u8, (buf.len + 2) / 3 * 4);

    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) : ({
        in_idx += 3;
        out_idx += 4;
    }) {
        // Number of real input bytes in this group (1, 2 or 3 for the last
        // group, always >= 3 before it).
        const remaining = buf.len - in_idx;

        // Missing bytes of a short final group are treated as zero bits.
        const b1 = buf[in_idx];
        const b2: u8 = if (remaining > 1) buf[in_idx + 1] else 0;
        const b3: u8 = if (remaining > 2) buf[in_idx + 2] else 0;

        // top 6 bit of b1
        out[out_idx] = charset[b1 >> 2];
        // last 2 bit of b1 + top 4 bit of b2 (the mask drops the bits of b1
        // that were already emitted)
        out[out_idx + 1] = charset[((b1 << 4) | (b2 >> 4)) & 0b111_111];
        // last 4 bit of b2 + top 2 bit of b3, or padding if b2 does not exist
        out[out_idx + 2] = if (remaining > 1)
            charset[((b2 << 2) | (b3 >> 6)) & 0b111_111]
        else
            '=';
        // last 6 bit of b3, or padding if b3 does not exist
        out[out_idx + 3] = if (remaining > 2)
            charset[b3 & 0b111_111]
        else
            '=';
    }

    return out;
}

// Marker returned by `to_bin` for the padding character '='. It lies outside
// the 6-bit value range 0...63, so it can never collide with a real digit.
const pad_sentinel: u8 = 0xff;

/// Decode a padded base64 string (standard alphabet) to a byte buffer.
///
/// Returns a slice allocated with `alloc`; the caller must free it.
///
/// Errors:
/// - `error.InvalidLength`: `buf.len` is not a multiple of 4.
/// - `error.InvalidBase64Character`: a byte that is neither part of the
///   alphabet nor `=`.
/// - `error.InvalidPadding`: `=` appears anywhere other than as the last one
///   or two characters of the final 4-character group.
/// - `error.OutOfMemory`: the output buffer could not be allocated.
pub fn decode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // Padded base64 always comes in groups of 4 characters. Checking this up
    // front also guarantees that every `buf[in_idx + k]` below is in bounds.
    if (buf.len % 4 != 0) return error.InvalidLength;

    // Count the trailing '=' (at most 2) so the output can be sized exactly.
    // A non-empty input has at least 4 characters here, so `len - 2` is safe.
    var pad_len: usize = 0;
    if (buf.len != 0) {
        if (buf[buf.len - 1] == '=') pad_len += 1;
        if (pad_len == 1 and buf[buf.len - 2] == '=') pad_len += 1;
    }

    // 3 bytes per group, minus one byte per padding character.
    const out = try alloc.alloc(u8, buf.len / 4 * 3 - pad_len);
    errdefer alloc.free(out);

    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) : (in_idx += 4) {
        // Decode 4 base64 characters to (up to) 3 bytes.
        const c1 = try to_bin(buf[in_idx]);
        const c2 = try to_bin(buf[in_idx + 1]);
        const c3 = try to_bin(buf[in_idx + 2]);
        const c4 = try to_bin(buf[in_idx + 3]);

        // Reject padding wherever it would be decoded as data bits:
        // - the first two characters of a group are never padding,
        // - "x=y" inside a group is malformed,
        // - padding is only allowed in the final group.
        const is_last_group = in_idx + 4 == buf.len;
        if (c1 == pad_sentinel or c2 == pad_sentinel) return error.InvalidPadding;
        if (c3 == pad_sentinel and c4 != pad_sentinel) return error.InvalidPadding;
        if (c4 == pad_sentinel and !is_last_group) return error.InvalidPadding;

        // From here on every non-padding value is a 6-bit quantity stored in
        // a u8 whose top 2 bits are zero; "top"/"bottom" refer to those 6 bits.

        // all 6 bit of c1 + top 2 bit of c2
        out[out_idx] = (c1 << 2) | (c2 >> 4);
        out_idx += 1;
        // "xx==": the group carries a single byte.
        if (c3 == pad_sentinel) break;

        // bottom 4 bit of c2 + top 4 bit of c3
        out[out_idx] = ((c2 & 0b0000_1111) << 4) | (c3 >> 2);
        out_idx += 1;
        // "xxx=": the group carries two bytes.
        if (c4 == pad_sentinel) break;

        // bottom 2 bit of c3 + all 6 bit of c4
        out[out_idx] = ((c3 & 0b0000_0011) << 6) | c4;
        out_idx += 1;
    }

    // The padding checks above make the bytes written match the size computed
    // from the trailing '=' count.
    std.debug.assert(out_idx == out.len);
    return out;
}

// Tailor-made helper function for `decode`. This is likely not
// useful anywhere else due to its idiosyncrasies.
//
// Converts a char from the b64 charset back to the corresponding binary
// value.
//
// This yields just a 6 bit value, but u8 is easier to calculate with;
// the caller must handle this correctly and never rely on the highest
// 2 bit of the returned u8.
//
// The only exception to this is the padding '=' which is returned as
// `pad_sentinel` (0xff) and must be handled specially. This is a hack
// to simplify the code in `decode`.
//
// Any other byte yields `error.InvalidBase64Character`.
fn to_bin(char: u8) !u8 {
    return switch (char) {
        'A'...'Z' => char - 'A',
        'a'...'z' => char - 'a' + 26,
        '0'...'9' => char - '0' + 52,
        '+' => 62,
        '/' => 63,
        '=' => pad_sentinel,
        else => error.InvalidBase64Character,
    };
}

test "b64 decode" {
    const testing = std.testing;

    // Each case pairs a base64 text with the raw bytes it must decode to.
    const Case = struct {
        encoded: []const u8,
        raw: []const u8,
    };
    const cases = [_]Case{
        .{ .encoded = "AA==", .raw = &[_]u8{0x00} },
        .{ .encoded = "/w==", .raw = &[_]u8{0xFF} },
        .{ .encoded = "AQID", .raw = &[_]u8{ 0x01, 0x02, 0x03 } },
        .{ .encoded = "/v7+", .raw = &[_]u8{ 0xFE, 0xFE, 0xFE } },
        .{ .encoded = "f39/fw==", .raw = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F } },
        .{ .encoded = "f39/f39/", .raw = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F } },
        .{ .encoded = "/v7+/v7+/g==", .raw = &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE } },
        .{ .encoded = "AAAAAAAAAAE=", .raw = &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
    };

    for (cases) |case| {
        const decoded = try decode(testing.allocator, case.encoded);
        defer testing.allocator.free(decoded);
        try testing.expectEqualSlices(u8, case.raw, decoded);
    }
}

test "b64 encode" {
    const testing = std.testing;

    // Each case pairs raw bytes with the base64 text they must encode to.
    const Case = struct {
        raw: []const u8,
        encoded: []const u8,
    };
    const cases = [_]Case{
        .{ .raw = &[_]u8{0x00}, .encoded = "AA==" },
        .{ .raw = &[_]u8{0xFF}, .encoded = "/w==" },
        .{ .raw = &[_]u8{ 0x01, 0x02, 0x03 }, .encoded = "AQID" },
        .{ .raw = &[_]u8{ 0xFE, 0xFE, 0xFE }, .encoded = "/v7+" },
        .{ .raw = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, .encoded = "f39/fw==" },
        .{ .raw = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, .encoded = "f39/f39/" },
        .{ .raw = &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, .encoded = "/v7+/v7+/g==" },
        .{ .raw = &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, .encoded = "AAAAAAAAAAE=" },
    };

    for (cases) |case| {
        const encoded = try encode(testing.allocator, case.raw);
        defer testing.allocator.free(encoded);

        try testing.expectEqualStrings(case.encoded, encoded);
    }
}
Convert hex to base64 2025-02-07 21:35:45 +01:00			`const std = @import("std");`
			`const builtin = @import("builtin");`
			`const native_endian = builtin.cpu.arch.endian();`

			`const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";`

			`/// Encode a byte buffer with base64. Caller must free the result.`
			`pub fn encode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {`
			`var out = std.ArrayList(u8).init(alloc);`
			`defer out.deinit();`

			`var in_idx: usize = 0;`
			`var out_idx: usize = 0;`

			`while (in_idx < buf.len) {`
			`const b1 = buf[in_idx];`
			`const b2 = if (in_idx + 1 < buf.len) buf[in_idx + 1] else 0;`
			`const b3 = if (in_idx + 2 < buf.len) buf[in_idx + 2] else 0;`

			`// top 6 bit of b1`
			`try out.append(charset[(b1 >> 2) & 0b111_111]);`
			`// last 4 bit of b1 + top 4 bit of b2 & 0b111_111`
			`// -> last 2 bit of b1 + top 4 bit of b2`
			`try out.append(charset[((b1 << 4) \| (b2 >> 4)) & 0b111_111]);`

			`if (in_idx + 1 < buf.len) {`
			`// last 6 bit of b2 + top 2 bit of b3 & 0b111_111`
			`// -> last 4 bit of b2 + top 2 bit of b3`
			`try out.append(charset[((b2 << 2) \| (b3 >> 6)) & 0b111_111]);`
			`} else {`
			`try out.append('=');`
			`}`

			`if (in_idx + 2 < buf.len) {`
			`// last 6 bit of b3`
			`try out.append(charset[b3 & 0b111_111]);`
			`} else {`
			`try out.append('=');`
			`}`

			`in_idx += 3;`
			`out_idx += 4;`
			`}`

			`return out.toOwnedSlice();`
			`}`

			`/// Decode a base64 string to a byte buffer. Caller must free the result.`
			`pub fn decode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {`
			`var out = std.ArrayList(u8).init(alloc);`
			`defer out.deinit();`

			`var in_idx: usize = 0;`
			`var out_idx: usize = 0;`

			`while (in_idx < buf.len) {`
			`// Decode 4 Base64 characters to 3 bytes`
			`const c1 = try to_bin(buf[in_idx]);`
			`const c2 = try to_bin(buf[in_idx + 1]);`
			`const c3 = try to_bin(buf[in_idx + 2]);`
			`const c4 = try to_bin(buf[in_idx + 3]);`

			`// note that we can assume that the u8 is actually just a u6`
			`// and the high 2 bit are always 0`
			`//`
			`// also the terms all, top, and bottom refer to the 6 relevant`
			`// bit only`

			`// all bit of c1 at top of byte + top 2 bit of c2 at bottom`
			`try out.append((c1 << 2) \| (c2 >> 4));`
			`// bottom 4 bit of c2 + top 4 bit of c3`
			`try out.append(((c2 & 0b0000_1111) << 4) \| (c3 >> 2));`

			`if (c3 != 0xff and c4 != 0xff) {`
			`// bottom 2 bit of c3 + all 6 bits of c4`
			`try out.append(((c3 & 0b0000_0011) << 6) \| c4);`
			`} else if (c3 != 0xff and c4 == 0xff) {`
			`// chop off 2 bits, i.e. just don't add last 2 bits of c3`
			`// and c4 is ignored anyways since it is the padding`
			`} else if (c3 == 0xff and c4 == 0xff) {`
			`// chop off 4 bits. this means the last 2 6-bit values`
			`// produced only 8-bit output instead of 12. That means we`
			`// can just drop the last byte, why:`
			`// for the last byte we used 4 bit of c2 and 4 bit of c3`
			`// c3 is the padding so can be dropped`
			`// the 2 padding bytes mean we can drop 4 bit of the`
			`// non-padding values, which is exactly the 4 bits`
			`// we used from c2`
			`_ = out.pop();`
			`} else {`
			`return error.InvalidPadding;`
			`}`

			`in_idx += 4;`
			`out_idx += 3;`
			`}`

			`return out.toOwnedSlice();`
			`}`

			// Tailor-made helper function for `decode`. This is likely not
			`// useful anywhere else ever due to idiosyncrasies.`
			`//`
			`// Converts a char from b64 charset back to the corresponding binary`
			`// value.`
			`//`
			`// This yields actually just a 6 bit value but u8 is easier to`
			`// calculate with, the caller must handle this correctly and never use`
			`// the highest 2 bit of the returned u8.`
			`//`
			`// The only exception to this is the padding '=' which is`
			`// returned as 0xff and must be handled specially. This is a`
			// hack to simplify the code in `decode`.
			`fn to_bin(char: u8) !u8 {`
			`return switch (char) {`
			`'A'...'Z' => char - 'A',`
			`'a'...'z' => char - 'a' + 26,`
			`'0'...'9' => char - '0' + 52,`
			`'+' => 62,`
			`'/' => 63,`
			`'=' => 0xff,`
			`else => error.InvalidBase64Character,`
			`};`
			`}`

			`test "b64 decode" {`
			`const data = [_][2][]const u8{`
			`.{ &[_]u8{0x00}, "AA==" },`
			`.{ &[_]u8{0xFF}, "/w==" },`
			`.{ &[_]u8{ 0x01, 0x02, 0x03 }, "AQID" },`
			`.{ &[_]u8{ 0xFE, 0xFE, 0xFE }, "/v7+" },`
			`.{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, "f39/fw==" },`
			`.{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, "f39/f39/" },`
			`.{ &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, "/v7+/v7+/g==" },`
			`.{ &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, "AAAAAAAAAAE=" },`
			`};`

			`for (data) \|d\| {`
			`const res = try decode(std.testing.allocator, d[1]);`
			`defer std.testing.allocator.free(res);`
			`try std.testing.expectEqualSlices(u8, d[0], res);`
			`}`
			`}`

			`test "b64 encode" {`
			`const data = [_][2][]const u8{`
			`.{ &[_]u8{0x00}, "AA==" },`
			`.{ &[_]u8{0xFF}, "/w==" },`
			`.{ &[_]u8{ 0x01, 0x02, 0x03 }, "AQID" },`
			`.{ &[_]u8{ 0xFE, 0xFE, 0xFE }, "/v7+" },`
			`.{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, "f39/fw==" },`
			`.{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, "f39/f39/" },`
			`.{ &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, "/v7+/v7+/g==" },`
			`.{ &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, "AAAAAAAAAAE=" },`
			`};`

			`for (data) \|d\| {`
			`const res = try encode(std.testing.allocator, d[0]);`
			`defer std.testing.allocator.free(res);`

			`try std.testing.expectEqualStrings(d[1], res);`
			`}`
			`}`