164 lines
5.6 KiB
Zig
164 lines
5.6 KiB
Zig
|
const std = @import("std");
|
||
|
const builtin = @import("builtin");
|
||
|
const native_endian = builtin.cpu.arch.endian();
|
||
|
|
||
|
/// The standard base64 alphabet, indexed by 6-bit value (0..63).
const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encode a byte buffer as padded base64 using `charset`.
///
/// Every group of up to 3 input bytes becomes exactly 4 output characters;
/// a final group of 1 or 2 bytes is padded with `==` or `=` respectively.
/// Empty input yields an empty slice.
///
/// The result is a freshly allocated slice of exactly `(buf.len + 2) / 3 * 4`
/// bytes. The caller owns it and must free it with `alloc`. The only way
/// this function can fail is the allocation itself.
pub fn encode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // The output size is fully determined by the input size, so allocate
    // once instead of growing a list one character at a time.
    const out = try alloc.alloc(u8, (buf.len + 2) / 3 * 4);
    // Nothing below can fail, so no errdefer is required for `out`.

    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) {
        const remaining = buf.len - in_idx;

        // Missing bytes of a short final group are treated as zero; the
        // characters that would consist only of those zero bits are
        // replaced by '=' below.
        const b1: u8 = buf[in_idx];
        const b2: u8 = if (remaining > 1) buf[in_idx + 1] else 0;
        const b3: u8 = if (remaining > 2) buf[in_idx + 2] else 0;

        // top 6 bits of b1
        out[out_idx] = charset[b1 >> 2];
        // bottom 2 bits of b1 + top 4 bits of b2 (the mask drops the bits
        // of b1 that were already emitted)
        out[out_idx + 1] = charset[((b1 << 4) | (b2 >> 4)) & 0b111_111];
        // bottom 4 bits of b2 + top 2 bits of b3
        out[out_idx + 2] = if (remaining > 1)
            charset[((b2 << 2) | (b3 >> 6)) & 0b111_111]
        else
            '=';
        // bottom 6 bits of b3
        out[out_idx + 3] = if (remaining > 2)
            charset[b3 & 0b111_111]
        else
            '=';

        in_idx += 3;
        out_idx += 4;
    }

    return out;
}
|
||
|
|
||
|
/// Decode a padded base64 string to a byte buffer.
///
/// `buf` must consist of complete 4-character groups over the alphabet
/// `A-Za-z0-9+/`. Only the final group may be padded, with `=` in its last
/// position or `==` in its last two positions. Empty input yields an empty
/// slice. Unused low bits in the last data character of a padded group are
/// ignored, not validated.
///
/// The result is a freshly allocated slice of exactly the decoded length.
/// The caller owns it and must free it with `alloc`.
///
/// Errors:
///   - `error.InvalidPadding`: `buf.len` is not a multiple of 4, or a `=`
///     appears anywhere other than the trailing one or two positions of
///     the final group.
///   - `error.InvalidBase64Character`: a byte outside the alphabet.
///   - whatever `alloc` reports when the allocation fails.
pub fn decode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // A trailing partial group would make the loop below index past the
    // end of `buf`, so reject it before touching any data.
    if (buf.len % 4 != 0) return error.InvalidPadding;

    // Padding is only legal at the very end, so the last two characters
    // alone determine how many bytes the final group contributes. Here
    // `buf.len` is either 0 or at least 4, so `buf.len - 2` is in range.
    const pad_len: usize = if (buf.len == 0 or buf[buf.len - 1] != '=')
        0
    else if (buf[buf.len - 2] != '=')
        1
    else
        2;

    const out = try alloc.alloc(u8, buf.len / 4 * 3 - pad_len);
    errdefer alloc.free(out);

    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) : (in_idx += 4) {
        // Decode 4 base64 characters to up to 3 bytes.
        const c1 = try to_bin(buf[in_idx]);
        const c2 = try to_bin(buf[in_idx + 1]);
        const c3 = try to_bin(buf[in_idx + 2]);
        const c4 = try to_bin(buf[in_idx + 3]);

        // Only the final group is allowed to carry padding.
        const group_pad: usize = if (in_idx + 4 == buf.len) pad_len else 0;

        // c1 and c2 always carry data. c3 is padding exactly when the
        // group has two pad characters, c4 exactly when it has at least
        // one. Anything else is a misplaced '='.
        if (c1 == pad_marker or c2 == pad_marker or
            (c3 == pad_marker) != (group_pad == 2) or
            (c4 == pad_marker) != (group_pad >= 1))
        {
            return error.InvalidPadding;
        }

        // From here on every non-padding value is a 6-bit quantity stored
        // in a u8 whose top 2 bits are 0. "all", "top" and "bottom" below
        // refer to those 6 relevant bits only.

        // all bits of c1 + top 2 bits of c2
        out[out_idx] = (c1 << 2) | (c2 >> 4);
        out_idx += 1;

        if (group_pad < 2) {
            // bottom 4 bits of c2 + top 4 bits of c3
            out[out_idx] = ((c2 & 0b0000_1111) << 4) | (c3 >> 2);
            out_idx += 1;
        }

        if (group_pad < 1) {
            // bottom 2 bits of c3 + all 6 bits of c4
            out[out_idx] = ((c3 & 0b0000_0011) << 6) | c4;
            out_idx += 1;
        }
    }

    return out;
}

// Value `to_bin` returns for the padding character '='. It lies outside
// the 6-bit range 0..63, so it cannot collide with a real alphabet value.
const pad_marker: u8 = 0xff;

// Tailor-made helper function for `decode`. This is likely not
// useful anywhere else due to its idiosyncrasies.
//
// Converts a char from the base64 charset back to the corresponding
// binary value.
//
// The result is really just a 6 bit value, but u8 is easier to
// calculate with; the caller must never rely on the highest 2 bits
// of the returned u8 carrying data.
//
// The only exception is the padding char '=', which is returned as
// 0xff (`pad_marker`) and must be handled specially by the caller.
// This is a hack to simplify the code in `decode`.
//
// Any other byte yields `error.InvalidBase64Character`.
fn to_bin(char: u8) !u8 {
    return switch (char) {
        'A'...'Z' => char - 'A',
        'a'...'z' => char - 'a' + 26,
        '0'...'9' => char - '0' + 52,
        '+' => 62,
        '/' => 63,
        '=' => pad_marker,
        else => error.InvalidBase64Character,
    };
}
|
||
|
|
||
|
// Table-driven check: every base64 text must decode to its paired raw bytes.
test "b64 decode" {
    const Case = struct {
        raw: []const u8,
        text: []const u8,
    };

    const cases = [_]Case{
        .{ .raw = &[_]u8{0x00}, .text = "AA==" },
        .{ .raw = &[_]u8{0xFF}, .text = "/w==" },
        .{ .raw = &[_]u8{ 0x01, 0x02, 0x03 }, .text = "AQID" },
        .{ .raw = &[_]u8{ 0xFE, 0xFE, 0xFE }, .text = "/v7+" },
        .{ .raw = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, .text = "f39/fw==" },
        .{ .raw = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, .text = "f39/f39/" },
        .{ .raw = &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, .text = "/v7+/v7+/g==" },
        .{ .raw = &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, .text = "AAAAAAAAAAE=" },
    };

    const gpa = std.testing.allocator;
    for (cases) |case| {
        const decoded = try decode(gpa, case.text);
        defer gpa.free(decoded);
        try std.testing.expectEqualSlices(u8, case.raw, decoded);
    }
}
|
||
|
|
||
|
// Table-driven check: every raw byte sequence must encode to its paired
// base64 text.
test "b64 encode" {
    const Vector = struct {
        input: []const u8,
        expected: []const u8,
    };

    const vectors = [_]Vector{
        .{ .input = &[_]u8{0x00}, .expected = "AA==" },
        .{ .input = &[_]u8{0xFF}, .expected = "/w==" },
        .{ .input = &[_]u8{ 0x01, 0x02, 0x03 }, .expected = "AQID" },
        .{ .input = &[_]u8{ 0xFE, 0xFE, 0xFE }, .expected = "/v7+" },
        .{ .input = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, .expected = "f39/fw==" },
        .{ .input = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, .expected = "f39/f39/" },
        .{ .input = &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, .expected = "/v7+/v7+/g==" },
        .{ .input = &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, .expected = "AAAAAAAAAAE=" },
    };

    const gpa = std.testing.allocator;
    for (vectors) |vector| {
        const encoded = try encode(gpa, vector.input);
        defer gpa.free(encoded);

        try std.testing.expectEqualStrings(vector.expected, encoded);
    }
}
|