Convert hex to base64

2025-02-07 21:35:45 +01:00 · 2025-02-07 21:35:45 +01:00 · d7a7e8a4ad
commit d7a7e8a4ad
7 changed files with 552 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,79 @@
 # Created by https://www.toptal.com/developers/gitignore/api/zig,emacs,linux
 # Edit at https://www.toptal.com/developers/gitignore?templates=zig,emacs,linux
 ### Emacs ###
 # -*- mode: gitignore; -*-
 *~
 \#*\#
 /.emacs.desktop
 /.emacs.desktop.lock
 *.elc
 auto-save-list
 tramp
 .\#*
 # Org-mode
 .org-id-locations
 *_archive
 # flymake-mode
 *_flymake.*
 # eshell files
 /eshell/history
 /eshell/lastdir
 # elpa packages
 /elpa/
 # reftex files
 *.rel
 # AUCTeX auto folder
 /auto/
 # cask packages
 .cask/
 dist/
 # Flycheck
 flycheck_*.el
 # server auth directory
 /server/
 # projectiles files
 .projectile
 # directory configuration
 .dir-locals.el
 # network security
 /network-security.data
 ### Linux ###
 # temporary files which can be created if a process still has a handle open of a deleted file
 .fuse_hidden*
 # KDE directory preferences
 .directory
 # Linux trash folder which might appear on any partition or disk
 .Trash-*
 # .nfs files are created when an open file is removed but is still being accessed
 .nfs*
 ### zig ###
 # Zig programming language
 .zig-cache/
 zig-out/
 build/
 build-*/
 docgen_tmp/
 .idea
 # End of https://www.toptal.com/developers/gitignore/api/zig,emacs,linux
--- a/build.zig
+++ b/build.zig
@ -0,0 +1,91 @@
 const std = @import("std");
 /// Describes how to build, run, and test the `matasano` package.
 ///
 /// The calls below only register steps on `b`; the build runner decides
 /// afterwards which of them actually execute.
 pub fn build(b: *std.Build) void {
    // Target and optimization mode are taken from the `zig build` command
    // line. No defaults are overridden here, so any target and any of
    // Debug, ReleaseSafe, ReleaseFast or ReleaseSmall can be requested.
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
    // Static library rooted at src/root.zig; installed by the default
    // "install" step.
    const static_lib = b.addStaticLibrary(.{
        .name = "matasano",
        .root_source_file = b.path("src/root.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(static_lib);
    // Command-line executable rooted at src/main.zig; installed the same way.
    const app = b.addExecutable(.{
        .name = "matasano",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(app);
    // `zig build run [-- arg1 arg2 ...]`: run the executable. Depending on
    // the install step means it is launched from the install directory
    // instead of from the cache directory.
    const run_app = b.addRunArtifact(app);
    run_app.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded| run_app.addArgs(forwarded);
    b.step("run", "Run the app").dependOn(&run_app.step);
    // `zig build test`: build and run the unit tests of both root files.
    const lib_tests = b.addTest(.{
        .root_source_file = b.path("src/root.zig"),
        .target = target,
        .optimize = optimize,
    });
    const run_lib_tests = b.addRunArtifact(lib_tests);
    const app_tests = b.addTest(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    const run_app_tests = b.addRunArtifact(app_tests);
    const test_step = b.step("test", "Run unit tests");
    for ([_]*std.Build.Step{ &run_lib_tests.step, &run_app_tests.step }) |dependency| {
        test_step.dependOn(dependency);
    }
 }
--- a/build.zig.zon
+++ b/build.zig.zon
@ -0,0 +1,72 @@
 .{
    // This is the default name used by packages depending on this one. For
    // example, when a user runs `zig fetch --save <url>`, this field is used
    // as the key in the `dependencies` table. Although the user can choose a
    // different name, most users will stick with this provided value.
    //
    // It is redundant to include "zig" in this name because it is already
    // within the Zig package namespace.
    .name = "matasano",
    // This is a [Semantic Version](https://semver.org/).
    // In a future version of Zig it will be used for package deduplication.
    .version = "0.0.0",
    // This field is optional.
    // This is currently advisory only; Zig does not yet do anything
    // with this value.
    //.minimum_zig_version = "0.11.0",
    // This field is optional.
    // Each dependency must either provide a `url` and `hash`, or a `path`.
    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
    // Once all dependencies are fetched, `zig build` no longer requires
    // internet connectivity.
    .dependencies = .{
        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the corresponding
        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
        //    // the new URL.
        //    .url = "https://example.com/foo.tar.gz",
        //
        //    // This is computed from the file contents of the directory of files that is
        //    // obtained after fetching `url` and applying the inclusion rules given by
        //    // `paths`.
        //    //
        //    // This field is the source of truth; packages do not come from a `url`; they
        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
        //    // obtain a package matching this `hash`.
        //    //
        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
        //    .hash = "...",
        //
        //    // When this is provided, the package is found in a directory relative to the
        //    // build root. In this case the package's hash is irrelevant and therefore not
        //    // computed. This field and `url` are mutually exclusive.
        //    .path = "foo",
        //    // When this is set to `true`, a package is declared to be lazily
        //    // fetched. This makes the dependency only get fetched if it is
        //    // actually used.
        //    .lazy = false,
        //},
    },
    // Specifies the set of files and directories that are included in this package.
    // Only files and directories listed here are included in the `hash` that
    // is computed for this package. Only files listed here will remain on disk
    // when using the zig package manager. As a rule of thumb, one should list
    // files required for compilation plus any license(s).
    // Paths are relative to the build root. Use the empty string (`""`) to refer to
    // the build root itself.
    // A directory listed here means that all files within, recursively, are included.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        // For example...
        //"LICENSE",
        //"README.md",
    },
 }
--- a/src/base64.zig
+++ b/src/base64.zig
@ -0,0 +1,163 @@
 const std = @import("std");
 const builtin = @import("builtin");
 const native_endian = builtin.cpu.arch.endian();
 // The standard base64 alphabet; a 6-bit value is the index of its character.
 const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 /// Encode a byte buffer as base64 with `=` padding.
 ///
 /// The result always has a length that is a multiple of 4 (empty input
 /// yields an empty slice). Caller must free the result with `alloc`.
 /// Fails only with `error.OutOfMemory`.
 pub fn encode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // Every started group of 3 input bytes produces exactly 4 output
    // characters, so the output size is known up front and one allocation
    // of the exact size is enough.
    const out = try alloc.alloc(u8, (buf.len + 2) / 3 * 4);
    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) : ({
        in_idx += 3;
        out_idx += 4;
    }) {
        const remaining = buf.len - in_idx;
        // Bytes missing from a short final group are treated as zero; the
        // characters that would consist only of those bits become '='.
        const b1 = buf[in_idx];
        const b2: u8 = if (remaining > 1) buf[in_idx + 1] else 0;
        const b3: u8 = if (remaining > 2) buf[in_idx + 2] else 0;
        // top 6 bits of b1
        out[out_idx] = charset[b1 >> 2];
        // bottom 2 bits of b1 + top 4 bits of b2
        out[out_idx + 1] = charset[((b1 << 4) | (b2 >> 4)) & 0b111_111];
        // bottom 4 bits of b2 + top 2 bits of b3
        out[out_idx + 2] = if (remaining > 1)
            charset[((b2 << 2) | (b3 >> 6)) & 0b111_111]
        else
            '=';
        // bottom 6 bits of b3
        out[out_idx + 3] = if (remaining > 2) charset[b3 & 0b111_111] else '=';
    }
    return out;
 }
 /// Decode a padded base64 string to a byte buffer. Caller must free the
 /// result with `alloc`.
 ///
 /// Errors:
 /// - `error.InvalidInputLength`: `buf.len` is not a multiple of 4.
 /// - `error.InvalidBase64Character`: a byte that is neither part of the
 ///   base64 alphabet nor `=`.
 /// - `error.InvalidPadding`: `=` anywhere other than the last one or two
 ///   positions of the input.
 /// - `error.OutOfMemory`: the result could not be allocated.
 pub fn decode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // Each group of 4 characters is read as a unit; rejecting other lengths
    // up front keeps the indexing below in bounds.
    if (buf.len % 4 != 0) return error.InvalidInputLength;
    // Padding is only accepted at the very end, so the number of trailing
    // '=' determines the exact output size. Misplaced padding is rejected
    // in the loop before anything is written for the offending group.
    var pad: usize = 0;
    if (buf.len > 0 and buf[buf.len - 1] == '=') {
        pad = if (buf[buf.len - 2] == '=') 2 else 1;
    }
    const out = try alloc.alloc(u8, buf.len / 4 * 3 - pad);
    errdefer alloc.free(out);
    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) : (in_idx += 4) {
        // Decode 4 base64 characters to (up to) 3 bytes.
        const c1 = try to_bin(buf[in_idx]);
        const c2 = try to_bin(buf[in_idx + 1]);
        const c3 = try to_bin(buf[in_idx + 2]);
        const c4 = try to_bin(buf[in_idx + 3]);
        const is_last = in_idx + 4 == buf.len;
        // The first two characters of a group always carry data.
        if (c1 == 0xff or c2 == 0xff) return error.InvalidPadding;
        // "x=y" is never valid: padding must run to the end of the group.
        if (c3 == 0xff and c4 != 0xff) return error.InvalidPadding;
        // Only the final group may be padded.
        if (c4 == 0xff and !is_last) return error.InvalidPadding;
        // From here on the values are 6-bit (high 2 bits are 0) unless they
        // are the padding marker, which is checked before use.
        //
        // all 6 bits of c1 + top 2 bits of c2
        out[out_idx] = (c1 << 2) | (c2 >> 4);
        out_idx += 1;
        // Two padding characters: the group encodes a single byte.
        if (c3 == 0xff) continue;
        // bottom 4 bits of c2 + top 4 bits of c3
        out[out_idx] = ((c2 & 0b0000_1111) << 4) | (c3 >> 2);
        out_idx += 1;
        // One padding character: the group encodes two bytes.
        if (c4 == 0xff) continue;
        // bottom 2 bits of c3 + all 6 bits of c4
        out[out_idx] = ((c3 & 0b0000_0011) << 6) | c4;
        out_idx += 1;
    }
    return out;
 }
 // Tailor-made helper for `decode`; unlikely to be useful anywhere else
 // because of the padding convention described below.
 //
 // Converts a character from the base64 alphabet back to the binary value
 // it stands for.
 //
 // The value is really only 6 bits wide; u8 is used because it is easier to
 // calculate with. The high 2 bits of the returned u8 are always 0.
 //
 // The one exception is the padding character '=', which is returned as
 // 0xff and must be handled specially by the caller.
 //
 // Any other byte yields `error.InvalidBase64Character`.
 fn to_bin(char: u8) !u8 {
    return switch (char) {
        'A'...'Z' => char - 'A',
        'a'...'z' => char - 'a' + 26,
        '0'...'9' => char - '0' + 52,
        '+' => 62,
        '/' => 63,
        '=' => 0xff,
        else => error.InvalidBase64Character,
    };
 }
 // Table-driven check that `decode` maps each base64 text back to the bytes
 // it encodes, covering zero, one and two padding characters.
 test "b64 decode" {
    const Case = struct { bytes: []const u8, text: []const u8 };
    const cases = [_]Case{
        .{ .bytes = &[_]u8{0x00}, .text = "AA==" },
        .{ .bytes = &[_]u8{0xFF}, .text = "/w==" },
        .{ .bytes = &[_]u8{ 0x01, 0x02, 0x03 }, .text = "AQID" },
        .{ .bytes = &[_]u8{ 0xFE, 0xFE, 0xFE }, .text = "/v7+" },
        .{ .bytes = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, .text = "f39/fw==" },
        .{ .bytes = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, .text = "f39/f39/" },
        .{ .bytes = &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, .text = "/v7+/v7+/g==" },
        .{ .bytes = &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, .text = "AAAAAAAAAAE=" },
    };
    const allocator = std.testing.allocator;
    for (cases) |case| {
        const decoded = try decode(allocator, case.text);
        defer allocator.free(decoded);
        try std.testing.expectEqualSlices(u8, case.bytes, decoded);
    }
 }
 // Table-driven check that `encode` produces the expected padded base64
 // text for inputs whose length is 0, 1 and 2 modulo 3.
 test "b64 encode" {
    const Case = struct { bytes: []const u8, text: []const u8 };
    const cases = [_]Case{
        .{ .bytes = &[_]u8{0x00}, .text = "AA==" },
        .{ .bytes = &[_]u8{0xFF}, .text = "/w==" },
        .{ .bytes = &[_]u8{ 0x01, 0x02, 0x03 }, .text = "AQID" },
        .{ .bytes = &[_]u8{ 0xFE, 0xFE, 0xFE }, .text = "/v7+" },
        .{ .bytes = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, .text = "f39/fw==" },
        .{ .bytes = &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, .text = "f39/f39/" },
        .{ .bytes = &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, .text = "/v7+/v7+/g==" },
        .{ .bytes = &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, .text = "AAAAAAAAAAE=" },
    };
    const allocator = std.testing.allocator;
    for (cases) |case| {
        const encoded = try encode(allocator, case.bytes);
        defer allocator.free(encoded);
        try std.testing.expectEqualStrings(case.text, encoded);
    }
 }
--- a/src/hex.zig
+++ b/src/hex.zig
@ -0,0 +1,106 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 /// Parse a string of hex digit pairs (upper or lower case) into bytes.
 ///
 /// Returns `error.InvalidInputLength` when `hex` has an odd number of
 /// characters and `error.InvalidHex` when it contains a byte that is not a
 /// hex digit. The caller owns the returned slice and must free it with
 /// `allocator`.
 pub fn decode(allocator: Allocator, hex: []const u8) ![]u8 {
    if (hex.len % 2 != 0) return error.InvalidInputLength;
    // Value of a single hex digit.
    const nibble = struct {
        fn of(digit: u8) !u4 {
            return switch (digit) {
                '0'...'9' => @intCast(digit - '0'),
                'a'...'f' => @intCast(digit - 'a' + 10),
                'A'...'F' => @intCast(digit - 'A' + 10),
                else => error.InvalidHex,
            };
        }
    }.of;
    const bytes = try allocator.alloc(u8, hex.len / 2);
    // Release the buffer again if a digit turns out to be invalid.
    errdefer allocator.free(bytes);
    for (bytes, 0..) |*byte, n| {
        const hi: u8 = try nibble(hex[2 * n]);
        const lo: u8 = try nibble(hex[2 * n + 1]);
        byte.* = (hi << 4) | lo;
    }
    return bytes;
 }
 /// Render `bin` as an upper-case hex string, two characters per byte.
 ///
 /// The caller owns the returned slice and must free it with `allocator`.
 pub fn encode(allocator: Allocator, bin: []const u8) ![]u8 {
    const digits = "0123456789ABCDEF";
    const text = try allocator.alloc(u8, bin.len * 2);
    var pos: usize = 0;
    for (bin) |byte| {
        // High nibble first, then low nibble.
        text[pos] = digits[byte >> 4];
        text[pos + 1] = digits[byte & 0x0F];
        pos += 2;
    }
    return text;
 }
 // Table-driven check that `encode` renders bytes as upper-case hex text.
 test "encode" {
    const Case = struct { text: []const u8, bytes: []const u8 };
    const cases = [_]Case{
        .{ .text = "AB", .bytes = &[_]u8{0b1010_1011} },
        .{ .text = "00", .bytes = &[_]u8{0b0000_0000} },
        .{ .text = "FF", .bytes = &[_]u8{0b1111_1111} },
        .{ .text = "5C", .bytes = &[_]u8{0b0101_1100} },
        .{ .text = "5CF0", .bytes = &[_]u8{ 0b0101_1100, 0b1111_0000 } },
        .{
            .text = "5CF0FF00ABABAB00",
            .bytes = &[_]u8{
                0b0101_1100,
                0b1111_0000,
                0b1111_1111,
                0b0000_0000,
                0b1010_1011,
                0b1010_1011,
                0b1010_1011,
                0b0000_0000,
            },
        },
    };
    const allocator = std.testing.allocator;
    for (cases) |case| {
        const encoded = try encode(allocator, case.bytes);
        defer allocator.free(encoded);
        try std.testing.expectEqualSlices(u8, case.text, encoded);
    }
 }
 // Table-driven check that `decode` turns hex text back into the bytes it
 // spells out.
 test "decode" {
    const Case = struct { text: []const u8, bytes: []const u8 };
    const cases = [_]Case{
        .{ .text = "AB", .bytes = &[_]u8{0b1010_1011} },
        .{ .text = "00", .bytes = &[_]u8{0b0000_0000} },
        .{ .text = "FF", .bytes = &[_]u8{0b1111_1111} },
        .{ .text = "5C", .bytes = &[_]u8{0b0101_1100} },
        .{ .text = "5CF0", .bytes = &[_]u8{ 0b0101_1100, 0b1111_0000 } },
        .{
            .text = "5CF0FF00ABABAB00",
            .bytes = &[_]u8{
                0b0101_1100,
                0b1111_0000,
                0b1111_1111,
                0b0000_0000,
                0b1010_1011,
                0b1010_1011,
                0b1010_1011,
                0b0000_0000,
            },
        },
    };
    const allocator = std.testing.allocator;
    for (cases) |case| {
        const decoded = try decode(allocator, case.text);
        defer allocator.free(decoded);
        try std.testing.expectEqualSlices(u8, case.bytes, decoded);
    }
 }
--- a/src/main.zig
+++ b/src/main.zig
@ -0,0 +1,31 @@
 const std = @import("std");
 const b64 = @import("base64.zig");
 const hex = @import("hex.zig");
 /// Command-line entry point.
 ///
 /// Usage:
 ///   matasano b64 -e <hex>      hex-decode <hex> and print its base64 encoding
 ///   matasano b64 -d <base64>   print the bytes encoded by <base64>
 ///
 /// Arguments after the third one are ignored. Missing arguments, an unknown
 /// command or an unknown flag print a usage line to stderr and fail with
 /// `error.InvalidArguments`; decoding and I/O errors are propagated.
 pub fn main() !void {
    const usage = "usage: matasano b64 (-e <hex> | -d <base64>)\n";
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    // Runs last (after the argument vector is freed) so the allocator can
    // report anything that was leaked.
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();
    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);
    // args[0] is the program name; command, flag and payload must follow.
    if (args.len < 4 or !std.mem.eql(u8, args[1], "b64")) {
        std.debug.print(usage, .{});
        return error.InvalidArguments;
    }
    const flag = args[2];
    const payload = args[3];
    const stdout = std.io.getStdOut().writer();
    if (std.mem.eql(u8, flag, "-d")) {
        const result = try b64.decode(allocator, payload);
        defer allocator.free(result);
        try stdout.writeAll(result);
    } else if (std.mem.eql(u8, flag, "-e")) {
        // The payload is given as hex so arbitrary bytes can be passed on
        // the command line.
        const buf = try hex.decode(allocator, payload);
        defer allocator.free(buf);
        const result = try b64.encode(allocator, buf);
        defer allocator.free(result);
        try stdout.writeAll(result);
    } else {
        std.debug.print(usage, .{});
        return error.InvalidArguments;
    }
 }
--- a/src/root.zig
+++ b/src/root.zig
@ -0,0 +1,10 @@
 const std = @import("std");
 const testing = std.testing;
 /// Returns the sum of `a` and `b`. Exported from the library under the
 /// symbol name `add`.
 export fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
 }
 // Smoke test for the exported `add`.
 test "basic add functionality" {
    const sum = add(3, 7);
    try testing.expectEqual(@as(i32, 10), sum);
 }