commit d7a7e8a4ad79f5dbbde63831b6831b90d401baf5 Author: Armin Friedl Date: Fri Feb 7 21:35:45 2025 +0100 Convert hex to base64 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4ac67a5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,79 @@ +# Created by https://www.toptal.com/developers/gitignore/api/zig,emacs,linux +# Edit at https://www.toptal.com/developers/gitignore?templates=zig,emacs,linux + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +### Linux ### + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### zig ### +# Zig programming language + +.zig-cache/ +zig-out/ +build/ +build-*/ +docgen_tmp/ +.idea + +# End of https://www.toptal.com/developers/gitignore/api/zig,emacs,linux \ No newline at end of file diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..8053445 --- /dev/null +++ b/build.zig @@ -0,0 +1,91 @@ +const std = @import("std"); + +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. +pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. 
Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + + const lib = b.addStaticLibrary(.{ + .name = "matasano", + // In this case the main source file is merely a path, however, in more + // complicated build scripts, this could be a generated file. + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + // This declares intent for the library to be installed into the standard + // location when the user invokes the "install" step (the default step when + // running `zig build`). + b.installArtifact(lib); + + const exe = b.addExecutable(.{ + .name = "matasano", + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`). + b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary, however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. 
+ run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it. + const lib_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); + + const exe_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_lib_unit_tests.step); + test_step.dependOn(&run_exe_unit_tests.step); +} diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..7ceb608 --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,72 @@ +.{ + // This is the default name used by packages depending on this one. For + // example, when a user runs `zig fetch --save `, this field is used + // as the key in the `dependencies` table. Although the user can choose a + // different name, most users will stick with this provided value. + // + // It is redundant to include "zig" in this name because it is already + // within the Zig package namespace. 
+ .name = "matasano", + + // This is a [Semantic Version](https://semver.org/). + // In a future version of Zig it will be used for package deduplication. + .version = "0.0.0", + + // This field is optional. + // This is currently advisory only; Zig does not yet do anything + // with this value. + //.minimum_zig_version = "0.11.0", + + // This field is optional. + // Each dependency must either provide a `url` and `hash`, or a `path`. + // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. + // Once all dependencies are fetched, `zig build` no longer requires + // internet connectivity. + .dependencies = .{ + // See `zig fetch --save ` for a command-line interface for adding dependencies. + //.example = .{ + // // When updating this field to a new URL, be sure to delete the corresponding + // // `hash`, otherwise you are communicating that you expect to find the old hash at + // // the new URL. + // .url = "https://example.com/foo.tar.gz", + // + // // This is computed from the file contents of the directory of files that is + // // obtained after fetching `url` and applying the inclusion rules given by + // // `paths`. + // // + // // This field is the source of truth; packages do not come from a `url`; they + // // come from a `hash`. `url` is just one of many possible mirrors for how to + // // obtain a package matching this `hash`. + // // + // // Uses the [multihash](https://multiformats.io/multihash/) format. + // .hash = "...", + // + // // When this is provided, the package is found in a directory relative to the + // // build root. In this case the package's hash is irrelevant and therefore not + // // computed. This field and `url` are mutually exclusive. + // .path = "foo", + + // // When this is set to `true`, a package is declared to be lazily + // // fetched. This makes the dependency only get fetched if it is + // // actually used. 
+ // .lazy = false, + //}, + }, + + // Specifies the set of files and directories that are included in this package. + // Only files and directories listed here are included in the `hash` that + // is computed for this package. Only files listed here will remain on disk + // when using the zig package manager. As a rule of thumb, one should list + // files required for compilation plus any license(s). + // Paths are relative to the build root. Use the empty string (`""`) to refer to + // the build root itself. + // A directory listed here means that all files within, recursively, are included. + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + // For example... + //"LICENSE", + //"README.md", + }, +} diff --git a/src/base64.zig b/src/base64.zig new file mode 100644 index 0000000..42727dc --- /dev/null +++ b/src/base64.zig @@ -0,0 +1,163 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const native_endian = builtin.cpu.arch.endian(); + +const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/// Encode a byte buffer with base64. Caller must free the result. 
/// Produces `=`-padded base64 text for `buf` using the file-level `charset`
/// alphabet (A-Z, a-z, 0-9, `+`, `/`). Every group of up to three input
/// bytes becomes exactly four output characters, so the result length is
/// always a multiple of four; an empty `buf` yields an empty slice.
///
/// The returned slice is allocated with `alloc` and owned by the caller.
/// The only possible failure is `error.OutOfMemory`.
pub fn encode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // The output size is known up front: four characters per (possibly
    // partial) three-byte group. Allocating once avoids regrowing a list.
    const group_count = (buf.len + 2) / 3;
    const out = try alloc.alloc(u8, group_count * 4);

    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) : ({
        in_idx += 3;
        out_idx += 4;
    }) {
        // Bytes left in the input, counting the current one (always >= 1).
        const remaining = buf.len - in_idx;

        // Missing bytes of a short final group are treated as zero bits.
        const b1 = buf[in_idx];
        const b2: u8 = if (remaining > 1) buf[in_idx + 1] else 0;
        const b3: u8 = if (remaining > 2) buf[in_idx + 2] else 0;

        // top 6 bits of b1
        out[out_idx] = charset[b1 >> 2];
        // bottom 2 bits of b1 + top 4 bits of b2
        out[out_idx + 1] = charset[((b1 << 4) | (b2 >> 4)) & 0b11_1111];
        // bottom 4 bits of b2 + top 2 bits of b3, or padding if b2 is absent
        out[out_idx + 2] = if (remaining > 1)
            charset[((b2 << 2) | (b3 >> 6)) & 0b11_1111]
        else
            '=';
        // bottom 6 bits of b3, or padding if b3 is absent
        out[out_idx + 3] = if (remaining > 2) charset[b3 & 0b11_1111] else '=';
    }

    return out;
}

/// Decode a base64 string to a byte buffer. Caller must free the result.
/// Accepts `=`-padded base64 text over the alphabet understood by `to_bin`
/// and returns the decoded bytes in a slice allocated with `alloc`; the
/// caller owns that slice. An empty `buf` yields an empty slice.
///
/// Errors:
/// - `error.InvalidInputLength`: `buf.len` is not a multiple of four.
/// - `error.InvalidBase64Character`: a character outside the alphabet
///   (reported by `to_bin`).
/// - `error.InvalidPadding`: `=` appears anywhere except as the last one or
///   two characters of the whole input.
/// - `error.OutOfMemory`: the result buffer could not be allocated.
pub fn decode(alloc: std.mem.Allocator, buf: []const u8) ![]u8 {
    // Input is consumed four characters at a time; reject anything else up
    // front instead of indexing past the end of `buf`.
    if (buf.len % 4 != 0) return error.InvalidInputLength;

    // Padding is only legal as the final one or two characters. Each '='
    // removes one byte from the three the last quad would otherwise produce.
    var pad: usize = 0;
    if (buf.len != 0 and buf[buf.len - 1] == '=') {
        pad = if (buf[buf.len - 2] == '=') 2 else 1;
    }

    const out = try alloc.alloc(u8, buf.len / 4 * 3 - pad);
    errdefer alloc.free(out);

    // Characters at positions >= data_len are the trailing padding.
    const data_len = buf.len - pad;

    var in_idx: usize = 0;
    var out_idx: usize = 0;
    while (in_idx < buf.len) : (in_idx += 4) {
        // 6-bit values of the current quad; padding slots stay zero.
        var sextets = [4]u8{ 0, 0, 0, 0 };
        for (&sextets, 0..) |*sextet, k| {
            const pos = in_idx + k;
            if (pos >= data_len) break;

            const value = try to_bin(buf[pos]);
            // `to_bin` reports '=' as 0xff; here it sits where data is required.
            if (value == 0xff) return error.InvalidPadding;
            sextet.* = value;
        }

        // At least 3 for every quad but the last; 1..3 for the last one.
        const remaining = out.len - out_idx;

        // all 6 bits of c1 + top 2 bits of c2
        out[out_idx] = (sextets[0] << 2) | (sextets[1] >> 4);
        // bottom 4 bits of c2 + top 4 bits of c3
        if (remaining > 1) {
            out[out_idx + 1] = ((sextets[1] & 0b0000_1111) << 4) | (sextets[2] >> 2);
        }
        // bottom 2 bits of c3 + all 6 bits of c4
        if (remaining > 2) {
            out[out_idx + 2] = ((sextets[2] & 0b0000_0011) << 6) | sextets[3];
        }

        out_idx += if (remaining < 3) remaining else 3;
    }

    return out;
}

// Tailor-made helper function for `decode`. This is likely not
// useful anywhere else ever due to idiosyncrasies.
//
// Converts a char from b64 charset back to the corresponding binary
// value.
+// +// This yields actually just a 6 byte value but u8 is easier to +// calculate with, the caller must handle this correctly and never use +// the highest 2 bit of the returned u8. +// +// The only exception to this is the padding '=' which is +// returned as 0xff and must be handled specially. This is a +// hack to simplify the code in `decode`. +fn to_bin(char: u8) !u8 { + return switch (char) { + 'A'...'Z' => char - 'A', + 'a'...'z' => char - 'a' + 26, + '0'...'9' => char - '0' + 52, + '+' => 62, + '/' => 63, + '=' => 0xff, + else => error.InvalidBase64Character, + }; +} + +test "b64 decode" { + const data = [_][2][]const u8{ + .{ &[_]u8{0x00}, "AA==" }, + .{ &[_]u8{0xFF}, "/w==" }, + .{ &[_]u8{ 0x01, 0x02, 0x03 }, "AQID" }, + .{ &[_]u8{ 0xFE, 0xFE, 0xFE }, "/v7+" }, + .{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, "f39/fw==" }, + .{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, "f39/f39/" }, + .{ &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, "/v7+/v7+/g==" }, + .{ &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, "AAAAAAAAAAE=" }, + }; + + for (data) |d| { + const res = try decode(std.testing.allocator, d[1]); + defer std.testing.allocator.free(res); + try std.testing.expectEqualSlices(u8, d[0], res); + } +} + +test "b64 encode" { + const data = [_][2][]const u8{ + .{ &[_]u8{0x00}, "AA==" }, + .{ &[_]u8{0xFF}, "/w==" }, + .{ &[_]u8{ 0x01, 0x02, 0x03 }, "AQID" }, + .{ &[_]u8{ 0xFE, 0xFE, 0xFE }, "/v7+" }, + .{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F }, "f39/fw==" }, + .{ &[_]u8{ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }, "f39/f39/" }, + .{ &[_]u8{ 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }, "/v7+/v7+/g==" }, + .{ &[_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, "AAAAAAAAAAE=" }, + }; + + for (data) |d| { + const res = try encode(std.testing.allocator, d[0]); + defer std.testing.allocator.free(res); + + try std.testing.expectEqualStrings(d[1], res); + } +} diff --git a/src/hex.zig b/src/hex.zig new file mode 100644 index 0000000..c07a37a --- /dev/null +++ 
b/src/hex.zig @@ -0,0 +1,106 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +/// Convert hex string to binary byte array. Caller must free the +/// result. +pub fn decode(allocator: Allocator, hex: []const u8) ![]u8 { + if (hex.len % 2 != 0) return error.InvalidInputLength; + + var out = try allocator.alloc(u8, hex.len / 2); + errdefer allocator.free(out); + + const hexToByte = struct { + fn f(h: u8) !u4 { + return switch (h) { + '0'...'9' => @intCast(h - '0'), + 'A'...'F' => @intCast(h - 'A' + 10), + 'a'...'f' => @intCast(h - 'a' + 10), + else => return error.InvalidHex, + }; + } + }.f; + + var i: usize = 0; + while (i < hex.len) : (i += 2) { + const hi: u4 = try hexToByte(hex[i]); + const lo: u4 = try hexToByte(hex[i + 1]); + out[i / 2] = (@as(u8, hi) << 4) | lo; + } + + return out; +} + +/// Convert binary byte array to hex string. Caller must free the +/// result. +pub fn encode(allocator: Allocator, bin: []const u8) ![]u8 { + var out = try allocator.alloc(u8, bin.len * 2); + errdefer allocator.free(out); + + for (bin, 0..) 
|b, i| { + const hi: u8 = (b >> 4) & 0b0000_1111; + const lo: u8 = b & 0b0000_1111; + + out[i * 2] = if (hi < 10) hi + '0' else hi - 10 + 'A'; + out[i * 2 + 1] = if (lo < 10) lo + '0' else lo - 10 + 'A'; + } + + return out; +} + +test "encode" { + const allocator = std.testing.allocator; + + const hex_bytes = [_][2][]const u8{ + .{ "AB", &[_]u8{0b1010_1011} }, + .{ "00", &[_]u8{0b0000_0000} }, + .{ "FF", &[_]u8{0b1111_1111} }, + .{ "5C", &[_]u8{0b0101_1100} }, + .{ "5CF0", &[_]u8{ 0b0101_1100, 0b1111_0000 } }, + .{ "5CF0FF00ABABAB00", &[_]u8{ + 0b0101_1100, + 0b1111_0000, + 0b1111_1111, + 0b0000_0000, + 0b1010_1011, + 0b1010_1011, + 0b1010_1011, + 0b0000_0000, + } }, + }; + + for (hex_bytes) |hex_byte| { + const res = try encode(allocator, hex_byte[1]); + defer allocator.free(res); + + try std.testing.expectEqualSlices(u8, hex_byte[0], res); + } +} + +test "decode" { + const allocator = std.testing.allocator; + + const hex_bytes = [_][2][]const u8{ + .{ "AB", &[_]u8{0b1010_1011} }, + .{ "00", &[_]u8{0b0000_0000} }, + .{ "FF", &[_]u8{0b1111_1111} }, + .{ "5C", &[_]u8{0b0101_1100} }, + .{ "5CF0", &[_]u8{ 0b0101_1100, 0b1111_0000 } }, + .{ "5CF0FF00ABABAB00", &[_]u8{ + 0b0101_1100, + 0b1111_0000, + 0b1111_1111, + 0b0000_0000, + 0b1010_1011, + 0b1010_1011, + 0b1010_1011, + 0b0000_0000, + } }, + }; + + for (hex_bytes) |hex_byte| { + const res = try decode(allocator, hex_byte[0]); + defer allocator.free(res); + + try std.testing.expectEqualSlices(u8, hex_byte[1], res); + } +} diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..72509b6 --- /dev/null +++ b/src/main.zig @@ -0,0 +1,31 @@ +const std = @import("std"); +const b64 = @import("base64.zig"); +const hex = @import("hex.zig"); + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + + const args = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, args); + + const stdout = std.io.getStdOut().writer(); + + if 
(std.mem.eql(u8, args[1], "b64")) { + if (std.mem.eql(u8, args[2], "-d")) { + const result = try b64.decode(allocator, args[3]); + defer allocator.free(result); + + try stdout.print("{s}", .{result}); + } + if (std.mem.eql(u8, args[2], "-e")) { + const buf = try hex.decode(allocator, args[3]); + defer allocator.free(buf); + + const result = try b64.encode(allocator, buf); + defer allocator.free(result); + + try stdout.print("{s}", .{result}); + } + } +} diff --git a/src/root.zig b/src/root.zig new file mode 100644 index 0000000..ecfeade --- /dev/null +++ b/src/root.zig @@ -0,0 +1,10 @@ +const std = @import("std"); +const testing = std.testing; + +export fn add(a: i32, b: i32) i32 { + return a + b; +} + +test "basic add functionality" { + try testing.expect(add(3, 7) == 10); +}