commit f6beb617bdfcde64a4a6f3b426437410dd4ec49b Author: Armin Friedl Date: Sun Jun 16 17:03:13 2024 +0200 Initial working bencode and torrent parser diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8518230 --- /dev/null +++ b/.gitignore @@ -0,0 +1,245 @@ +# Created by https://www.toptal.com/developers/gitignore/api/zig,emacs,jetbrains+all,linux,windows,macos,vim +# Edit at https://www.toptal.com/developers/gitignore?templates=zig,emacs,jetbrains+all,linux,windows,macos,vim + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +### JetBrains+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. 
Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### JetBrains+all Patch ### +# Ignore everything but code style settings and run configurations +# that are supposed to be shared within teams. + +.idea/* + +!.idea/codeStyles +!.idea/runConfigurations + +### Linux ### + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] 
+[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +### zig ### +# Zig programming language + +zig-cache/ +zig-out/ +build/ +build-*/ +docgen_tmp/ + +# End of https://www.toptal.com/developers/gitignore/api/zig,emacs,jetbrains+all,linux,windows,macos,vim diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..fd79589 --- /dev/null +++ b/build.zig @@ -0,0 +1,71 @@ +const std = @import("std"); + +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. +pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + + const exe = b.addExecutable(.{ + .name = "zephyr", + // In this case the main source file is merely a path, however, in more + // complicated build scripts, this could be a generated file. 
+ .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + exe.linkSystemLibrary("c"); + + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`). + b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary, however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. + run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it. + const unit_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + const run_unit_tests = b.addRunArtifact(unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. 
+ const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_unit_tests.step); +} diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..4a97a49 --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,6 @@ +.{ + .name = "zephyr", + .version = "0.0.1", + .paths = .{""}, + .dependencies = .{}, +} diff --git a/src/bencode.zig b/src/bencode.zig new file mode 100644 index 0000000..9bb5a27 --- /dev/null +++ b/src/bencode.zig @@ -0,0 +1,340 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const Reader = std.io.Reader; +const StringHashMap = std.StringHashMap; + +pub const BType = union(enum) { + Integer: i64, + String: []const u8, + List: []const BType, + Dict: StringHashMap(BType), + + pub fn get_as(self: BType, comptime tag: std.meta.Tag(BType), key: []const u8) !std.meta.TagPayload(BType, tag) { + if (self != BType.Dict) return error.NoDict; + + const val = self.Dict.get(key) orelse return error.KeyNotFound; + + if (val != tag) return error.BTypeMismatch; + + return @field(val, @tagName(tag)); + } + + pub fn format(value: BType, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + switch (value) { + .Integer => { + try writer.print("{}", .{value.Integer}); + }, + .String => { + try writer.print("{s}", .{value.String}); + }, + .List => { + try writer.print("[\n", .{}); + for (value.List) |e| { + try writer.print("\t", .{}); + try format(e, fmt, options, writer); + } + try writer.print("\n]", .{}); + }, + .Dict => { + try writer.print("{{\n", .{}); + var it = value.Dict.keyIterator(); + while (it.next()) |k| { + try writer.print("\t", .{}); + try writer.print("{s}:", .{k.*}); + const val = value.Dict.get(k.*).?; + try format(val, fmt, options, writer); + } + try writer.print("\n}}", .{}); + }, + } + } +}; + +pub const BParse = struct { + const Self = @This(); + + alloc: ArenaAllocator, + value: BType, + + pub fn get_as(self: Self, 
comptime tag: std.meta.Tag(BType), key: []const u8) !std.meta.TagPayload(BType, tag) {
+        return self.value.get_as(tag, key);
+    }
+
+    /// Releases the arena stored in `alloc`.
+    pub fn deinit(self: Self) void {
+        self.alloc.deinit();
+    }
+};
+
+const ParseError = error{ EndOfStream, FormatError };
+
+/// Parses a single bencoded value from `reader`.
+///
+/// Every allocation goes into an arena created on top of `allocator`. The
+/// arena is handed to the returned `BParse` and released by `BParse.deinit`;
+/// when parsing fails it is released before the error is returned.
+pub fn parse(allocator: Allocator, reader: anytype) anyerror!BParse {
+    var ally = std.heap.ArenaAllocator.init(allocator);
+    errdefer ally.deinit();
+
+    const bparse = try parseInternal(ally.allocator(), reader);
+
+    return BParse{ .alloc = ally, .value = bparse };
+}
+
+/// Reads the type byte of the next value and dispatches on it.
+///
+/// Returns `ParseError.EndOfStream` when no byte can be read at all.
+fn parseInternal(allocator: Allocator, reader: anytype) anyerror!BType {
+    const first = nextByte(reader) orelse return ParseError.EndOfStream;
+    return parseInternalNext(allocator, first, reader);
+}
+
+/// Parses the value whose first byte `next` has already been consumed from
+/// `reader`: `i` integer, `l` list, `d` dictionary, a digit starts a string.
+/// Any other byte is a `ParseError.FormatError`.
+fn parseInternalNext(allocator: Allocator, next: u8, reader: anytype) anyerror!BType {
+    switch (next) {
+        'i' => {
+            const res = try parse_bint(reader);
+            return BType{ .Integer = res };
+        },
+        'l' => {
+            const res = try parse_blist(allocator, reader);
+            return BType{ .List = res };
+        },
+        'd' => {
+            const res = try parse_bdict(allocator, reader);
+            return BType{ .Dict = res };
+        },
+        '0'...'9' => {
+            const res = try parse_bstring(allocator, next, reader);
+            return BType{ .String = res };
+        },
+        else => {
+            return ParseError.FormatError;
+        },
+    }
+
+    unreachable;
+}
+
+/// Parses list elements until the terminating `e`. The leading `l` has already
+/// been consumed.
+///
+/// A stream that ends before the terminator is rejected with
+/// `ParseError.FormatError` instead of being accepted as a complete list.
+fn parse_blist(allocator: Allocator, reader: anytype) anyerror![]const BType {
+    var buf = std.ArrayList(BType).init(allocator);
+    errdefer buf.deinit();
+
+    while (nextByte(reader)) |next| {
+        if (next == 'e') break;
+
+        const el = try parseInternalNext(allocator, next, reader);
+        try buf.append(el);
+    } else {
+        // the `else` branch only runs when the reader dried up, not on `break`
+        return ParseError.FormatError;
+    }
+
+    return buf.toOwnedSlice();
+}
+
+/// Parses `key value` pairs until the terminating `e`. The leading `d` has
+/// already been consumed.
+///
+/// A key without a value, or a stream that ends before the terminator, is a
+/// `ParseError.FormatError`.
+fn parse_bdict(allocator: Allocator, reader: anytype) anyerror!StringHashMap(BType) {
+    var map = StringHashMap(BType).init(allocator);
+    errdefer map.deinit();
+
+    while (nextByte(reader)) |next| {
+        if (next == 'e') break;
+
+        const key = try parse_bstring(allocator, next, reader);
+
+        const value_next = nextByte(reader) orelse return error.FormatError;
+        const value = try parseInternalNext(allocator, value_next, reader);
+
+        try map.put(key, value);
+    } else {
+        // the `else` branch only runs when the reader dried up, not on `break`
+        return ParseError.FormatError;
+    }
+
+    return map;
+}
+
+/// Parses a bencode string into a `u8` slice
+///
+/// `first` is the already consumed first byte of the length prefix.
+/// Allocates a buffer for the string. Caller owns the memory. The buffer is
+/// freed again when the stream ends before `len` bytes could be read.
+fn parse_bstring(allocator: Allocator, first: u8, reader: anytype) ![]const u8 {
+    const len = try parse_bstring_len(allocator, first, reader);
+
+    const buf = try allocator.alloc(u8, len);
+    errdefer allocator.free(buf);
+
+    for (buf) |*byte| {
+        byte.* = nextByte(reader) orelse return ParseError.FormatError;
+    }
+    return buf;
+}
+
+/// Parses the decimal length prefix of a bencode string up to and including
+/// the `:` separator.
+///
+/// `first` has already been consumed from the reader and is the first digit.
+/// It is validated here because dictionary keys reach this function with an
+/// arbitrary byte. The length is accumulated digit by digit, so nothing is
+/// allocated; `allocator` is unused and only kept so existing callers compile
+/// unchanged.
+fn parse_bstring_len(allocator: Allocator, first: u8, reader: anytype) !usize {
+    _ = allocator;
+
+    if (first < '0' or first > '9') return ParseError.FormatError;
+    var len: usize = first - '0';
+
+    while (nextByte(reader)) |next| {
+        switch (next) {
+            ':' => {
+                return len;
+            },
+            '0'...'9' => {
+                // checked arithmetic: an absurdly long prefix must not wrap around
+                len = try std.math.mul(usize, len, 10);
+                len = try std.math.add(usize, len, next - '0');
+            },
+            else => {
+                return ParseError.FormatError;
+            },
+        }
+    }
+
+    // stream ended before the ':' separator
+    return ParseError.FormatError;
+}
+
+/// Parses the digits of a bencode integer up to and including the closing
+/// `e`. The leading `i` has already been consumed.
+///
+/// A `-` is accepted as the very first character only. Input that does not
+/// fit the 20 byte scratch buffer, contains any other byte, or ends before
+/// the `e` is a `ParseError.FormatError`.
+fn parse_bint(reader: anytype) !i64 {
+    var parse_buf: [20]u8 = undefined; // -9223372036854775808 to 9223372036854775807
+    var len: usize = 0;
+
+    while (nextByte(reader)) |next| {
+        switch (next) {
+            '-' => {
+                if (len != 0) return ParseError.FormatError;
+                parse_buf[len] = next;
+                len += 1;
+            },
+
+            '0'...'9' => {
+                // never write past the scratch buffer
+                if (len == parse_buf.len) return ParseError.FormatError;
+                parse_buf[len] = next;
+                len += 1;
+            },
+
+            'e' => {
+                return try std.fmt.parseInt(i64, parse_buf[0..len], 10);
+            },
+
+            else => {
+                return ParseError.FormatError;
+            },
+        }
+    }
+
+    return ParseError.FormatError;
+}
+
+/// Reads one byte from `reader`.
+///
+/// Every read error, not only end of stream, is logged at debug level and
+/// reported as `null`.
+fn nextByte(reader: anytype) ?u8 {
+    return reader.readByte() catch {
+        std.log.debug("Parse reached end of stream", .{});
+        return null;
+    };
+}
+
+test "parse negative int i-42e" {
+    const bencode = "i-42e";
+    var stream = std.io.fixedBufferStream(bencode);
+
+    const res = try parse(std.testing.allocator, stream.reader());
+    defer res.deinit();
+
+    try std.testing.expectEqual(@as(i64, -42), res.value.Integer);
+}
+
+test "parse unterminated list l4:spam" {
+    const bencode = "l4:spam";
+    var stream = std.io.fixedBufferStream(bencode);
+
+    const res = parse(std.testing.allocator, stream.reader());
+
+    try std.testing.expectError(error.FormatError, res);
+}
+
+test "parse unterminated dict d4:spami1e" {
+    const bencode = "d4:spami1e";
+    var stream = std.io.fixedBufferStream(bencode);
+
+    const res = parse(std.testing.allocator, stream.reader());
+
+    try std.testing.expectError(error.FormatError, res);
+}
+
+test "parse int
i322323e" { + const bencode = "i322323e"; + var stream = std.io.fixedBufferStream(bencode); + + const res = try parse(std.testing.allocator, stream.reader()); + defer res.deinit(); + + try std.testing.expectEqual(@as(i64, 322323), res.value.Integer); +} + +test "parse string 3:abc" { + const bencode = "3:abc"; + var stream = std.io.fixedBufferStream(bencode); + + const res = try parse(std.testing.allocator, stream.reader()); + defer res.deinit(); + + try std.testing.expectEqualStrings("abc", res.value.String); +} + +test "parse invalid int i12" { + const bencode = "i12"; + var stream = std.io.fixedBufferStream(bencode); + + const res = parse(std.testing.allocator, stream.reader()); + + try std.testing.expectError(error.FormatError, res); +} + +test "parse list l4:spam4:eggse" { + const bencode = "l4:spam4:eggse"; + + var stream = std.io.fixedBufferStream(bencode); + + const res = try parse(std.testing.allocator, stream.reader()); + defer res.deinit(); + + try std.testing.expect(res.value.List.len == 2); + try std.testing.expectEqualStrings("spam", res.value.List[0].String); + try std.testing.expectEqualStrings("eggs", res.value.List[1].String); +} + +test "parse list l4:spami322323e4:eggse" { + const bencode = "l4:spami322323e4:eggse"; + + var stream = std.io.fixedBufferStream(bencode); + + const res = try parse(std.testing.allocator, stream.reader()); + defer res.deinit(); + + try std.testing.expect(res.value.List.len == 3); + try std.testing.expectEqualStrings("spam", res.value.List[0].String); + try std.testing.expectEqual(@as(i64, 322323), res.value.List[1].Integer); + try std.testing.expectEqualStrings("eggs", res.value.List[2].String); +} + +test "parse list l4:spamli322323e4:fishe4:eggse" { + const bencode = "l4:spamli322323e4:fishe4:eggse"; + + var stream = std.io.fixedBufferStream(bencode); + + const res = try parse(std.testing.allocator, stream.reader()); + defer res.deinit(); + + try std.testing.expect(res.value.List.len == 3); + try 
std.testing.expectEqualStrings("spam", res.value.List[0].String); + + //nested list + try std.testing.expect(res.value.List[1].List.len == 2); + try std.testing.expectEqual(@as(i64, 322323), res.value.List[1].List[0].Integer); + try std.testing.expectEqualStrings("fish", res.value.List[1].List[1].String); + + try std.testing.expectEqualStrings("eggs", res.value.List[2].String); +} + +test "parse map d4:spamli322323e4:fishe4:eggsi1234e ({spam:[322323,fish], eggs:1234})" { + const bencode = "d4:spamli322323e4:fishe4:eggsi1234e"; + + var stream = std.io.fixedBufferStream(bencode); + + const res = try parse(std.testing.allocator, stream.reader()); + defer res.deinit(); + + try std.testing.expect(res.value.Dict.count() == 2); + try std.testing.expect(res.value.Dict.contains("spam")); + try std.testing.expect(res.value.Dict.contains("eggs")); + + try std.testing.expectEqual(@as(i64, 322323), res.value.Dict.get("spam").?.List[0].Integer); + try std.testing.expectEqualStrings("fish", res.value.Dict.get("spam").?.List[1].String); + try std.testing.expectEqual(@as(i64, 1234), res.value.Dict.get("eggs").?.Integer); +} + +test "parse invalid string 3:ab" { + const bencode = "3:ab"; + var stream = std.io.fixedBufferStream(bencode); + + const res = parse(std.testing.allocator, stream.reader()); + + try std.testing.expectError(error.FormatError, res); +} + +test "parse debian torrent" { + const bencode = try std.fs.cwd() + .openFile("test/simple.torrent", .{}); + defer bencode.close(); + + var buffered_reader = std.io.bufferedReader(bencode.reader()); + + const res = try parse(std.testing.allocator, buffered_reader.reader()); + defer res.deinit(); +} diff --git a/src/client.zig b/src/client.zig new file mode 100644 index 0000000..e69de29 diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..ea387c6 --- /dev/null +++ b/src/main.zig @@ -0,0 +1,20 @@ +const std = @import("std"); +const torrent = @import("torrent.zig"); +const bencode = @import("bencode.zig"); 
+const tracker = @import("tracker.zig");
+const network = @import("network.zig");
+
+/// Parses the torrent file named by the first command line argument and
+/// prints it to stdout.
+///
+/// Returns `error.MissingArgument` after printing a usage line when no path
+/// was given, instead of indexing past the end of the argument list.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    const allocator = gpa.allocator();
+    defer _ = gpa.deinit();
+
+    const args = try std.process.argsAlloc(allocator);
+    defer std.process.argsFree(allocator, args);
+
+    if (args.len < 2) {
+        const exe: []const u8 = if (args.len > 0) args[0] else "zephyr";
+        std.debug.print("usage: {s} <torrent-file>\n", .{exe});
+        return error.MissingArgument;
+    }
+
+    const t = try torrent.Torrent.parse(allocator, args[1]);
+    defer t.deinit();
+
+    const outw = std.io.getStdOut().writer();
+    try outw.print("{}", .{t});
+}
diff --git a/src/network.zig b/src/network.zig
new file mode 100644
index 0000000..4abd9a8
--- /dev/null
+++ b/src/network.zig
@@ -0,0 +1,55 @@
+const std = @import("std");
+const c = @cImport({
+    @cInclude("arpa/inet.h");
+    @cInclude("netdb.h");
+});
+
+// roughly after
+// https://beej.us/guide/bgnet/html/split/system-calls-or-bust.html#getaddrinfoprepare-to-launch
+fn printip() !void {
+
+    // equivalent to beej's `memset(&hints, 0, sizeof hints);`
+    // zeroing out the struct
+    var hints: c.addrinfo = std.mem.zeroes(c.addrinfo);
+    hints.ai_family = c.AF_UNSPEC;
+    hints.ai_socktype = c.SOCK_DGRAM;
+    hints.ai_flags = 0;
+    hints.ai_protocol = 0;
+
+    var results: ?*c.addrinfo = null;
+
+    const res = c.getaddrinfo("example.com", "443", &hints, &results);
+    defer if (results != null) c.freeaddrinfo(results);
+    if (res != 0) return error.UnableToResolve;
+
+    var ip_buf: [c.INET6_ADDRSTRLEN]u8 = undefined;
+
+    var rp = results;
+    while (rp) |addr| : (rp = rp.?.ai_next) {
+        switch (addr.ai_family) {
+            c.AF_INET => {
+                const ipv4: *c.sockaddr_in = @alignCast(@ptrCast(addr.ai_addr));
+                const ip = c.inet_ntop(addr.ai_family, &ipv4.sin_addr, &ip_buf, c.INET_ADDRSTRLEN);
+                if (ip == null) return error.UntranslatableIP;
+
+                std.debug.print("Addr IPv4: {s}\n", .{ip});
+            },
+
+            c.AF_INET6 => {
+                const ipv6: *c.sockaddr_in6 = @ptrCast(@alignCast(addr.ai_addr));
+                const ip = c.inet_ntop(addr.ai_family, &ipv6.sin6_addr, &ip_buf, c.INET6_ADDRSTRLEN);
+                if (ip == null) return error.UntranslatableIP;
+
+                
std.debug.print("Addr IPv6: {s}\n", .{ip});
+            },
+
+            else => {
+                return error.UnknownFamily;
+            },
+        }
+    }
+}
+
+test "print ip" {
+    try printip();
+}
diff --git a/src/torrent.zig b/src/torrent.zig
new file mode 100644
index 0000000..8da6f9e
--- /dev/null
+++ b/src/torrent.zig
@@ -0,0 +1,300 @@
+const std = @import("std");
+const bencode = @import("bencode.zig");
+const BType = bencode.BType;
+const Allocator = std.mem.Allocator;
+
+const TorrentError = error{ InvalidTorrent, MissingEntry, AllocError };
+
+/// In-memory representation of a torrent metainfo file.
+///
+/// Every slice and list stored here is an owned copy made with `allocator`
+/// and is released by `deinit`.
+pub const Torrent = struct {
+    /// One tier of the `announce-list`: a list of tracker strings.
+    const Tier = std.ArrayList([]const u8);
+
+    /// One entry of the `files` list of a multi file torrent.
+    const File = struct { // zig fmt: off
+        length: u64,
+        path: std.ArrayList([]const u8)
+    };
+
+    /// The `info` dictionary. `Type` is `Single` when `info` has a `length`
+    /// key and `Multi` otherwise.
+    const Info = struct { // zig fmt: off
+        name: []const u8,
+        piece_length: u64,
+        pieces: [][20]u8,
+        Type: union(enum) {
+            Single: struct { length: u64 },
+            Multi: struct { files: std.ArrayList(File) }
+        }
+    };
+
+    announce: []const u8,
+    announce_list: std.ArrayList(Tier),
+    comment: ?[]const u8,
+    info: Info,
+    allocator: Allocator,
+
+    /// Reads and decodes the torrent file at `path` (opened relative to the
+    /// current working directory).
+    ///
+    /// Returns `error.InvalidTorrent` when the file cannot be read or decoded,
+    /// when a mandatory entry is missing or has the wrong type, or when an
+    /// integer entry is negative. Returns `error.AllocError` when copying a
+    /// value fails. On error everything copied so far is released again.
+    pub fn parse(allocator: Allocator, path: []const u8) TorrentError!Torrent {
+        var torrent = Torrent{ // zig fmt: off
+            .announce = &[_]u8{},
+            .announce_list = std.ArrayList(Tier).init(allocator),
+            .comment = null,
+            // Empty placeholders instead of `undefined`: the `errdefer` below
+            // runs `deinit` on a half built torrent, which frees these fields.
+            // Freeing a zero length slice does nothing.
+            .info = .{
+                .name = &[_]u8{},
+                .piece_length = 0,
+                .pieces = &[_][20]u8{},
+                .Type = .{ .Single = .{ .length = 0 } },
+            },
+            .allocator = allocator
+        };
+        errdefer torrent.deinit();
+
+        const bparse: bencode.BParse = bencode_decode(allocator, path) catch return error.InvalidTorrent;
+        defer bparse.deinit();
+
+        if (bparse.value != BType.Dict) return error.InvalidTorrent;
+
+        const announce = bparse.get_as(BType.String, "announce") catch return error.InvalidTorrent;
+        torrent.announce = allocator.dupe(u8, announce) catch return error.AllocError;
+
+        try parse_announce_list(allocator, bparse, &torrent);
+
+        if (bparse.value.Dict.contains("comment")) {
+            const comment = bparse.get_as(BType.String, "comment") catch return error.InvalidTorrent;
+            torrent.comment = allocator.dupe(u8, comment) catch return error.AllocError;
+        }
+
+        const info = bparse.value.Dict.get("info") orelse return error.InvalidTorrent;
+        if (info != BType.Dict) return error.InvalidTorrent;
+
+        try parse_info_common(allocator, info, &torrent);
+
+        if (info.Dict.contains("length")) {
+            const length = info.get_as(BType.Integer, "length") catch return error.InvalidTorrent;
+            torrent.info.Type = .{ .Single = .{
+                // checked cast: a negative length is invalid input, not a crash
+                .length = std.math.cast(u64, length) orelse return error.InvalidTorrent,
+            } };
+        } else {
+            torrent.info.Type = .{ .Multi = .{ .files = try parse_info_multifile(allocator, info) } };
+        }
+
+        return torrent;
+    }
+
+    /// Opens `path` relative to the current working directory and decodes its
+    /// content through a buffered reader. The caller has to `deinit` the
+    /// result.
+    fn bencode_decode(allocator: Allocator, path: []const u8) !bencode.BParse {
+        const torrent_file = try std.fs.cwd().openFile(path, .{});
+        defer torrent_file.close();
+
+        var buffered_reader = std.io.bufferedReader(torrent_file.reader());
+
+        return bencode.parse(allocator, buffered_reader.reader());
+    }
+
+    /// Copies the optional `announce-list` (a list of tiers, each a list of
+    /// tracker strings) into `torrent.announce_list`. Empty tiers are skipped.
+    ///
+    /// A tier that is only partially copied when an error occurs is released
+    /// here; tiers already appended stay with `torrent`.
+    fn parse_announce_list(allocator: Allocator, bparse: bencode.BParse, torrent: *Torrent) !void {
+        if (!bparse.value.Dict.contains("announce-list")) return;
+
+        const announce_list = bparse.get_as(BType.List, "announce-list") catch return error.InvalidTorrent;
+
+        for (announce_list) |tier_list| {
+            if (tier_list != BType.List) return error.InvalidTorrent;
+            if (tier_list.List.len == 0) continue;
+
+            var tier = Tier.init(allocator);
+            errdefer {
+                for (tier.items) |owned| allocator.free(owned);
+                tier.deinit();
+            }
+
+            for (tier_list.List) |tracker| {
+                if (tracker != BType.String) return error.InvalidTorrent;
+
+                const url = allocator.dupe(u8, tracker.String) catch return error.AllocError;
+                errdefer allocator.free(url);
+                tier.append(url) catch return error.AllocError;
+            }
+
+            torrent.announce_list.append(tier) catch return error.AllocError;
+        }
+    }
+
+    /// Fills the entries shared by single and multi file torrents: `name`,
+    /// `piece length` and `pieces`.
+    ///
+    /// `pieces` has to be a multiple of 20 bytes long; it is split into
+    /// consecutive 20 byte hashes.
+    fn parse_info_common(allocator: Allocator, info: BType, torrent: *Torrent) !void {
+        const name = info.get_as(BType.String, "name") catch return error.InvalidTorrent;
+        torrent.info.name = allocator.dupe(u8, name) catch return error.AllocError;
+
+        const piece_length = info.get_as(BType.Integer, "piece length") catch return error.InvalidTorrent;
+        torrent.info.piece_length = std.math.cast(u64, piece_length) orelse return error.InvalidTorrent;
+
+        // pieces are 20-byte SHA-1 hashes of file pieces
+        const hash_len = 20;
+        const info_pieces = info.get_as(BType.String, "pieces") catch return error.InvalidTorrent;
+        if (info_pieces.len % hash_len != 0) return error.InvalidTorrent;
+
+        const info_pieces_len = info_pieces.len / hash_len;
+
+        torrent.info.pieces = allocator.alloc([20]u8, info_pieces_len) catch return error.AllocError;
+
+        for (torrent.info.pieces, 0..) |*piece, i| {
+            // hash i starts at byte i * 20, not at byte i
+            piece.* = info_pieces[i * hash_len ..][0..hash_len].*;
+        }
+    }
+
+    /// Copies the `files` list of a multi file torrent. Every entry needs an
+    /// integer `length` and a `path` list of strings.
+    ///
+    /// On error every path copied so far is released before returning.
+    fn parse_info_multifile(allocator: Allocator, info: BType) !std.ArrayList(File) {
+        var files = std.ArrayList(File).init(allocator);
+        errdefer {
+            for (files.items) |file| {
+                for (file.path.items) |segment| allocator.free(segment);
+                file.path.deinit();
+            }
+            files.deinit();
+        }
+
+        const info_files = info.get_as(BType.List, "files") catch return error.InvalidTorrent;
+
+        for (info_files) |info_file| {
+            if (info_file != BType.Dict) return error.InvalidTorrent;
+
+            var torrent_file = File{ .length = undefined, .path = std.ArrayList([]const u8).init(allocator) };
+            errdefer {
+                for (torrent_file.path.items) |segment| allocator.free(segment);
+                torrent_file.path.deinit();
+            }
+
+            const file_length = info_file.get_as(BType.Integer, "length") catch return error.InvalidTorrent;
+            torrent_file.length = std.math.cast(u64, file_length) orelse return error.InvalidTorrent;
+
+            const file_path = info_file.get_as(BType.List, "path") catch return error.InvalidTorrent;
+            for (file_path) |p| {
+                if (p != BType.String) return error.InvalidTorrent;
+                const p_dupe = allocator.dupe(u8, p.String) catch return error.AllocError;
+                errdefer allocator.free(p_dupe);
+                torrent_file.path.append(p_dupe) catch return error.AllocError;
+            }
+
+            files.append(torrent_file) catch return error.AllocError;
+        }
+
+        return files;
+    }
+
+    pub fn format(value: Torrent, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+        try writer.print(
+            \\Torrent {{
+            \\ .announce = {s}
+            \\
+        , .{value.announce});
+
+        try writer.print(" .announce-list = [", .{});
+        for (value.announce_list.items, 0..) |tier, idx| {
+            try writer.print("\n [", .{});
+            for (tier.items, 0..)
|tracker, idx2| { + try writer.print("{s}", .{tracker}); + if (idx2 < tier.items.len - 1) try writer.print(", ", .{}); + } + try writer.print("]", .{}); + + if (idx < value.announce_list.items.len - 1) try writer.print(", ", .{}); + } + try writer.print("]\n", .{}); + + try writer.print(" .comment = {?s}\n", .{value.comment}); + + try writer.print( + \\ .info = {{ + \\ .name = {s} + \\ .piece_length = {} + \\ + , .{ value.info.name, value.info.piece_length }); + + switch (value.info.Type) { + .Multi => |multi| { + try writer.print(" .files = [\n", .{}); + + for (multi.files.items) |file| { + try writer.print(" {{.length = {}, .path = [", .{file.length}); + for (0..file.path.items.len) |i| { + try writer.print("{s}", .{file.path.items[i]}); + if (i < file.path.items.len - 1) { + try writer.print(", ", .{}); + } + } + try writer.print("]}}\n", .{}); + } + + try writer.print(" ]\n", .{}); + }, + .Single => |single| { + try writer.print(" .length = {}\n", .{single.length}); + }, + } + + try writer.print(" .pieces = \n", .{}); + for (value.info.pieces) |p| { + try writer.print(" {}\n", .{std.fmt.fmtSliceHexUpper(&p)}); + } + + try writer.print( + \\ }} + \\}} + \\ + , .{}); + } + + pub fn deinit(self: Torrent) void { + self.allocator.free(self.announce); + if(self.comment) |comment| self.allocator.free(comment); + self.allocator.free(self.info.name); + self.allocator.free(self.info.pieces); + + for (self.announce_list.items) |tier| { + for (tier.items) |tracker| { + self.allocator.free(tracker); + } + + tier.deinit(); + } + self.announce_list.deinit(); + + switch (self.info.Type) { + .Multi => |multi| { + for (multi.files.items) |file| { + for (file.path.items) |p| { + self.allocator.free(p); + } + file.path.deinit(); + } + self.info.Type.Multi.files.deinit(); + }, + .Single => {}, + } + } +}; + +test "parse simple torrent" { + var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fs.realpath("test/simple.torrent", &buf); + + const res = try 
Torrent.parse(std.testing.allocator, path); + defer res.deinit(); + + try std.testing.expectEqualStrings("http://example.com", res.announce); + try std.testing.expectEqualStrings("simple", res.info.name); + try std.testing.expectEqual(@as(u64, 7), res.info.Type.Single.length); + try std.testing.expectEqual(@as(u64, 262144), res.info.piece_length); +} + +test "parse multifile real torrent" { + var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fs.realpath("test/rocky.torrent", &buf); + + const res = try Torrent.parse(std.testing.allocator, path); + defer res.deinit(); + + try std.testing.expectEqualStrings("http://linuxtracker.org:2710/00000000000000000000000000000000/announce", res.announce); + try std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal", res.info.name); + + try std.testing.expectEqual(4, res.info.Type.Multi.files.items.len); + + try std.testing.expectEqual(@as(u64, 1502), res.info.Type.Multi.files.items[0].length); + try std.testing.expectEqual(1, res.info.Type.Multi.files.items[0].path.items.len); + try std.testing.expectEqualStrings("CHECKSUM", res.info.Type.Multi.files.items[0].path.items[0]); + + try std.testing.expectEqual(@as(u64, 2694053888), res.info.Type.Multi.files.items[1].length); + try std.testing.expectEqual(1, res.info.Type.Multi.files.items[1].path.items.len); + try std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal.iso", res.info.Type.Multi.files.items[1].path.items[0]); + + try std.testing.expectEqual(@as(u64, 156), res.info.Type.Multi.files.items[2].length); + try std.testing.expectEqual(1, res.info.Type.Multi.files.items[2].path.items.len); + try std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal.iso.CHECKSUM", res.info.Type.Multi.files.items[2].path.items[0]); + + try std.testing.expectEqual(@as(u64, 103171), res.info.Type.Multi.files.items[3].length); + try std.testing.expectEqual(1, res.info.Type.Multi.files.items[3].path.items.len); + try 
std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal.iso.manifest", res.info.Type.Multi.files.items[3].path.items[0]); + + try std.testing.expectEqual(@as(u64, 4194304), res.info.piece_length); +} + +test "parse singlefile real torrent" { + var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fs.realpath("test/debian.torrent", &buf); + + const res = try Torrent.parse(std.testing.allocator, path); + defer res.deinit(); + + try std.testing.expectEqualStrings("http://bttracker.debian.org:6969/announce", res.announce); + try std.testing.expectEqualStrings("debian-12.5.0-amd64-netinst.iso", res.info.name); + + try std.testing.expectEqual(@as(u64, 659554304), res.info.Type.Single.length); + try std.testing.expectEqual(@as(u64, 262144), res.info.piece_length); +} diff --git a/src/tracker.zig b/src/tracker.zig new file mode 100644 index 0000000..e89a441 --- /dev/null +++ b/src/tracker.zig @@ -0,0 +1,35 @@ +const std = @import("std"); +const torrent = @import("torrent.zig"); + +const Torrent = torrent.Torrent; + +const Peer = struct { + ip: std.net.Address, + port: u16, +}; + +fn get_peers(allocator: std.mem.Allocator, t: Torrent) !void { + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const stream: std.net.Stream = try std.net.tcpConnectToHost(arena.allocator(), t.announce, 6969); + defer stream.close(); + + try stream.writeAll( + \\GET /announce?info_hash=%D9%BE%BA%F7%CD4%F7v%DC%D7_4%E9%F6%9B%E7G%1B%DD%C6%F8%D7%CD%F9o%DEZ%D3~%B6&peer_id=1&uploaded=0&downloaded=0 HTTP/1.1 + \\Host: bttracker.debian.org + \\ + \\ + ); + + var buf: [20]u8 = undefined; + const res = try stream.reader().read(&buf); + + std.log.err("Result {any}", .{res}); +} + +test "something" { + const t = Torrent{ .announce = "bttracker.debian.org", .allocator = undefined, .info = undefined }; + + try get_peers(std.testing.allocator, t); +} diff --git a/test/debian.torrent b/test/debian.torrent new file mode 100644 index 0000000..a935008 Binary files 
/dev/null and b/test/debian.torrent differ diff --git a/test/rocky.torrent b/test/rocky.torrent new file mode 100644 index 0000000..36b83ac Binary files /dev/null and b/test/rocky.torrent differ diff --git a/test/simple.torrent b/test/simple.torrent new file mode 100644 index 0000000..ba55a3c --- /dev/null +++ b/test/simple.torrent @@ -0,0 +1 @@ +d8:announce18:http://example.com7:comment7:Comment10:created by13:mktorrent 1.113:creation datei1712361951e4:infod6:lengthi7e4:name6:simple12:piece lengthi262144e6:pieces20: ٨&y(9Aee \ No newline at end of file diff --git a/tools/bencode.el b/tools/bencode.el new file mode 100644 index 0000000..100529d --- /dev/null +++ b/tools/bencode.el @@ -0,0 +1,377 @@ +;;; bencode.el --- Bencode encoding / decoding -*- lexical-binding: t; -*- + +;; This is free and unencumbered software released into the public domain. + +;; Author: Christopher Wellons +;; URL: https://github.com/skeeto/emacs-bencode +;; Version: 1.0 +;; Package-Requires: ((emacs "24.4")) + +;;; Commentary: + +;; This package provides a strict and robust [bencode][bencode] +;; encoder and decoder. Encoding is precise, taking into account +;; character encoding issues. As such, the encoder always returns +;; unibyte data intended to be written out as raw binary data without +;; additional character encoding. When encoding strings and keys, +;; UTF-8 is used by default. The decoder strictly valides its input, +;; rejecting invalid inputs. 
+
+;; The API entrypoints are:
+;; * `bencode-encode'
+;; * `bencode-encode-to-buffer'
+;; * `bencode-decode'
+;; * `bencode-decode-from-buffer'
+
+;;; Code:
+
+(require 'cl-lib)
+
+(define-error 'bencode "Bencode error")
+(define-error 'bencode-unsupported-type "Type cannot be encoded" 'bencode)
+(define-error 'bencode-invalid-key "Not a valid dictionary key" 'bencode)
+(define-error 'bencode-invalid-plist "Plist is invalid" 'bencode)
+(define-error 'bencode-invalid-byte "Invalid input byte" 'bencode)
+(define-error 'bencode-overflow "Integer too large" 'bencode)
+(define-error 'bencode-end-of-file "End of file during parsing"
+  '(bencode end-of-file))
+
+(defsubst bencode--int (object)
+  "Encode OBJECT as an integer into the current buffer."
+  (insert "i" (number-to-string object) "e"))
+
+(defsubst bencode--string (object coding-system)
+  "Encode OBJECT as a string into the current buffer."
+  (if (multibyte-string-p object)
+      (let ((encoded (encode-coding-string object coding-system :nocopy)))
+        (insert (number-to-string (length encoded)) ":" encoded))
+    (insert (number-to-string (length object)) ":" object)))
+
+(defsubst bencode--hash-table-entries (object coding-system)
+  "Return a list of key-sorted entries in OBJECT with encoded keys."
+  (let ((entries ()))
+    (maphash (lambda (key value)
+               (cond
+                ((multibyte-string-p key)
+                 (let ((encoded (encode-coding-string
+                                 key coding-system :nocopy)))
+                   (push (cons encoded value) entries)))
+                ((stringp key)
+                 (push (cons key value) entries))
+                ((signal 'bencode-invalid-key key))))
+             object)
+    (cl-sort entries #'string< :key #'car)))
+
+(defsubst bencode--plist-entries (object coding-system)
+  "Return a list of key-sorted entries in OBJECT with encoded keys."
+  (let ((plist object)
+        (entries ()))
+    (while plist
+      (let ((key (pop plist)))
+        (unless (keywordp key)
+          (signal 'bencode-invalid-key key))
+        (when (null plist)
+          (signal 'bencode-invalid-plist object))
+        (let ((name (substring (symbol-name key) 1))
+              (value (pop plist)))
+          (if (multibyte-string-p name)
+              (let ((encoded (encode-coding-string
+                              name coding-system :nocopy)))
+                (push (cons encoded value) entries))
+            (push (cons name value) entries)))))
+    (cl-sort entries #'string< :key #'car)))
+
+(cl-defun bencode-encode (object &key (coding-system 'utf-8))
+  "Return a unibyte string encoding OBJECT with bencode.
+
+:coding-system -- coding system for encoding strings into byte strings (utf-8)
+
+Supported types:
+* Integer
+* Multibyte and unibyte strings
+* List of supported types
+* Vector of supported types (encodes to list)
+* Hash table with string keys (encodes to dictionary)
+* Plist with keyword symbol keys (encodes to dictionary)
+
+When multibyte strings are encountered either as values or dictionary
+keys, they are encoded with the specified coding system (default:
+UTF-8).  The same coding system must be used when decoding.
+
+Possible error signals:
+* bencode-unsupported-type
+* bencode-invalid-key
+* bencode-invalid-plist
+
+This function is not recursive.  It is safe to input very deeply
+nested data structures."
+  (with-temp-buffer
+    (set-buffer-multibyte nil)
+    (bencode-encode-to-buffer object :coding-system coding-system)
+    (buffer-string)))
+
+(cl-defun bencode-encode-to-buffer (object &key (coding-system 'utf-8))
+  "Like `bencode-encode' but to the current buffer at point."
+  (let ((stack (list (cons :new object))))
+    (while stack
+      (let* ((next (car stack))
+             (value (cdr next)))
+        (cl-case (car next)
+          ;; Start encoding a new, unexamined value
+          (:new
+           (pop stack)
+           (cond ((integerp value)
+                  (bencode--int value))
+                 ((stringp value)
+                  (bencode--string value coding-system))
+                 ((and (consp value)
+                       (keywordp (car value)))
+                  (insert "d")
+                  (let ((entries (bencode--plist-entries value coding-system)))
+                    (push (cons :dict entries) stack)))
+                 ((listp value)
+                  (insert "l")
+                  (push (cons :list value) stack))
+                 ((vectorp value)
+                  (insert "l")
+                  (push (cons :vector (cons 0 value)) stack))
+                 ((hash-table-p value)
+                  (insert "d")
+                  (let ((entries (bencode--hash-table-entries
+                                  value coding-system)))
+                    (push (cons :dict entries) stack)))
+                 ((signal 'bencode-unsupported-type object))))
+          ;; Continue encoding dictionary
+          ;; (:dict . remaining-dict)
+          (:dict
+           (if (null value)
+               (progn
+                 (pop stack)
+                 (insert "e"))
+             (let* ((entry (car value))
+                    (key (car entry)))
+               (insert (number-to-string (length key)) ":" key)
+               (setf (cdr next) (cdr value))
+               (push (cons :new (cdr entry)) stack))))
+          ;; Continue encoding list
+          ;; (:list . remaining-list)
+          (:list
+           (if (null value)
+               (progn
+                 (pop stack)
+                 (insert "e"))
+             (setf (cdr next) (cdr value))
+             (push (cons :new (car value)) stack)))
+          ;; Continue encoding vector (as list)
+          ;; (:vector index . vector)
+          (:vector
+           (let ((i (car value))
+                 (v (cdr value)))
+             (if (= i (length v))
+                 (progn
+                   (pop stack)
+                   (insert "e"))
+               (setf (car value) (+ i 1))
+               (push (cons :new (aref v i)) stack)))))))))
+
+(defsubst bencode--decode-int ()
+  "Decode an integer from the current buffer at point."
+  (forward-char)
+  (let ((start (point)))
+    ;; Don't allow leading zeros
+    (if (eql (char-after) ?0)
+        ;; Unless the value *is* zero
+        (prog1 0
+          (forward-char)
+          (unless (eql (char-after) ?e)
+            (signal 'bencode-invalid-byte
+                    (cons (char-after) (point))))
+          (forward-char))
+      ;; Skip minus sign
+      (when (eql (char-after) ?-)
+        (forward-char)
+        ;; Negative zero not allowed
+        (when (eql (char-after) ?0)
+          (signal 'bencode-invalid-byte
+                  (cons (char-after) (point)))))
+      ;; Check for empty integer
+      (when (eql ?e (char-after))
+        (signal 'bencode-invalid-byte
+                (cons (char-after) (point))))
+      ;; Skip over digits
+      (unless (re-search-forward "[^0-9]" nil :noerror)
+        (signal 'bencode-end-of-file (point)))
+      ;; Check for terminator
+      (unless (eql ?e (char-before))
+        (signal 'bencode-invalid-byte
+                (cons (char-before) (point))))
+      ;; Try to parse the digits
+      (let* ((string (buffer-substring start (point)))
+             (result (string-to-number string)))
+        (if (floatp result)
+            (signal 'bencode-overflow (cons string result))
+          result)))))
+
+(defsubst bencode--decode-string (coding-system)
+  "Decode a string from the current buffer at point.
+
+Returns cons of (raw . decoded)."
+  (let ((start (point)))
+    (if (eql (char-after) ?0)
+        ;; Handle zero length as a special case
+        (progn
+          (forward-char)
+          (if (eql (char-after) ?:)
+              (prog1 '("" . "")
+                (forward-char))
+            (signal 'bencode-invalid-byte (cons (char-after) (point)))))
+      ;; Skip over length digits
+      (unless (re-search-forward "[^0-9]" nil :noerror)
+        (signal 'bencode-end-of-file (point)))
+      ;; Did we find a colon?
+      (unless (eql ?: (char-before))
+        (signal 'bencode-invalid-byte (cons (char-before) (point))))
+      ;; A colon with no length digits (reachable for keys) is not a string
+      (when (= (point) (1+ start))
+        (signal 'bencode-invalid-byte (cons (char-before) (point))))
+      (let* ((length-string (buffer-substring start (- (point) 1)))
+             (length (string-to-number length-string)))
+        (when (floatp length)
+          (signal 'bencode-overflow (cons length-string length)))
+        (when (> (+ (point) length) (point-max))
+          (signal 'bencode-end-of-file (+ (point) length)))
+        (let ((string (buffer-substring (point) (+ (point) length))))
+          (prog1 (cons string
+                       (decode-coding-string string coding-system :nocopy))
+            (forward-char length)))))))
+
+(defsubst bencode--to-plist (list)
+  "Convert a series of parsed dictionary entries into a plist."
+  (let ((plist ()))
+    (while list
+      (push (pop list) plist)
+      (push (intern (concat ":" (pop list))) plist))
+    plist))
+
+(defsubst bencode--to-hash-table (list)
+  "Convert a series of parsed dictionary entries into a hash table."
+  (let ((table (make-hash-table :test 'equal)))
+    (prog1 table
+      (while list
+        (let ((value (pop list))
+              (key (pop list)))
+          (setf (gethash key table) value))))))
+
+(cl-defun bencode-decode-from-buffer
+    (&key (list-type 'list) (dict-type 'plist) (coding-system 'utf-8))
+  "Like `bencode-decode' but from the current buffer starting at point.
+
+The point is left where parsing finished.  You may want to reject
+inputs with data trailing beyond the point."
+  ;; Operations are pushed onto an operation stack. One operation is
+  ;; executed once per iteration. Some operations push multiple new
+  ;; operations onto the stack. When no more operations are left,
+  ;; return the remaining element from the value stack.
+  (let ((op-stack '(:read))         ; operations stack
+        (value-stack (list nil))    ; stack of parsed values
+        (last-key-stack ()))        ; last key seen in top dictionary
+    (while op-stack
+      (cl-case (car op-stack)
+        ;; Figure out what type of value is to be read next and
+        ;; prepare stacks accordingly.
+        (:read
+         (pop op-stack)
+         (cl-case (char-after)
+           ((nil) (signal 'bencode-end-of-file (point)))
+           (?i (push (bencode--decode-int) (car value-stack)))
+           (?l (forward-char)
+               (push :list op-stack)
+               (push nil value-stack))
+           (?d (forward-char)
+               (push :dict op-stack)
+               (push nil value-stack)
+               (push nil last-key-stack))
+           ((?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)
+            (push (cdr (bencode--decode-string coding-system))
+                  (car value-stack)))
+           (t (signal 'bencode-invalid-byte (point)))))
+        ;; Read a key and push it onto the list on top of the value stack
+        (:key
+         (pop op-stack)
+         (let* ((string (bencode--decode-string coding-system))
+                (raw (car string))
+                (key (cdr string))
+                (last-key (car last-key-stack)))
+           (when last-key
+             (when (string= last-key raw)
+               (signal 'bencode-invalid-key (cons 'duplicate key)))
+             (when (string< raw last-key)
+               (signal 'bencode-invalid-key (list 'string> last-key raw))))
+           (setf (car last-key-stack) raw)
+           (push key (car value-stack))))
+        ;; End list, or queue operations to read another value
+        (:list
+         (if (eql (char-after) ?e)
+             (let ((result (nreverse (pop value-stack))))
+               (forward-char)
+               (pop op-stack)
+               (if (eq list-type 'vector)
+                   (push (vconcat result) (car value-stack))
+                 (push result (car value-stack))))
+           (push :read op-stack)))
+        ;; End dict, or queue operations to read another entry
+        (:dict
+         (if (eql (char-after) ?e)
+             (let ((result (pop value-stack)))
+               (forward-char)
+               (pop op-stack)
+               (pop last-key-stack)
+               (if (eq dict-type 'hash-table)
+                   (push (bencode--to-hash-table result) (car value-stack))
+                 (push (bencode--to-plist result) (car value-stack))))
+           (push :read op-stack)
+           (push :key op-stack)))))
+    (caar value-stack)))
+
+(cl-defun bencode-decode
+    (string &key (list-type 'list) (dict-type 'plist) (coding-system 'utf-8))
+  "Decode bencode data from STRING.
+
+:coding-system -- coding system for decoding byte strings (utf-8)
+:dict-type -- target format for dictionaries (symbol: plist, hash-table)
+:list-type -- target format for lists (symbol: list, vector)
+
+Input should generally be unibyte.  Strings parsed as values and
+keys will be decoded using the coding system indicated by the
+given coding system (default: UTF-8).  The same coding system
+should be used as when encoding.  There are never decoding errors
+since Emacs can preserve arbitrary byte data across encoding and
+decoding.  See \"Text Representations\" in the GNU Emacs Lisp
+Reference Manual.
+
+Input is strictly validated and invalid inputs are rejected.  This
+includes dictionary key constraints.  By default dictionaries are
+decoded into plists and lists into lists.  If an integer is too
+large to store in an Emacs integer, the decoder will signal an
+overflow error.  Signals an error if STRING contains trailing data.
+
+Possible error signals:
+* bencode-end-of-file
+* bencode-invalid-key
+* bencode-invalid-byte
+* bencode-overflow
+
+This function is not recursive.  It is safe to parse very deeply
+nested inputs."
+  (with-temp-buffer
+    (insert string)
+    (goto-char (point-min))
+    (prog1 (bencode-decode-from-buffer :list-type list-type
+                                       :dict-type dict-type
+                                       :coding-system coding-system)
+      (when (< (point) (point-max))
+        (signal 'bencode-invalid-byte (cons "Trailing data" (point)))))))
+
+(provide 'bencode)
+
+;;; bencode.el ends here