Initial working bencode and torrent parser

Armin Friedl 2024-06-16 17:03:13 +02:00
commit f6beb617bd
13 changed files with 1450 additions and 0 deletions

245
.gitignore vendored Normal file

@@ -0,0 +1,245 @@
# Created by https://www.toptal.com/developers/gitignore/api/zig,emacs,jetbrains+all,linux,windows,macos,vim
# Edit at https://www.toptal.com/developers/gitignore?templates=zig,emacs,jetbrains+all,linux,windows,macos,vim
### Emacs ###
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
# flymake-mode
*_flymake.*
# eshell files
/eshell/history
/eshell/lastdir
# elpa packages
/elpa/
# reftex files
*.rel
# AUCTeX auto folder
/auto/
# cask packages
.cask/
dist/
# Flycheck
flycheck_*.el
# server auth directory
/server/
# projectiles files
.projectile
# directory configuration
.dir-locals.el
# network security
/network-security.data
### JetBrains+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### JetBrains+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.
.idea/*
!.idea/codeStyles
!.idea/runConfigurations
### Linux ###
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
### Vim ###
# Swap
[._]*.s[a-v][a-z]
!*.svg # comment out if you don't need vector files
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]
# Session
Session.vim
Sessionx.vim
# Temporary
.netrwhist
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
### zig ###
# Zig programming language
zig-cache/
zig-out/
build/
build-*/
docgen_tmp/
# End of https://www.toptal.com/developers/gitignore/api/zig,emacs,jetbrains+all,linux,windows,macos,vim

71
build.zig Normal file

@@ -0,0 +1,71 @@
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
// Standard target options allow the person running `zig build` to choose
// what target to build for. Here we do not override the defaults, which
// means any target is allowed, and the default is native. Other options
// for restricting supported target set are available.
const target = b.standardTargetOptions(.{});
// Standard optimization options allow the person running `zig build` to select
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
.name = "zephyr",
// In this case the main source file is merely a path, however, in more
// complicated build scripts, this could be a generated file.
.root_source_file = .{ .path = "src/main.zig" },
.target = target,
.optimize = optimize,
});
exe.linkSystemLibrary("c");
// This declares intent for the executable to be installed into the
// standard location when the user invokes the "install" step (the default
// step when running `zig build`).
b.installArtifact(exe);
// This *creates* a Run step in the build graph, to be executed when another
// step is evaluated that depends on it. The next line below will establish
// such a dependency.
const run_cmd = b.addRunArtifact(exe);
// By making the run step depend on the install step, it will be run from the
// installation directory rather than directly from within the cache directory.
// This is not necessary, however, if the application depends on other installed
// files, this ensures they will be present and in the expected location.
run_cmd.step.dependOn(b.getInstallStep());
// This allows the user to pass arguments to the application in the build
// command itself, like this: `zig build run -- arg1 arg2 etc`
if (b.args) |args| {
run_cmd.addArgs(args);
}
// This creates a build step. It will be visible in the `zig build --help` menu,
// and can be selected like this: `zig build run`
// This will evaluate the `run` step rather than the default, which is "install".
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
// Creates a step for unit testing. This only builds the test executable
// but does not run it.
const unit_tests = b.addTest(.{
.root_source_file = .{ .path = "src/main.zig" },
.target = target,
.optimize = optimize,
});
const run_unit_tests = b.addRunArtifact(unit_tests);
// Similar to creating the run step earlier, this exposes a `test` step to
// the `zig build --help` menu, providing a way for the user to request
// running the unit tests.
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_unit_tests.step);
}
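
With the `run` and `test` steps wired up above, the build can presumably be exercised with `zig build run -- test/simple.torrent` (main.zig reads the torrent path from its first argument) and the unit tests with `zig build test`.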

6
build.zig.zon Normal file

@@ -0,0 +1,6 @@
.{
.name = "zephyr",
.version = "0.0.1",
.paths = .{""},
.dependencies = .{},
}

340
src/bencode.zig Normal file

@@ -0,0 +1,340 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const Reader = std.io.Reader;
const StringHashMap = std.StringHashMap;
pub const BType = union(enum) {
Integer: i64,
String: []const u8,
List: []const BType,
Dict: StringHashMap(BType),
pub fn get_as(self: BType, comptime tag: std.meta.Tag(BType), key: []const u8) !std.meta.TagPayload(BType, tag) {
if (self != BType.Dict) return error.NoDict;
const val = self.Dict.get(key) orelse return error.KeyNotFound;
if (val != tag) return error.BTypeMismatch;
return @field(val, @tagName(tag));
}
pub fn format(value: BType, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
switch (value) {
.Integer => {
try writer.print("{}", .{value.Integer});
},
.String => {
try writer.print("{s}", .{value.String});
},
.List => {
try writer.print("[\n", .{});
for (value.List) |e| {
try writer.print("\t", .{});
try format(e, fmt, options, writer);
}
try writer.print("\n]", .{});
},
.Dict => {
try writer.print("{{\n", .{});
var it = value.Dict.keyIterator();
while (it.next()) |k| {
try writer.print("\t", .{});
try writer.print("{s}:", .{k.*});
const val = value.Dict.get(k.*).?;
try format(val, fmt, options, writer);
}
try writer.print("\n}}", .{});
},
}
}
};
pub const BParse = struct {
const Self = @This();
alloc: ArenaAllocator,
value: BType,
pub fn get_as(self: Self, comptime tag: std.meta.Tag(BType), key: []const u8) !std.meta.TagPayload(BType, tag) {
return self.value.get_as(tag, key);
}
pub fn deinit(self: Self) void {
self.alloc.deinit();
}
};
const ParseError = error{ EndOfStream, FormatError };
pub fn parse(allocator: Allocator, reader: anytype) anyerror!BParse {
var ally = std.heap.ArenaAllocator.init(allocator);
errdefer ally.deinit();
const bparse = try parseInternal(ally.allocator(), reader);
return BParse{ .alloc = ally, .value = bparse };
}
fn parseInternal(allocator: Allocator, reader: anytype) anyerror!BType {
while (nextByte(reader)) |next| {
return try parseInternalNext(allocator, next, reader);
}
return ParseError.EndOfStream;
}
fn parseInternalNext(allocator: Allocator, next: u8, reader: anytype) anyerror!BType {
switch (next) {
'i' => {
const res = try parse_bint(reader);
return BType{ .Integer = res };
},
'l' => {
const res = try parse_blist(allocator, reader);
return BType{ .List = res };
},
'd' => {
const res = try parse_bdict(allocator, reader);
return BType{ .Dict = res };
},
'0'...'9' => {
const res = try parse_bstring(allocator, next, reader);
return BType{ .String = res };
},
else => {
return ParseError.FormatError;
},
}
unreachable;
}
fn parse_blist(allocator: Allocator, reader: anytype) anyerror![]const BType {
var buf = std.ArrayList(BType).init(allocator);
errdefer buf.deinit();
while (nextByte(reader)) |next| {
switch (next) {
'e' => break,
else => {
const el = try parseInternalNext(allocator, next, reader);
try buf.append(el);
},
}
}
return buf.toOwnedSlice();
}
fn parse_bdict(allocator: Allocator, reader: anytype) anyerror!StringHashMap(BType) {
var map = StringHashMap(BType).init(allocator);
errdefer map.deinit();
while (nextByte(reader)) |next| {
switch (next) {
'e' => {
break;
},
else => {
const key = try parse_bstring(allocator, next, reader);
const value_next = nextByte(reader) orelse return error.FormatError;
const value = try parseInternalNext(allocator, value_next, reader);
try map.put(key, value);
},
}
}
return map;
}
/// Parses a bencode string into a `u8` slice
///
/// Allocates a buffer for the string. Caller owns the memory.
fn parse_bstring(allocator: Allocator, first: u8, reader: anytype) ![]const u8 {
const len = try parse_bstring_len(allocator, first, reader);
var buf = try allocator.alloc(u8, len);
for (0..len) |i| {
const next = nextByte(reader) orelse return ParseError.FormatError;
buf[i] = next;
}
return buf;
}
/// Tries to parse the length specifier for a bencode string
///
/// Owns its memory: only temporarily allocates for parsing and deallocates
/// when finished.
fn parse_bstring_len(allocator: Allocator, first: u8, reader: anytype) !usize {
// `first` has already been consumed from the reader at this point, so it is added to the buffer manually
var ally = ArenaAllocator.init(allocator);
defer ally.deinit();
var buf = std.ArrayList(u8).init(ally.allocator());
try buf.append(first);
while (nextByte(reader)) |next| {
switch (next) {
':' => {
break;
},
'0'...'9' => {
try buf.append(next);
},
else => {
return ParseError.FormatError;
},
}
}
const tmp = try buf.toOwnedSlice();
return try std.fmt.parseUnsigned(usize, tmp, 10);
}
fn parse_bint(reader: anytype) !i64 {
var parse_buf: [20]u8 = undefined; // -9223372036854775808 to 9223372036854775807
var len: usize = 0;
while (nextByte(reader)) |next| {
switch (next) {
'-', '0'...'9' => {
// '-' is only valid as the very first character; also guard the fixed-size buffer
if (next == '-' and len != 0) return ParseError.FormatError;
if (len >= parse_buf.len) return ParseError.FormatError;
parse_buf[len] = next;
len += 1;
},
'e' => {
return try std.fmt.parseInt(i64, parse_buf[0..len], 10);
},
else => {
return ParseError.FormatError;
},
}
}
return ParseError.FormatError;
}
fn nextByte(reader: anytype) ?u8 {
return reader.readByte() catch {
std.log.debug("Parse reached end of stream", .{});
return null;
};
}
test "parse int i322323e" {
const bencode = "i322323e";
var stream = std.io.fixedBufferStream(bencode);
const res = try parse(std.testing.allocator, stream.reader());
defer res.deinit();
try std.testing.expectEqual(@as(i64, 322323), res.value.Integer);
}
test "parse string 3:abc" {
const bencode = "3:abc";
var stream = std.io.fixedBufferStream(bencode);
const res = try parse(std.testing.allocator, stream.reader());
defer res.deinit();
try std.testing.expectEqualStrings("abc", res.value.String);
}
test "parse invalid int i12" {
const bencode = "i12";
var stream = std.io.fixedBufferStream(bencode);
const res = parse(std.testing.allocator, stream.reader());
try std.testing.expectError(error.FormatError, res);
}
test "parse list l4:spam4:eggse" {
const bencode = "l4:spam4:eggse";
var stream = std.io.fixedBufferStream(bencode);
const res = try parse(std.testing.allocator, stream.reader());
defer res.deinit();
try std.testing.expect(res.value.List.len == 2);
try std.testing.expectEqualStrings("spam", res.value.List[0].String);
try std.testing.expectEqualStrings("eggs", res.value.List[1].String);
}
test "parse list l4:spami322323e4:eggse" {
const bencode = "l4:spami322323e4:eggse";
var stream = std.io.fixedBufferStream(bencode);
const res = try parse(std.testing.allocator, stream.reader());
defer res.deinit();
try std.testing.expect(res.value.List.len == 3);
try std.testing.expectEqualStrings("spam", res.value.List[0].String);
try std.testing.expectEqual(@as(i64, 322323), res.value.List[1].Integer);
try std.testing.expectEqualStrings("eggs", res.value.List[2].String);
}
test "parse list l4:spamli322323e4:fishe4:eggse" {
const bencode = "l4:spamli322323e4:fishe4:eggse";
var stream = std.io.fixedBufferStream(bencode);
const res = try parse(std.testing.allocator, stream.reader());
defer res.deinit();
try std.testing.expect(res.value.List.len == 3);
try std.testing.expectEqualStrings("spam", res.value.List[0].String);
//nested list
try std.testing.expect(res.value.List[1].List.len == 2);
try std.testing.expectEqual(@as(i64, 322323), res.value.List[1].List[0].Integer);
try std.testing.expectEqualStrings("fish", res.value.List[1].List[1].String);
try std.testing.expectEqualStrings("eggs", res.value.List[2].String);
}
test "parse map d4:spamli322323e4:fishe4:eggsi1234e ({spam:[322323,fish], eggs:1234})" {
const bencode = "d4:spamli322323e4:fishe4:eggsi1234e";
var stream = std.io.fixedBufferStream(bencode);
const res = try parse(std.testing.allocator, stream.reader());
defer res.deinit();
try std.testing.expect(res.value.Dict.count() == 2);
try std.testing.expect(res.value.Dict.contains("spam"));
try std.testing.expect(res.value.Dict.contains("eggs"));
try std.testing.expectEqual(@as(i64, 322323), res.value.Dict.get("spam").?.List[0].Integer);
try std.testing.expectEqualStrings("fish", res.value.Dict.get("spam").?.List[1].String);
try std.testing.expectEqual(@as(i64, 1234), res.value.Dict.get("eggs").?.Integer);
}
test "parse invalid string 3:ab" {
const bencode = "3:ab";
var stream = std.io.fixedBufferStream(bencode);
const res = parse(std.testing.allocator, stream.reader());
try std.testing.expectError(error.FormatError, res);
}
test "parse debian torrent" {
const bencode = try std.fs.cwd()
.openFile("test/simple.torrent", .{});
defer bencode.close();
var buffered_reader = std.io.bufferedReader(bencode.reader());
const res = try parse(std.testing.allocator, buffered_reader.reader());
defer res.deinit();
}
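
The tests above exercise `parse` but not `BParse.get_as`. A minimal sketch of how a caller might combine the two; the bencoded dict and its keys are made up for illustration and are not part of this commit:
test "get_as on a parsed dict (sketch)" {
    // Hypothetical input: {"announce": "http://example.com", "length": 7}
    const data = "d8:announce18:http://example.com6:lengthi7ee";
    var stream = std.io.fixedBufferStream(data);
    const res = try parse(std.testing.allocator, stream.reader());
    defer res.deinit();
    // A wrong tag yields error.BTypeMismatch, a missing key error.KeyNotFound,
    // and a non-dict root error.NoDict
    try std.testing.expectEqualStrings("http://example.com", try res.get_as(.String, "announce"));
    try std.testing.expectEqual(@as(i64, 7), try res.get_as(.Integer, "length"));
}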

0
src/client.zig Normal file

20
src/main.zig Normal file

@@ -0,0 +1,20 @@
const std = @import("std");
const torrent = @import("torrent.zig");
const bencode = @import("bencode.zig");
const tracker = @import("tracker.zig");
const network = @import("network.zig");
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
defer _ = gpa.deinit();
const args = try std.process.argsAlloc(allocator);
defer std.process.argsFree(allocator, args);
if (args.len < 2) {
std.log.err("usage: zephyr <torrent-file>", .{});
return error.MissingArgument;
}
const t = try torrent.Torrent.parse(allocator, args[1]);
defer t.deinit();
const outw = std.io.getStdOut().writer();
try outw.print("{}", .{t});
}

55
src/network.zig Normal file

@@ -0,0 +1,55 @@
const std = @import("std");
const c = @cImport({
@cInclude("arpa/inet.h");
@cInclude("netdb.h");
});
// roughly after
// https://beej.us/guide/bgnet/html/split/system-calls-or-bust.html#getaddrinfoprepare-to-launch
fn printip() !void {
// equivalent to beej's `memset(&hints, 0, sizeof hints);`
// zeroing out the struct
var hints: c.addrinfo = std.mem.zeroes(c.addrinfo);
hints.ai_family = c.AF_UNSPEC;
hints.ai_socktype = c.SOCK_DGRAM;
hints.ai_flags = 0;
hints.ai_protocol = 0;
var results: ?*c.addrinfo = null;
const res = c.getaddrinfo("example.com", "443", &hints, &results);
defer if (results != null) c.freeaddrinfo(results);
if (res != 0) return error.UnableToResolve;
var ip_buf: [c.INET6_ADDRSTRLEN]u8 = undefined;
var rp = results;
while (rp) |addr| : (rp = rp.?.ai_next) {
switch (addr.ai_family) {
c.AF_INET => {
const ipv4: *c.sockaddr_in = @alignCast(@ptrCast(addr.ai_addr));
const ip = c.inet_ntop(addr.ai_family, &ipv4.sin_addr, &ip_buf, c.INET_ADDRSTRLEN);
if (ip == null) return error.UntranslatableIP;
std.debug.print("Addr IPv4: {s}\n", .{ip});
},
c.AF_INET6 => {
const ipv6: *c.sockaddr_in6 = @ptrCast(@alignCast(addr.ai_addr));
const ip = c.inet_ntop(addr.ai_family, &ipv6.sin6_addr, &ip_buf, c.INET6_ADDRSTRLEN);
if (ip == null) return error.UntranslatableIP;
std.debug.print("Addr IPv6: {s}\n", .{ip});
},
else => {
return error.UnknownFamily;
},
}
}
}
test "print ip" {
try printip();
}
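
For comparison, a sketch (not part of this commit) of the same lookup using the resolver in Zig's standard library instead of libc's getaddrinfo; `std.net.tcpConnectToHost` in tracker.zig relies on the same machinery. The function name `printipStd` is made up:
fn printipStd(allocator: std.mem.Allocator) !void {
    // Resolve both IPv4 and IPv6 addresses for the host
    const list = try std.net.getAddressList(allocator, "example.com", 443);
    defer list.deinit();
    for (list.addrs) |addr| {
        // std.net.Address formats itself as "ip:port"
        std.debug.print("Addr: {}\n", .{addr});
    }
}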

300
src/torrent.zig Normal file

@@ -0,0 +1,300 @@
const std = @import("std");
const bencode = @import("bencode.zig");
const BType = bencode.BType;
const Allocator = std.mem.Allocator;
const TorrentError = error{ InvalidTorrent, MissingEntry, AllocError };
pub const Torrent = struct {
const Tier = std.ArrayList([]const u8);
const File = struct { // zig fmt: off
length: u64,
path: std.ArrayList([]const u8)
};
const Info = struct { // zig fmt: off
name: []const u8,
piece_length: u64,
pieces: [][20]u8,
Type: union(enum) {
Single: struct { length: u64 },
Multi: struct { files: std.ArrayList(File) }
}
};
announce: []const u8,
announce_list: std.ArrayList(Tier),
comment: ?[]const u8,
info: Info,
allocator: Allocator,
pub fn parse(allocator: Allocator, path: []u8) TorrentError!Torrent {
var torrent = Torrent{ // zig fmt: off
.announce = &[_]u8{},
.announce_list = std.ArrayList(Tier).init(allocator),
.comment = null,
.info = undefined,
.allocator = allocator
};
errdefer torrent.deinit();
const bparse: bencode.BParse = bencode_decode(allocator, path) catch return error.InvalidTorrent;
defer bparse.deinit();
if (bparse.value != BType.Dict) return error.InvalidTorrent;
const announce = bparse.get_as(BType.String, "announce") catch return error.InvalidTorrent;
torrent.announce = allocator.dupe(u8, announce) catch return error.AllocError;
try parse_announce_list(allocator, bparse, &torrent);
if(bparse.value.Dict.contains("comment")) {
const comment = bparse.get_as(BType.String, "comment") catch return error.InvalidTorrent;
torrent.comment = allocator.dupe(u8, comment) catch return error.AllocError;
}
const info = bparse.value.Dict.get("info") orelse return error.InvalidTorrent;
if (info != BType.Dict) return error.InvalidTorrent;
try parse_info_common(allocator, info, &torrent);
if (info.Dict.contains("length")) {
torrent.info.Type = .{ .Single = .{
.length = @intCast(info.get_as(BType.Integer, "length") catch return error.InvalidTorrent),
} };
} else {
torrent.info.Type = .{ .Multi = .{ .files = try parse_info_multifile(allocator, info) } };
}
return torrent;
}
fn bencode_decode(allocator: Allocator, path: []u8) !bencode.BParse {
const torrent_file = try std.fs.Dir.openFile(std.fs.cwd(), path, .{});
defer torrent_file.close();
var buffered_reader = std.io.bufferedReader(torrent_file.reader());
const bparse = try bencode.parse(allocator, buffered_reader.reader());
errdefer bparse.deinit();
return bparse;
}
fn parse_announce_list(allocator: Allocator, bparse: bencode.BParse, torrent: *Torrent) !void {
if (!bparse.value.Dict.contains("announce-list")) return;
const announce_list = bparse.get_as(BType.List, "announce-list") catch return error.InvalidTorrent;
for (announce_list) |tier_list| {
if (tier_list != BType.List) return error.InvalidTorrent;
if (tier_list.List.len == 0) continue;
var tier = Tier.init(allocator);
for (tier_list.List) |tracker| {
if (tracker != BType.String) return error.InvalidTorrent;
tier.append(allocator.dupe(u8, tracker.String) catch return error.AllocError) catch return error.AllocError;
}
torrent.announce_list.append(tier) catch return error.AllocError;
}
}
fn parse_info_common(allocator: Allocator, info: BType, torrent: *Torrent) !void {
const name = info.get_as(BType.String, "name") catch return error.InvalidTorrent;
torrent.info.name = allocator.dupe(u8, name) catch return error.AllocError;
const piece_length = info.get_as(BType.Integer, "piece length") catch return error.InvalidTorrent;
torrent.info.piece_length = @intCast(piece_length);
// pieces are 20-byte SHA-1 hashes of file pieces
const info_pieces = info.get_as(BType.String, "pieces") catch return error.InvalidTorrent;
const info_pieces_len = info_pieces.len / 20;
torrent.info.pieces = allocator.alloc([20]u8, info_pieces_len) catch return error.AllocError;
for (0..info_pieces_len) |i| {
@memcpy(&torrent.info.pieces[i], info_pieces[i * 20 ..][0..20]); // piece i starts at byte offset i * 20
}
}
fn parse_info_multifile(allocator: Allocator, info: BType) !std.ArrayList(File) {
var files = std.ArrayList(File).init(allocator);
const info_files = info.get_as(BType.List, "files") catch return error.InvalidTorrent;
for (info_files) |info_file| {
if (info_file != BType.Dict) return error.InvalidTorrent;
var torrent_file = File{ .length = undefined, .path = std.ArrayList([]const u8).init(allocator) };
const file_length = info_file.get_as(BType.Integer, "length") catch return error.InvalidTorrent;
torrent_file.length = @intCast(file_length);
const file_path = info_file.get_as(BType.List, "path") catch return error.InvalidTorrent;
for (file_path) |p| {
if (p != BType.String) return error.InvalidTorrent;
const p_dupe = allocator.dupe(u8, p.String) catch return error.AllocError;
torrent_file.path.append(p_dupe) catch return error.AllocError;
}
files.append(torrent_file) catch return error.AllocError;
}
return files;
}
pub fn format(value: Torrent, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
try writer.print(
\\Torrent {{
\\ .announce = {s}
\\
, .{value.announce});
try writer.print(" .announce-list = [", .{});
for (value.announce_list.items, 0..) |tier, idx| {
try writer.print("\n [", .{});
for (tier.items, 0..) |tracker, idx2| {
try writer.print("{s}", .{tracker});
if (idx2 < tier.items.len - 1) try writer.print(", ", .{});
}
try writer.print("]", .{});
if (idx < value.announce_list.items.len - 1) try writer.print(", ", .{});
}
try writer.print("]\n", .{});
try writer.print(" .comment = {?s}\n", .{value.comment});
try writer.print(
\\ .info = {{
\\ .name = {s}
\\ .piece_length = {}
\\
, .{ value.info.name, value.info.piece_length });
switch (value.info.Type) {
.Multi => |multi| {
try writer.print(" .files = [\n", .{});
for (multi.files.items) |file| {
try writer.print(" {{.length = {}, .path = [", .{file.length});
for (0..file.path.items.len) |i| {
try writer.print("{s}", .{file.path.items[i]});
if (i < file.path.items.len - 1) {
try writer.print(", ", .{});
}
}
try writer.print("]}}\n", .{});
}
try writer.print(" ]\n", .{});
},
.Single => |single| {
try writer.print(" .length = {}\n", .{single.length});
},
}
try writer.print(" .pieces = \n", .{});
for (value.info.pieces) |p| {
try writer.print(" {}\n", .{std.fmt.fmtSliceHexUpper(&p)});
}
try writer.print(
\\ }}
\\}}
\\
, .{});
}
pub fn deinit(self: Torrent) void {
self.allocator.free(self.announce);
if(self.comment) |comment| self.allocator.free(comment);
self.allocator.free(self.info.name);
self.allocator.free(self.info.pieces);
for (self.announce_list.items) |tier| {
for (tier.items) |tracker| {
self.allocator.free(tracker);
}
tier.deinit();
}
self.announce_list.deinit();
switch (self.info.Type) {
.Multi => |multi| {
for (multi.files.items) |file| {
for (file.path.items) |p| {
self.allocator.free(p);
}
file.path.deinit();
}
self.info.Type.Multi.files.deinit();
},
.Single => {},
}
}
};
test "parse simple torrent" {
var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const path = try std.fs.realpath("test/simple.torrent", &buf);
const res = try Torrent.parse(std.testing.allocator, path);
defer res.deinit();
try std.testing.expectEqualStrings("http://example.com", res.announce);
try std.testing.expectEqualStrings("simple", res.info.name);
try std.testing.expectEqual(@as(u64, 7), res.info.Type.Single.length);
try std.testing.expectEqual(@as(u64, 262144), res.info.piece_length);
}
test "parse multifile real torrent" {
var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const path = try std.fs.realpath("test/rocky.torrent", &buf);
const res = try Torrent.parse(std.testing.allocator, path);
defer res.deinit();
try std.testing.expectEqualStrings("http://linuxtracker.org:2710/00000000000000000000000000000000/announce", res.announce);
try std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal", res.info.name);
try std.testing.expectEqual(4, res.info.Type.Multi.files.items.len);
try std.testing.expectEqual(@as(u64, 1502), res.info.Type.Multi.files.items[0].length);
try std.testing.expectEqual(1, res.info.Type.Multi.files.items[0].path.items.len);
try std.testing.expectEqualStrings("CHECKSUM", res.info.Type.Multi.files.items[0].path.items[0]);
try std.testing.expectEqual(@as(u64, 2694053888), res.info.Type.Multi.files.items[1].length);
try std.testing.expectEqual(1, res.info.Type.Multi.files.items[1].path.items.len);
try std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal.iso", res.info.Type.Multi.files.items[1].path.items[0]);
try std.testing.expectEqual(@as(u64, 156), res.info.Type.Multi.files.items[2].length);
try std.testing.expectEqual(1, res.info.Type.Multi.files.items[2].path.items.len);
try std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal.iso.CHECKSUM", res.info.Type.Multi.files.items[2].path.items[0]);
try std.testing.expectEqual(@as(u64, 103171), res.info.Type.Multi.files.items[3].length);
try std.testing.expectEqual(1, res.info.Type.Multi.files.items[3].path.items.len);
try std.testing.expectEqualStrings("Rocky-8.10-x86_64-minimal.iso.manifest", res.info.Type.Multi.files.items[3].path.items[0]);
try std.testing.expectEqual(@as(u64, 4194304), res.info.piece_length);
}
test "parse singlefile real torrent" {
var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const path = try std.fs.realpath("test/debian.torrent", &buf);
const res = try Torrent.parse(std.testing.allocator, path);
defer res.deinit();
try std.testing.expectEqualStrings("http://bttracker.debian.org:6969/announce", res.announce);
try std.testing.expectEqualStrings("debian-12.5.0-amd64-netinst.iso", res.info.name);
try std.testing.expectEqual(@as(u64, 659554304), res.info.Type.Single.length);
try std.testing.expectEqual(@as(u64, 262144), res.info.piece_length);
}
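
A sketch (not part of this commit) of what the `pieces` field is for: each entry is the 20-byte SHA-1 digest of one piece of the payload, so a downloaded piece can be checked against it. The function name `verifyPiece` and the inputs `piece_index` and `piece_data` are hypothetical:
fn verifyPiece(t: Torrent, piece_index: usize, piece_data: []const u8) bool {
    var digest: [std.crypto.hash.Sha1.digest_length]u8 = undefined;
    std.crypto.hash.Sha1.hash(piece_data, &digest, .{});
    // Compare against the expected hash stored in the torrent's info dict
    return std.mem.eql(u8, &digest, &t.info.pieces[piece_index]);
}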

35
src/tracker.zig Normal file

@@ -0,0 +1,35 @@
const std = @import("std");
const torrent = @import("torrent.zig");
const Torrent = torrent.Torrent;
const Peer = struct {
ip: std.net.Address,
port: u16,
};
fn get_peers(allocator: std.mem.Allocator, t: Torrent) !void {
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
const stream: std.net.Stream = try std.net.tcpConnectToHost(arena.allocator(), t.announce, 6969);
defer stream.close();
try stream.writeAll(
\\GET /announce?info_hash=%D9%BE%BA%F7%CD4%F7v%DC%D7_4%E9%F6%9B%E7G%1B%DD%C6%F8%D7%CD%F9o%DEZ%D3~%B6&peer_id=1&uploaded=0&downloaded=0 HTTP/1.1
\\Host: bttracker.debian.org
\\
\\
);
var buf: [20]u8 = undefined;
const res = try stream.reader().read(&buf);
std.log.err("Result {any}", .{res});
}
test "something" {
const t = Torrent{ .announce = "bttracker.debian.org", .announce_list = undefined, .comment = null, .allocator = undefined, .info = undefined };
try get_peers(std.testing.allocator, t);
}
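
The GET request above hard-codes an already-escaped info_hash. Per the BitTorrent spec that value is the URL-escaped SHA-1 of the bencoded `info` dictionary; below is a minimal sketch of only the escaping step (the hash itself would still have to be computed from the raw `info` bytes, which the parser does not currently expose). The function name `escapeInfoHash` and the `hash` input are hypothetical:
fn escapeInfoHash(writer: anytype, hash: [20]u8) !void {
    for (hash) |byte| {
        // Percent-encode every byte, e.g. 0xD9 -> "%D9"; escaping unreserved
        // bytes too is harmless and keeps the sketch simple
        try writer.print("%{X:0>2}", .{byte});
    }
}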

BIN
test/debian.torrent Normal file

Binary file not shown.

BIN
test/rocky.torrent Normal file

Binary file not shown.

1
test/simple.torrent Normal file

@@ -0,0 +1 @@
d8:announce18:http://example.com7:comment7:Comment10:created by13:mktorrent 1.113:creation datei1712361951e4:infod6:lengthi7e4:name6:simple12:piece lengthi262144e6:pieces20: ³¨•ãÙ¨¾ç&y(©9A¶®á<C2AE>ee
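
For reference, the bencoded data above decodes to a dict with announce = "http://example.com", comment = "Comment", created by = "mktorrent 1.1", creation date = 1712361951, and an info dict with length = 7, name = "simple", piece length = 262144 and a single 20-byte binary pieces entry (rendered as mojibake above), matching the values asserted in the "parse simple torrent" test.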

377
tools/bencode.el Normal file

@@ -0,0 +1,377 @@
;;; bencode.el --- Bencode encoding / decoding -*- lexical-binding: t; -*-
;; This is free and unencumbered software released into the public domain.
;; Author: Christopher Wellons <wellons@nullprogram.com>
;; URL: https://github.com/skeeto/emacs-bencode
;; Version: 1.0
;; Package-Requires: ((emacs "24.4"))
;;; Commentary:
;; This package provides a strict and robust [bencode][bencode]
;; encoder and decoder. Encoding is precise, taking into account
;; character encoding issues. As such, the encoder always returns
;; unibyte data intended to be written out as raw binary data without
;; additional character encoding. When encoding strings and keys,
;; UTF-8 is used by default. The decoder strictly validates its input,
;; rejecting invalid inputs.
;; The API entrypoints are:
;; * `bencode-encode'
;; * `bencode-encode-to-buffer'
;; * `bencode-decode'
;; * `bencode-decode-from-buffer'
;;; Code:
(require 'cl-lib)
(define-error 'bencode "Bencode error")
(define-error 'bencode-unsupported-type "Type cannot be encoded" 'bencode)
(define-error 'bencode-invalid-key "Not a valid dictionary key" 'bencode)
(define-error 'bencode-invalid-plist "Plist is invalid" 'bencode)
(define-error 'bencode-invalid-byte "Invalid input byte" 'bencode)
(define-error 'bencode-overflow "Integer too large" 'bencode)
(define-error 'bencode-end-of-file "End of file during parsing"
'(bencode end-of-file))
(defsubst bencode--int (object)
"Encode OBJECT as an integer into the current buffer."
(insert "i" (number-to-string object) "e"))
(defsubst bencode--string (object coding-system)
"Encode OBJECT as a string into the current buffer."
(if (multibyte-string-p object)
(let ((encoded (encode-coding-string object coding-system :nocopy)))
(insert (number-to-string (length encoded)) ":" encoded))
(insert (number-to-string (length object)) ":" object)))
(defsubst bencode--hash-table-entries (object coding-system)
"Return a list of key-sorted entries in OBJECT with encoded keys."
(let ((entries ()))
(maphash (lambda (key value)
(cond
((multibyte-string-p key)
(let ((encoded (encode-coding-string
key coding-system :nocopy)))
(push (cons encoded value) entries)))
((stringp key)
(push (cons key value) entries))
((signal 'bencode-invalid-key key))))
object)
(cl-sort entries #'string< :key #'car)))
(defsubst bencode--plist-entries (object coding-system)
"Return a list of key-sorted entries in OBJECT with encoded keys."
(let ((plist object)
(entries ()))
(while plist
(let ((key (pop plist)))
(unless (keywordp key)
(signal 'bencode-invalid-key key))
(when (null plist)
(signal 'bencode-invalid-plist object))
(let ((name (substring (symbol-name key) 1))
(value (pop plist)))
(if (multibyte-string-p name)
(let ((encoded (encode-coding-string
name coding-system :nocopy)))
(push (cons encoded value) entries))
(push (cons name value) entries)))))
(cl-sort entries #'string< :key #'car)))
(cl-defun bencode-encode (object &key (coding-system 'utf-8))
"Return a unibyte string encoding OBJECT with bencode.
:coding-system -- coding system for encoding strings into byte strings (utf-8)
Supported types:
* Integer
* Multibyte and unibyte strings
* List of supported types
;; * Vector of supported types (encodes to list)
* Hash table with string keys (encodes to dictionary)
* Plist with keyword symbol keys (encodes to dictionary)
When multibyte strings are encountered either as values or dictionary
keys, they are encoded with the specified coding system (default:
UTF-8). The same coding system must be used when decoding.
Possible error signals:
* bencode-unsupported-type
* bencode-invalid-key
* bencode-invalid-plist
This function is not recursive. It is safe to input very deeply
nested data structures."
(with-temp-buffer
(set-buffer-multibyte nil)
(bencode-encode-to-buffer object :coding-system coding-system)
(buffer-string)))
(cl-defun bencode-encode-to-buffer (object &key (coding-system 'utf-8))
"Like `bencode-encode' but to the current buffer at point."
(let ((stack (list (cons :new object))))
(while stack
(let* ((next (car stack))
(value (cdr next)))
(cl-case (car next)
;; Start encoding a new, unexamined value
(:new
(pop stack)
(cond ((integerp value)
(bencode--int value))
((stringp value)
(bencode--string value coding-system))
((and (consp value)
(keywordp (car value)))
(insert "d")
(let ((entries (bencode--plist-entries value coding-system)))
(push (cons :dict entries) stack)))
((listp value)
(insert "l")
(push (cons :list value) stack))
((vectorp value)
(insert "l")
(push (cons :vector (cons 0 value)) stack))
((hash-table-p value)
(insert "d")
(let ((entries (bencode--hash-table-entries
value coding-system)))
(push (cons :dict entries) stack)))
((signal 'bencode-unsupported-type object))))
;; Continue encoding dictionary
;; (:dict . remaining-dict)
(:dict
(if (null value)
(progn
(pop stack)
(insert "e"))
(let* ((entry (car value))
(key (car entry)))
(insert (number-to-string (length key)) ":" key)
(setf (cdr next) (cdr value))
(push (cons :new (cdr entry)) stack))))
;; Continue encoding list
;; (:list . remaining-list)
(:list
(if (null value)
(progn
(pop stack)
(insert "e"))
(setf (cdr next) (cdr value))
(push (cons :new (car value)) stack)))
;; Continue encoding vector (as list)
;; (:vector index . vector)
(:vector
(let ((i (car value))
(v (cdr value)))
(if (= i (length v))
(progn
(pop stack)
(insert "e"))
(setf (car value) (+ i 1))
(push (cons :new (aref v i)) stack)))))))))
(defsubst bencode--decode-int ()
"Decode an integer from the current buffer at point."
(forward-char)
(let ((start (point)))
;; Don't allow leading zeros
(if (eql (char-after) ?0)
;; Unless the value *is* zero
(prog1 0
(forward-char)
(unless (eql (char-after) ?e)
(signal 'bencode-invalid-byte
(cons (char-after) (point))))
(forward-char))
;; Skip minus sign
(when (eql (char-after) ?-)
(forward-char)
;; Negative zero not allowed
(when (eql (char-after) ?0)
(signal 'bencode-invalid-byte
(cons (char-after) (point)))))
;; Check for empty integer
(when (eql ?e (char-after))
(signal 'bencode-invalid-byte
(cons (char-after) (point))))
;; Skip over digits
(unless (re-search-forward "[^0-9]" nil :noerror)
(signal 'bencode-end-of-file (point)))
;; Check for terminator
(unless (eql ?e (char-before))
(signal 'bencode-invalid-byte
(cons (char-before) (point))))
;; Try to parse the digits
(let* ((string (buffer-substring start (point)))
(result (string-to-number string)))
(if (floatp result)
(signal 'bencode-overflow (cons string result))
result)))))
(defsubst bencode--decode-string (coding-system)
"Decode a string from the current buffer at point.
Returns cons of (raw . decoded)."
(let ((start (point)))
(if (eql (char-after) ?0)
;; Handle zero length as a special case
(progn
(forward-char)
(if (eql (char-after) ?:)
(prog1 '("" . "")
(forward-char))
(signal 'bencode-invalid-byte
(cons (char-after) (point)))))
;; Skip over length digits
(unless (re-search-forward "[^0-9]" nil :noerror)
(signal 'bencode-end-of-file (point)))
;; Did we find a colon?
(unless (eql ?: (char-before))
(signal 'bencode-invalid-byte
(cons (char-before) (point))))
(let* ((length-string (buffer-substring start (- (point) 1)))
(length (string-to-number length-string)))
(when (floatp length)
(signal 'bencode-overflow
(cons length-string length)))
(when (> (+ (point) length) (point-max))
(signal 'bencode-end-of-file (+ (point) length)))
(let ((string (buffer-substring (point) (+ (point) length))))
(prog1 (cons string
(decode-coding-string string coding-system :nocopy))
(forward-char length)))))))
(defsubst bencode--to-plist (list)
"Convert a series of parsed dictionary entries into a plist."
(let ((plist ()))
(while list
(push (pop list) plist)
(push (intern (concat ":" (pop list))) plist))
plist))
(defsubst bencode--to-hash-table (list)
"Convert a series of parsed dictionary entries into a hash table."
(let ((table (make-hash-table :test 'equal)))
(prog1 table
(while list
(let ((value (pop list))
(key (pop list)))
(setf (gethash key table) value))))))
(cl-defun bencode-decode-from-buffer
(&key (list-type 'list) (dict-type 'plist) (coding-system 'utf-8))
"Like `bencode-decode' but from the current buffer starting at point.
The point is left where parsing finished. You may want to reject
inputs with data trailing beyond the point."
;; Operations are pushed onto an operation stack. One operation is
;; executed once per iteration. Some operations push multiple new
;; operations onto the stack. When no more operations are left,
;; return the remaining element from the value stack.
(let ((op-stack '(:read)) ; operations stack
(value-stack (list nil)) ; stack of parsed values
(last-key-stack ())) ; last key seen in top dictionary
(while op-stack
(cl-case (car op-stack)
;; Figure out what type of value is to be read next and
;; prepare stacks accordingly.
(:read
(pop op-stack)
(cl-case (char-after)
((nil) (signal 'bencode-end-of-file (point)))
(?i (push (bencode--decode-int) (car value-stack)))
(?l (forward-char)
(push :list op-stack)
(push nil value-stack))
(?d (forward-char)
(push :dict op-stack)
(push nil value-stack)
(push nil last-key-stack))
((?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)
(push (cdr (bencode--decode-string coding-system))
(car value-stack)))
(t (signal 'bencode-invalid-byte (point)))))
;; Read a key and push it onto the list on top of the value stack
(:key
(pop op-stack)
(let* ((string (bencode--decode-string coding-system))
(raw (car string))
(key (cdr string))
(last-key (car last-key-stack)))
(when last-key
(when (string= last-key raw)
(signal 'bencode-invalid-key (cons 'duplicate key)))
(when (string< raw last-key)
(signal 'bencode-invalid-key (list 'string> last-key raw))))
(setf (car last-key-stack) raw)
(push key (car value-stack))))
;; End list, or queue operations to read another value
(:list
(if (eql (char-after) ?e)
(let ((result (nreverse (pop value-stack))))
(forward-char)
(pop op-stack)
(if (eq list-type 'vector)
(push (vconcat result) (car value-stack))
(push result (car value-stack))))
(push :read op-stack)))
;; End dict, or queue operations to read another entry
(:dict
(if (eql (char-after) ?e)
(let ((result (pop value-stack)))
(forward-char)
(pop op-stack)
(pop last-key-stack)
(if (eq dict-type 'hash-table)
(push (bencode--to-hash-table result) (car value-stack))
(push (bencode--to-plist result) (car value-stack))))
(push :read op-stack)
(push :key op-stack)))))
(caar value-stack)))
(cl-defun bencode-decode
(string &key (list-type 'list) (dict-type 'plist) (coding-system 'utf-8))
"Decode bencode data from STRING.
:coding-system -- coding system for decoding byte strings (utf-8)
:dict-type -- target format for dictionaries (symbol: plist, hash-table)
:list-type -- target format for lists (symbol: list, vector)
Input should generally be unibyte. Strings parsed as values and
keys will be decoded using the coding system indicated by the
given coding system (default: UTF-8). The same coding system
should be used as when encoding. There are never decoding errors
since Emacs can preserve arbitrary byte data across encoding and
decoding. See \"Text Representations\" in the GNU Emacs Lisp
Reference Manual.
Input is strictly validated and invalid inputs are rejected. This
includes dictionary key constraints. Dictionaries are decoded
into plists. Lists are decoded into lists. If an integer is too
large to store in an Emacs integer, the decoder will signal an
overflow error. Signals an error if STRING contains trailing data.
Possible error signals:
* bencode-end-of-file
* bencode-invalid-key
* bencode-invalid-byte
* bencode-overflow
This function is not recursive. It is safe to parse very deeply
nested inputs."
(with-temp-buffer
(insert string)
(setf (point) (point-min))
(prog1 (bencode-decode-from-buffer :list-type list-type
:dict-type dict-type
:coding-system coding-system)
(when (< (point) (point-max))
(signal 'bencode-invalid-byte (cons "Trailing data" (point)))))))
(provide 'bencode)
;;; bencode.el ends here