From cdb3775eb653e5157cba3b5c009282bde053516f Mon Sep 17 00:00:00 2001
From: Armin Friedl
Date: Sun, 2 Aug 2020 12:58:22 +0200
Subject: [PATCH] Compression of single folders/files

Platform specific compression (Windows: zip, Unix: tar.gz) for single
files or folders
---
 src/archive.cpp     |   7 +++
 src/archive.hpp     |   3 ++
 src/archive_sys.cpp | 116 ++++++++++++++++++++++++++++++++++++++++++++
 src/archive_sys.hpp |  22 +++++++++
 src/fileformats.hpp |  82 ++++++++++++++++++++++++++++++-
 src/main.cpp        |  10 +++-
 src/spec.hpp        |  54 ++++++++++++++++++++-
 7 files changed, 291 insertions(+), 3 deletions(-)

diff --git a/src/archive.cpp b/src/archive.cpp
index 7ff9bc6..831c5e9 100644
--- a/src/archive.cpp
+++ b/src/archive.cpp
@@ -118,4 +118,11 @@ void Archive::extract(ExtractSpec extract_spec) {
   extractor->extract_all(reader);
 }
 
+void Archive::compress(CompressSpec compress_spec) {
+  std::filesystem::path abs_path = std::filesystem::absolute(this->path);
+
+  ArchiveCompressorSys compressor{abs_path, compress_spec};
+  compressor.compress();
+}
+
 }  // namespace xwim
diff --git a/src/archive.hpp b/src/archive.hpp
index ca76979..fccd250 100644
--- a/src/archive.hpp
+++ b/src/archive.hpp
@@ -28,6 +28,9 @@ class Archive {
 
   /** Extract the archive at `path` according to given ExtractSpec */
   void extract(ExtractSpec extract_spec);
+
+  /** Compress the file or folder at `path` according to given CompressSpec */
+  void compress(CompressSpec compress_spec);
 };
 
 class ArchiveException : public std::exception {
diff --git a/src/archive_sys.cpp b/src/archive_sys.cpp
index 2b24eca..7c2fdc0 100644
--- a/src/archive_sys.cpp
+++ b/src/archive_sys.cpp
@@ -1,5 +1,9 @@
+#include
 #include
 #include
+#include "archive.hpp"
+#include "fileformats.hpp"
+#include "spec.hpp"
 namespace logger = spdlog;
 
 #include "archive_sys.hpp"
@@ -120,6 +124,118 @@ xwim::ArchiveExtractorSys::~ArchiveExtractorSys(){
   }
 }
 
+/** Set up a libarchive writer with the filters and format of `compress_spec` */
+xwim::ArchiveCompressorSys::ArchiveCompressorSys(const std::filesystem::path& root, xwim::CompressSpec compress_spec): root{root}, compress_spec{compress_spec} {
+  this->new_archive = archive_write_new();
+
+  for(xwim::archive_filter filter: this->compress_spec.filters) {
+    archive_write_add_filter(this->new_archive, filter);
+  }
+
+  archive_write_set_format(this->new_archive, this->compress_spec.format);
+}
+
+/** Archive the file or folder at `root` into a file named `root` plus the
+ * extension of the compress spec (for regular files, existing extensions
+ * are stripped first).
+ *
+ * Throws ArchiveSysException if `root` does not exist, is neither a regular
+ * file nor a directory, has no read permission bit set, or cannot be
+ * traversed or written.
+ */
+void xwim::ArchiveCompressorSys::compress() {
+  std::filesystem::path archive_path{this->root};
+  if(!std::filesystem::exists(archive_path)) {
+    logger::error("Non-existing path: {}", archive_path.string());
+    throw ArchiveSysException{"Path does not exist"};
+  }
+
+  std::filesystem::file_status file_status = std::filesystem::status(archive_path);
+
+  if(file_status.type() != std::filesystem::file_type::directory
+     && file_status.type() != std::filesystem::file_type::regular) {
+    logger::error("Unknown path type: {}", file_status.type());
+    throw ArchiveSysException{"Unknown path type"};
+  }
+
+  if ((file_status.permissions() & std::filesystem::perms::owner_read) ==
+          std::filesystem::perms::none &&
+      (file_status.permissions() & std::filesystem::perms::group_read) ==
+          std::filesystem::perms::none &&
+      (file_status.permissions() & std::filesystem::perms::others_read) ==
+          std::filesystem::perms::none) {
+    logger::error("Cannot read path with permissions: {}",
+                  file_status.permissions());
+    throw ArchiveSysException{"Unreadable path"};
+  }
+
+  if(file_status.type() == std::filesystem::file_type::regular) {
+    while(archive_path.has_extension()) {
+      archive_path.replace_extension();
+    }
+  }
+
+  archive_path.concat(this->compress_spec.extension);
+  logger::debug("Writing archive at: {}", std::filesystem::absolute(archive_path).c_str());
+  archive_write_open_filename(this->new_archive, std::filesystem::absolute(archive_path).c_str());
+
+  archive* disk = archive_read_disk_new();
+  archive_read_disk_set_standard_lookup(disk);
+
+  int r;
+
+  r = archive_read_disk_open(disk, std::filesystem::relative(this->root).c_str());
+  if(r != ARCHIVE_OK) {
+    throw ArchiveSysException("Could not open path for archiving", disk);
+  }
+
+  archive_entry* entry;
+  char buff[16384];
+
+  for (;;) {
+    entry = archive_entry_new();
+    r = archive_read_next_header2(disk, entry);
+    if (r == ARCHIVE_EOF) {
+      archive_entry_free(entry);  // the entry allocated for this pass is unused
+      break;
+    }
+    if (r != ARCHIVE_OK) {
+      throw ArchiveSysException("Could not read next archive entry", disk);
+    }
+
+    archive_read_disk_descend(disk);
+    logger::trace("Processing entry {}", archive_entry_pathname(entry));
+
+    r = archive_write_header(this->new_archive, entry);
+    if (r < ARCHIVE_OK) {
+      throw ArchiveSysException("Could not write header for archive entry",
+                                this->new_archive);
+    }
+    if (r > ARCHIVE_FAILED) {
+      int fd = open(archive_entry_sourcepath(entry), O_RDONLY);
+      ssize_t len = read(fd, buff, sizeof(buff));
+      while (len > 0) {
+        archive_write_data(this->new_archive, buff, len);
+        len = read(fd, buff, sizeof(buff));
+      }
+      close(fd);
+    }
+    archive_entry_free(entry);
+  }
+
+  // Release the disk reader once the whole tree has been written
+  archive_read_close(disk);
+  archive_read_free(disk);
+}
+
+/** Close and free the libarchive writer, if one was created */
+xwim::ArchiveCompressorSys::~ArchiveCompressorSys() {
+  logger::trace("Destructing ArchiveCompressorSys at {:p}", (void*) this->new_archive);
+  if(this->new_archive) {
+    archive_write_close(this->new_archive);
+    archive_write_free(this->new_archive);
+  }
+}
+
 static int copy_data(struct archive* ar, struct archive* aw) {
   int r;
   const void* buff;
diff --git a/src/archive_sys.hpp b/src/archive_sys.hpp
index a9d6556..2818d17 100644
--- a/src/archive_sys.hpp
+++ b/src/archive_sys.hpp
@@ -4,6 +4,7 @@
 #include
 #include
 
+#include "spec.hpp"
 #include
 
 namespace xwim {
@@ -81,6 +82,27 @@ class ArchiveExtractorSys {
   void extract_entry(ArchiveReaderSys& reader);
 };
 
+/** A compressor for archive files
+ *
+ * Shim for `libarchive`
+ */
+class ArchiveCompressorSys {
+private:
+  archive* new_archive;
+  std::filesystem::path root;
+  xwim::CompressSpec compress_spec;
+
+public:
+  ArchiveCompressorSys(const std::filesystem::path& root, xwim::CompressSpec compress_spec);
+  ~ArchiveCompressorSys();
+
+  // Owns the raw `archive*` handle freed in the destructor: not copyable
+  ArchiveCompressorSys(const ArchiveCompressorSys&) = delete;
+  ArchiveCompressorSys& operator=(const ArchiveCompressorSys&) = delete;
+
+  void compress();
+};
+
 class ArchiveSysException : public std::exception {
  private:
   std::string _what;
diff --git a/src/fileformats.hpp b/src/fileformats.hpp
index 95c94a6..32bf011 100644
--- a/src/fileformats.hpp
+++ b/src/fileformats.hpp
@@ -25,11 +25,91 @@ const std::set fileformats{".7z", ".7zip", ".jar", ".tgz",
                            ".bz2", ".bzip2", ".gz", ".gzip",
                            ".rar", ".tar", ".xz", ".zip"};
 
+/** Archive filters
+ *
+ * Archive filters are essentially either data compression algorithms or data
+ * encodings. Filters are used on archives after an archiving program created
+ * the archive out of files and folders. Multiple filters can be applied to an
+ * archive. The order is significant.
+ *
+ * The simplest way to understand the distinction between filters and formats is
+ * to visualize the traditional `tar.gz` format. Tar creates the archive (an
+ * archive format). Gzip compresses the archive (an archive filter). In theory
+ * one could create a `tar.gz.lz.uu` tarball. That is, a `tar` archive
+ * filter-compressed with `gzip`, filter-compressed with `lzip`, filter-encoded
+ * with `uuencode`.
+ *
+ * Note that while this abstraction works in many cases it is not perfect. For
+ * example `.zip` files are traditionally archives where every entry is
+ * compressed separately and then bundled together into an archive. In
+ * those cases the archive format is ZIP with no (external) filters.
+ */
+enum archive_filter {
+  NONE = 0,
+  GZIP = 1,
+  BZIP2 = 2,
+  COMPRESS = 3,
+  PROGRAM = 4,
+  LZMA = 5,
+  XZ = 6,
+  UU = 7,
+  RPM = 8,
+  LZIP = 9,
+  LRZIP = 10,
+  LZOP = 11,
+  GRZIP = 12,
+  LZ4 = 13,
+  ZSTD = 14
+};
+
+/** Archive formats
+ *
+ * Archive formats are the specifications for bundling together multiple files
+ * and folders (including metadata) into a single file (the archive). See also
+ * `archive_filter` for more details on the difference between archive formats
+ * and archive filters.
+ */
+enum archive_format {
+  BASE_MASK = 0xff0000,
+  CPIO = 0x10000,
+  CPIO_POSIX = (CPIO | 1),
+  CPIO_BIN_LE = (CPIO | 2),
+  CPIO_BIN_BE = (CPIO | 3),
+  CPIO_SVR4_NOCRC = (CPIO | 4),
+  CPIO_SVR4_CRC = (CPIO | 5),
+  CPIO_AFIO_LARGE = (CPIO | 6),
+  SHAR = 0x20000,
+  SHAR_BASE = (SHAR | 1),
+  SHAR_DUMP = (SHAR | 2),
+  TAR = 0x30000,
+  TAR_USTAR = (TAR | 1),
+  TAR_PAX_INTERCHANGE = (TAR | 2),
+  TAR_PAX_RESTRICTED = (TAR | 3),
+  TAR_GNUTAR = (TAR | 4),
+  ISO9660 = 0x40000,
+  ISO9660_ROCKRIDGE = (ISO9660 | 1),
+  ZIP = 0x50000,
+  EMPTY = 0x60000,
+  AR = 0x70000,
+  AR_GNU = (AR | 1),
+  AR_BSD = (AR | 2),
+  MTREE = 0x80000,
+  RAW = 0x90000,
+  XAR = 0xA0000,
+  LHA = 0xB0000,
+  CAB = 0xC0000,
+  RAR = 0xD0000,
+  SEVENZIP = 0xE0000,
+  WARC = 0xF0000,
+  RAR_V5 = 0x100000
+};
+
 /** Strip archive extensions from a path
  *
  * @returns Base filename without archive extensions
  */
-inline std::filesystem::path stem(const std::filesystem::path& path) {
+inline std::filesystem::path
+stem(const std::filesystem::path& path) {
   std::filesystem::path p_stem{path};
   logger::trace("Stemming {}", p_stem.string());
 
diff --git a/src/main.cpp b/src/main.cpp
index 01399c7..fddf13a 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -46,7 +46,15 @@ void extract(const XwimPath& xwim_path) {
 }
 
 void compress(const XwimPath& xwim_path) {
-  return;
+  try {
+    xwim::Archive archive{xwim_path.path()};
+    xwim::CompressSpec compress_spec{};
+
+    archive.compress(compress_spec);
+
+  } catch (const std::exception& e) {
+    logger::error("{}", e.what());
+  }
 }
 
 XwimPath parse_args(int argc, char** argv) {
diff --git a/src/spec.hpp b/src/spec.hpp
index f6fdacd..da164ca 100644
--- a/src/spec.hpp
+++ b/src/spec.hpp
@@ -6,6 +6,8 @@
 #include
 #include
 
+#include "fileformats.hpp"
+
 namespace xwim {
 
 /** Properties of an archive
@@ -20,7 +22,7 @@ struct ArchiveSpec {
   bool is_root_filename = false; /** the name of the (single) root is the same
                                     as the stemmed archive file name. Cannot be
                                     true if `has_single_root` is false */
-  bool is_root_dir = false; /** The (single) root is a folder. Cnnot be true if
+  bool is_root_dir = false; /** The (single) root is a folder. Cannot be true if
                                `has_single_root` is false */
   bool has_subarchive = false; /** Whether the archive contains sub-archives */
 };
@@ -36,6 +38,41 @@ struct ExtractSpec {
   bool extract_subarchive = false; /** Recursively extract sub-archives */
 };
 
+
+/** Compile time definitions for platform-dependent files and filters */
+#if defined(unix) || defined(__unix__) || defined(__unix)
+#define XWIM_COMPRESS_FORMAT xwim::archive_format::TAR_USTAR
+#define XWIM_COMPRESS_FILTER { xwim::archive_filter::GZIP }
+#define XWIM_COMPRESS_EXTENSION ".tar.gz"
+#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
+#define XWIM_COMPRESS_FORMAT xwim::archive_format::ZIP
+#define XWIM_COMPRESS_FILTER {}
+#define XWIM_COMPRESS_EXTENSION ".zip"
+#else
+#define XWIM_COMPRESS_FORMAT xwim::archive_format::ZIP
+#define XWIM_COMPRESS_FILTER {}
+#define XWIM_COMPRESS_EXTENSION ".zip"
+#endif
+
+/** Properties influencing the compression process
+ *
+ * These properties can be set to influence the compression process
+ * accordingly.
+ *
+ * Per default, the compress spec is platform dependent to accommodate for the
+ * expected format on that platform. On Windows this is zip, on Unix this is
+ * tar.gz
+ */
+struct CompressSpec {
+  xwim::archive_format format =
+      XWIM_COMPRESS_FORMAT; /** The archiving format, e.g. tar */
+  std::vector<xwim::archive_filter> filters =
+      XWIM_COMPRESS_FILTER; /** Filters applied to the archive,
+                                e.g. gzip */
+  std::string extension =
+      XWIM_COMPRESS_EXTENSION; /** Archive extension, e.g. .tar.gz */
+};
+
 }  // namespace xwim
 
 #if FMT_VERSION < 50300
@@ -78,3 +115,18 @@ struct fmt::formatter<xwim::ExtractSpec> {
                      spec.extract_subarchive);
   }
 };
+
+template <>
+struct fmt::formatter<xwim::CompressSpec> {
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+  template <typename FormatContext>
+  auto format(const xwim::CompressSpec& spec, FormatContext& ctx) {
+    return format_to(ctx.out(),
+                     "Compress["
+                     " .format={},"
+                     " .filters={}"
+                     " ]",
+                     spec.format, spec.filters);
+  }
+};