Compression of single folders/files
All checks were successful
continuous-integration/drone/push Build is passing

Platform specific compression (Windows: zip, Unix: tar.gz) for single files or
folders
This commit is contained in:
Armin Friedl 2020-08-02 12:58:22 +02:00
parent 9230a606ae
commit cdb3775eb6
Signed by: armin
GPG key ID: 48C726EEE7FBCBC8
7 changed files with 272 additions and 3 deletions

View file

@ -118,4 +118,11 @@ void Archive::extract(ExtractSpec extract_spec) {
extractor->extract_all(reader); extractor->extract_all(reader);
} }
/** Compress the file or folder at `path` according to the given CompressSpec
 *
 * The path is made absolute first and the actual work is delegated to an
 * `ArchiveCompressorSys`, which only lives for the duration of this call.
 */
void Archive::compress(CompressSpec compress_spec) {
  auto absolute_root = std::filesystem::absolute(this->path);
  ArchiveCompressorSys{absolute_root, compress_spec}.compress();
}
} // namespace xwim } // namespace xwim

View file

@ -28,6 +28,9 @@ class Archive {
/** Extract the archive at `path` according to given ExtractSpec */ /** Extract the archive at `path` according to given ExtractSpec */
void extract(ExtractSpec extract_spec); void extract(ExtractSpec extract_spec);
/** Compress the archive at `path` according to given CompressSpec */
void compress(CompressSpec compress_spec);
}; };
class ArchiveException : public std::exception { class ArchiveException : public std::exception {

View file

@ -1,5 +1,9 @@
#include <fcntl.h>
#include <archive_entry.h> #include <archive_entry.h>
#include <spdlog/spdlog.h> #include <spdlog/spdlog.h>
#include "archive.hpp"
#include "fileformats.hpp"
#include "spec.hpp"
namespace logger = spdlog; namespace logger = spdlog;
#include "archive_sys.hpp" #include "archive_sys.hpp"
@ -120,6 +124,103 @@ xwim::ArchiveExtractorSys::~ArchiveExtractorSys(){
} }
} }
/** Prepare a new `libarchive` writer for the file or folder at `root`
 *
 * Allocates the write handle, then registers every filter of `compress_spec`
 * (in order) followed by the archive format. Nothing is written to disk yet.
 *
 * @param root          path of the file or folder that will be compressed
 * @param compress_spec format, filters and extension of the new archive
 * @throws ArchiveSysException if the handle cannot be allocated or
 *         `libarchive` rejects a filter or the format
 */
xwim::ArchiveCompressorSys::ArchiveCompressorSys(std::filesystem::path& root, xwim::CompressSpec compress_spec): root{root}, compress_spec{compress_spec} {
  this->new_archive = archive_write_new();
  if (this->new_archive == nullptr) {
    throw ArchiveSysException{"Could not allocate archive writer"};
  }

  // The destructor does not run when a constructor throws, so the handle has
  // to be released by hand. The exception is built first because it is handed
  // the still-valid handle.
  const auto fail = [this](const char* message) {
    ArchiveSysException error(message, this->new_archive);
    archive_write_free(this->new_archive);
    this->new_archive = nullptr;
    throw error;
  };

  for (const xwim::archive_filter filter : this->compress_spec.filters) {
    // Codes below ARCHIVE_WARN (ARCHIVE_FAILED, ARCHIVE_FATAL) are hard errors
    if (archive_write_add_filter(this->new_archive, filter) < ARCHIVE_WARN) {
      fail("Could not add filter to archive");
    }
  }

  if (archive_write_set_format(this->new_archive, this->compress_spec.format) < ARCHIVE_WARN) {
    fail("Could not set archive format");
  }
}
void xwim::ArchiveCompressorSys::compress() {
std::filesystem::path archive_path{this->root};
if(!std::filesystem::exists(archive_path)) {
logger::error("Non-existing path: {}", archive_path.string());
throw ArchiveSysException{"Path does not exists"};
}
std::filesystem::file_status file_status = std::filesystem::status(archive_path);
if(file_status.type() != std::filesystem::file_type::directory
&& file_status.type() != std::filesystem::file_type::regular) {
logger::error("Unknown path type: {}", file_status.type());
throw ArchiveSysException{"Unknown path type"};
}
if ((file_status.permissions() & std::filesystem::perms::owner_read) ==
std::filesystem::perms::none &&
(file_status.permissions() & std::filesystem::perms::group_read) ==
std::filesystem::perms::none &&
(file_status.permissions() & std::filesystem::perms::others_read) ==
std::filesystem::perms::none) {
logger::error("Cannot read path with permissions: {}",
file_status.permissions());
throw ArchiveSysException{"Unreadable path"};
}
if(file_status.type() == std::filesystem::file_type::regular) {
while(archive_path.has_extension()) {
archive_path.replace_extension();
}
}
archive_path.concat(this->compress_spec.extension);
logger::debug("Writing archive at: {}", std::filesystem::absolute(archive_path).c_str());
archive_write_open_filename(this->new_archive, std::filesystem::absolute(archive_path).c_str());
archive* disk = archive_read_disk_new();
archive_read_disk_set_standard_lookup(disk);
int r;
r = archive_read_disk_open(disk, std::filesystem::relative(this->root).c_str());
if(r != ARCHIVE_OK) {
throw ArchiveSysException("Could not open path for archiving", disk);
}
archive_entry* entry;
char buff[16384];
for (;;) {
entry = archive_entry_new();
r = archive_read_next_header2(disk, entry);
if (r == ARCHIVE_EOF)
break;
if (r != ARCHIVE_OK) {
throw ArchiveSysException("Could not read next archive entry", disk);
}
archive_read_disk_descend(disk);
logger::trace("Processing entry {}", archive_entry_pathname(entry));
r = archive_write_header(this->new_archive, entry);
if (r < ARCHIVE_OK) {
throw ArchiveSysException("Could not write header for archive entry",
this->new_archive);
}
if (r > ARCHIVE_FAILED) {
int fd = open(archive_entry_sourcepath(entry), O_RDONLY);
ssize_t len = read(fd, buff, sizeof(buff));
while (len > 0) {
archive_write_data(this->new_archive, buff, len);
len = read(fd, buff, sizeof(buff));
}
close(fd);
}
archive_entry_free(entry);
}
}
/** Close and free the `libarchive` write handle, if there is one */
xwim::ArchiveCompressorSys::~ArchiveCompressorSys() {
  logger::trace("Destructing ArchiveCompressorSys at {:p}",
                static_cast<void*>(this->new_archive));
  if (this->new_archive) {
    archive_write_close(this->new_archive);
    archive_write_free(this->new_archive);
    this->new_archive = nullptr;
  }
}
static int copy_data(struct archive* ar, struct archive* aw) { static int copy_data(struct archive* ar, struct archive* aw) {
int r; int r;
const void* buff; const void* buff;

View file

@ -4,6 +4,7 @@
#include <filesystem> #include <filesystem>
#include <memory> #include <memory>
#include "spec.hpp"
#include <fmt/format.h> #include <fmt/format.h>
namespace xwim { namespace xwim {
@ -81,6 +82,23 @@ class ArchiveExtractorSys {
void extract_entry(ArchiveReaderSys& reader); void extract_entry(ArchiveReaderSys& reader);
}; };
/** A compressor for archive files
 *
 * Shim for `libarchive`
 *
 * An instance owns the `libarchive` write handle of the archive it creates:
 * the handle is allocated in the constructor and closed and freed in the
 * destructor. Copying is therefore disabled, as two copies would free the
 * same handle twice.
 */
class ArchiveCompressorSys {
 private:
  archive* new_archive = nullptr;   /**< `libarchive` handle of the new archive */
  std::filesystem::path root;       /**< File or folder to compress */
  xwim::CompressSpec compress_spec; /**< Format, filters and extension to use */

 public:
  ArchiveCompressorSys(std::filesystem::path& root, xwim::CompressSpec compress_spec);
  ~ArchiveCompressorSys();

  ArchiveCompressorSys(const ArchiveCompressorSys&) = delete;
  ArchiveCompressorSys& operator=(const ArchiveCompressorSys&) = delete;

  /** Write the archive for `root` to disk */
  void compress();
};
class ArchiveSysException : public std::exception { class ArchiveSysException : public std::exception {
private: private:
std::string _what; std::string _what;

View file

@ -25,11 +25,91 @@ const std::set<std::string> fileformats{".7z", ".7zip", ".jar", ".tgz",
".bz2", ".bzip2", ".gz", ".gzip", ".bz2", ".bzip2", ".gz", ".gzip",
".rar", ".tar", ".xz", ".zip"}; ".rar", ".tar", ".xz", ".zip"};
/** Archive filters
 *
 * A filter is either a data compression algorithm or a data encoding. Filters
 * are applied to an archive after an archiving program has created it from
 * files and folders. Several filters can be stacked on one archive, and their
 * order is significant.
 *
 * The traditional `tar.gz` illustrates the difference between filters and
 * formats: tar creates the archive (an archive format), gzip compresses it (an
 * archive filter). In theory one could create a `tar.gz.lz.uu` tarball, i.e. a
 * `tar` archive filter-compressed with `gzip`, filter-compressed with `lzip`
 * and finally filter-encoded with `uuencode`.
 *
 * The abstraction is not perfect. `.zip` files, for example, are traditionally
 * archives in which every entry is compressed separately and the compressed
 * entries are then bundled together. In such cases the archive format is ZIP
 * with no (external) filters.
 *
 * The numeric values are handed to `libarchive` as they are (presumably they
 * mirror its `ARCHIVE_FILTER_*` codes), so they must not be renumbered.
 */
enum archive_filter {
  NONE     = 0,
  GZIP     = 1,
  BZIP2    = 2,
  COMPRESS = 3,
  PROGRAM  = 4,
  LZMA     = 5,
  XZ       = 6,
  UU       = 7,
  RPM      = 8,
  LZIP     = 9,
  LRZIP    = 10,
  LZOP     = 11,
  GRZIP    = 12,
  LZ4      = 13,
  ZSTD     = 14
};
/** Archive formats
 *
 * An archive format specifies how multiple files and folders (including their
 * metadata) are bundled into a single file, the archive. See `archive_filter`
 * for the difference between archive formats and archive filters.
 *
 * Every format family has a base code; the variants of a family are the base
 * code with a small number or'ed in. `BASE_MASK` extracts the family from a
 * variant.
 *
 * The numeric values are handed to `libarchive` as they are (presumably they
 * mirror its `ARCHIVE_FORMAT_*` codes), so they must not be renumbered.
 */
enum archive_format {
  BASE_MASK           = 0xff0000,

  CPIO                = 0x010000,
  CPIO_POSIX          = CPIO | 1,
  CPIO_BIN_LE         = CPIO | 2,
  CPIO_BIN_BE         = CPIO | 3,
  CPIO_SVR4_NOCRC     = CPIO | 4,
  CPIO_SVR4_CRC       = CPIO | 5,
  CPIO_AFIO_LARGE     = CPIO | 6,

  SHAR                = 0x020000,
  SHAR_BASE           = SHAR | 1,
  SHAR_DUMP           = SHAR | 2,

  TAR                 = 0x030000,
  TAR_USTAR           = TAR | 1,
  TAR_PAX_INTERCHANGE = TAR | 2,
  TAR_PAX_RESTRICTED  = TAR | 3,
  TAR_GNUTAR          = TAR | 4,

  ISO9660             = 0x040000,
  ISO9660_ROCKRIDGE   = ISO9660 | 1,

  ZIP                 = 0x050000,
  EMPTY               = 0x060000,

  AR                  = 0x070000,
  AR_GNU              = AR | 1,
  AR_BSD              = AR | 2,

  MTREE               = 0x080000,
  RAW                 = 0x090000,
  XAR                 = 0x0A0000,
  LHA                 = 0x0B0000,
  CAB                 = 0x0C0000,
  RAR                 = 0x0D0000,
  SEVENZIP            = 0x0E0000,
  WARC                = 0x0F0000,
  RAR_V5              = 0x100000
};
/** Strip archive extensions from a path /** Strip archive extensions from a path
* *
* @returns Base filename without archive extensions * @returns Base filename without archive extensions
*/ */
inline std::filesystem::path stem(const std::filesystem::path& path) { inline std::filesystem::path
stem(const std::filesystem::path& path) {
std::filesystem::path p_stem{path}; std::filesystem::path p_stem{path};
logger::trace("Stemming {}", p_stem.string()); logger::trace("Stemming {}", p_stem.string());

View file

@ -46,7 +46,15 @@ void extract(const XwimPath& xwim_path) {
} }
void compress(const XwimPath& xwim_path) { void compress(const XwimPath& xwim_path) {
return; try {
xwim::Archive archive{xwim_path.path()};
xwim::CompressSpec compress_spec{};
archive.compress(compress_spec);
} catch (xwim::ArchiveException& ae) {
logger::error("{}", ae.what());
}
} }
XwimPath parse_args(int argc, char** argv) { XwimPath parse_args(int argc, char** argv) {

View file

@ -6,6 +6,8 @@
#include <filesystem> #include <filesystem>
#include <memory> #include <memory>
#include "fileformats.hpp"
namespace xwim { namespace xwim {
/** Properties of an archive /** Properties of an archive
@ -20,7 +22,7 @@ struct ArchiveSpec {
bool is_root_filename = false; /** the name of the (single) root is the same bool is_root_filename = false; /** the name of the (single) root is the same
as the stemmed archive file name. Cannot be as the stemmed archive file name. Cannot be
true if `has_single_root` is false */ true if `has_single_root` is false */
bool is_root_dir = false; /** The (single) root is a folder. Cnnot be true if bool is_root_dir = false; /** The (single) root is a folder. Cannot be true if
`has_single_root` is false */ `has_single_root` is false */
bool has_subarchive = false; /** Whether the archive contains sub-archives */ bool has_subarchive = false; /** Whether the archive contains sub-archives */
}; };
@ -36,6 +38,41 @@ struct ExtractSpec {
bool extract_subarchive = false; /** Recursively extract sub-archives */ bool extract_subarchive = false; /** Recursively extract sub-archives */
}; };
/** Compile time definitions for platform-dependent formats and filters
 *
 * - Unix-like systems (including macOS, which does not define `__unix__`):
 *   gzip-compressed ustar archive, `.tar.gz`
 * - Windows and every other platform: zip archive, `.zip`
 *
 * Compilers predefine the Windows macros in upper case (`_WIN32`). The lower
 * case spellings are never predefined and are only kept for builds that pass
 * them explicitly.
 */
#if defined(unix) || defined(__unix__) || defined(__unix) || \
    (defined(__APPLE__) && defined(__MACH__))
#define XWIM_COMPRESS_FORMAT xwim::archive_format::TAR_USTAR
#define XWIM_COMPRESS_FILTER { xwim::archive_filter::GZIP }
#define XWIM_COMPRESS_EXTENSION ".tar.gz"
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) || \
    defined(_win32) || defined(__win32__) || defined(__windows__)
#define XWIM_COMPRESS_FORMAT xwim::archive_format::ZIP
#define XWIM_COMPRESS_FILTER {}
#define XWIM_COMPRESS_EXTENSION ".zip"
#else
#define XWIM_COMPRESS_FORMAT xwim::archive_format::ZIP
#define XWIM_COMPRESS_FILTER {}
#define XWIM_COMPRESS_EXTENSION ".zip"
#endif
/** Properties influencing the compression process
 *
 * These properties can be set to influence the compression process
 * accordingly.
 *
 * By default, the compress spec is platform dependent to match the format
 * expected on that platform: zip on Windows, tar.gz on Unix. The defaults are
 * taken from the `XWIM_COMPRESS_*` macros, which are selected at compile time.
 */
struct CompressSpec {
xwim::archive_format format =
XWIM_COMPRESS_FORMAT; /**< The archiving format, e.g. tar */
std::vector<xwim::archive_filter> filters =
XWIM_COMPRESS_FILTER; /**< Filters applied to the archive,
                           e.g. gzip */
std::string extension =
XWIM_COMPRESS_EXTENSION; /**< Archive extension, e.g. .tar.gz */
};
} // namespace xwim } // namespace xwim
#if FMT_VERSION < 50300 #if FMT_VERSION < 50300
@ -78,3 +115,18 @@ struct fmt::formatter<xwim::ExtractSpec> {
spec.extract_subarchive); spec.extract_subarchive);
} }
}; };
/** `fmt` formatter for `xwim::CompressSpec`
 *
 * Renders every property of the spec, e.g. for log output. Format specifiers
 * are not supported: `parse` accepts the spec unchanged.
 */
template <>
struct fmt::formatter<xwim::CompressSpec> {
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  // const, because newer fmt versions invoke format() on a const formatter
  template <typename FormatContext>
  auto format(const xwim::CompressSpec& spec, FormatContext& ctx) const {
    return fmt::format_to(ctx.out(),
                          "Compress["
                          " .format={},"
                          " .filters={},"
                          " .extension={}"
                          " ]",
                          // explicit cast: do not rely on fmt formatting a
                          // plain enum through an implicit conversion
                          static_cast<int>(spec.format), spec.filters,
                          spec.extension);
  }
};