This commit is contained in:
Armin Friedl 2021-02-14 11:46:03 +01:00
parent 4945bbd45c
commit f5cddbc0f3
Signed by: armin
GPG key ID: 48C726EEE7FBCBC8
51 changed files with 152 additions and 1208 deletions

View file

@ -4,6 +4,8 @@ project('xwim', 'cpp',
'warning_level=3',
'b_ndebug=if-release'])
# Expose the version of this project to the C++ sources. meson.version()
# would report the version of the Meson build tool itself instead.
# NOTE(review): assumes project() declares a `version:` — confirm.
add_global_arguments('-DVERSION='+meson.project_version(), language: 'cpp')
subdir('src')
subdir('doc')
subdir('test')

View file

@ -1,128 +0,0 @@
#include <spdlog/spdlog.h>
#include <sys/stat.h>
namespace logger = spdlog;
#include <archive.h>
#include <archive_entry.h>
#include <algorithm>
#include <filesystem>
#include <iostream>
#include <stdexcept>
#include "archive_sys.hpp"
#include "archive.hpp"
#include "spec.hpp"
#include "fileformats.hpp"
namespace xwim {
/** Records whether the first path component of `entry` equals the archive's
 * file name with its known archive extensions stripped.
 *
 * @param spec     receives the result in `is_root_filename`
 * @param entry    archive entry whose first path component is inspected
 * @param filepath path of the archive file itself
 */
static void _spec_is_root_filename(ArchiveSpec* spec,
                                   ArchiveEntryView entry,
                                   std::filesystem::path* filepath) {
  const std::filesystem::path entry_path = entry.path();
  const std::filesystem::path norm_stem = xwim::stem(filepath->filename());

  const bool root_matches = (*entry_path.begin() == norm_stem);
  if (root_matches) {
    logger::debug("Archive root matches archive name");
  } else {
    logger::debug("Archive root does not match archive name");
  }
  spec->is_root_filename = root_matches;

  logger::debug("\t-> Archive root: {}", entry_path.begin()->string());
  logger::debug("\t-> Archive stem: {}", norm_stem.string());
}
/** Records in `spec->is_root_dir` whether `entry` is a directory. */
static void _spec_is_root_dir(ArchiveSpec* spec, ArchiveEntryView entry) {
  const bool root_is_dir = entry.is_directory();
  if (!root_is_dir) {
    logger::debug("Archive root is not a directory");
  } else {
    logger::debug("Archive root is directory");
  }
  spec->is_root_dir = root_is_dir;
  logger::debug("\t-> Archive mode_t: {0:o}", entry.file_type());
}
/** Records whether every remaining entry shares the first entry's root.
 *
 * Advances `archive_reader` until a differing root is found or the archive
 * is exhausted, so the reader is consumed by this call.
 *
 * @param spec           receives the result in `has_single_root`
 * @param first_entry    entry whose first path component is the reference
 * @param archive_reader reader positioned on `first_entry`
 */
static void _spec_has_single_root(ArchiveSpec* spec,
                                  ArchiveEntryView first_entry,
                                  ArchiveReaderSys& archive_reader) {
  const std::filesystem::path first_entry_root = *(first_entry.path().begin());
  logger::trace("Testing roots");

  spec->has_single_root = true;
  // The flag doubles as the loop guard: once cleared, no further entry is read.
  while (spec->has_single_root && archive_reader.advance()) {
    ArchiveEntryView entry = archive_reader.cur();
    const std::filesystem::path next_entry = entry.path();
    logger::trace("Path: {}, Root: {}", next_entry.string(),
                  next_entry.begin()->string());

    if (first_entry_root == *next_entry.begin()) {
      continue;
    }

    logger::debug("Archive has multiple roots");
    logger::debug("\t-> Archive root I: {}",
                  first_entry_root.begin()->string());
    logger::debug("\t-> Archive root II: {}", next_entry.begin()->string());
    spec->has_single_root = false;
  }

  if (spec->has_single_root) {
    logger::debug("Archive has single root: {}", first_entry_root.string());
  }
}
/** Stores `path` as given; nothing is opened or checked here. */
Archive::Archive(std::filesystem::path path) : path(path) {}
/** Analyses the archive at `path` and summarises its layout.
 *
 * Opens a fresh reader, inspects the first entry and then scans the remaining
 * entries for a second root.
 *
 * @returns the collected ArchiveSpec; `{false, false, false}` for an archive
 *          without entries
 */
ArchiveSpec Archive::check() {
  logger::trace("Creating archive spec for {}", this->path.string());

  ArchiveReaderSys archive_reader{this->path};
  const bool has_entries = archive_reader.advance();
  if (!has_entries) {  // can't advance even once, archive is empty
    logger::debug("Archive is empty");
    return ArchiveSpec{false, false, false};
  }

  ArchiveEntryView first_entry = archive_reader.cur();
  logger::trace("Found archive entry {}", first_entry.path_name());

  ArchiveSpec archive_spec;
  _spec_is_root_filename(&archive_spec, first_entry, &this->path);
  _spec_is_root_dir(&archive_spec, first_entry);
  _spec_has_single_root(&archive_spec, first_entry, archive_reader);
  return archive_spec;
}
/** Extracts every entry of the archive at `path`.
 *
 * When `extract_spec.make_dir` is set, the extractor is created with
 * `extract_spec.dirname` as its root; otherwise it is created without a root.
 */
void Archive::extract(ExtractSpec extract_spec) {
  // Resolve the archive location first: the rooted extractor changes the
  // current working directory, which would invalidate a relative path.
  std::filesystem::path abs_path = std::filesystem::absolute(this->path);

  std::unique_ptr<ArchiveExtractorSys> extractor;
  if (!extract_spec.make_dir) {
    extractor = std::make_unique<ArchiveExtractorSys>();
  } else {
    logger::trace("Creating extract directory {}", extract_spec.dirname.string());
    extractor = std::make_unique<ArchiveExtractorSys>(extract_spec.dirname);
  }

  ArchiveReaderSys reader{abs_path};
  extractor->extract_all(reader);
}
/** Compresses the file or directory at `path` according to `compress_spec`. */
void Archive::compress(CompressSpec compress_spec) {
  std::filesystem::path source = std::filesystem::absolute(this->path);
  ArchiveCompressorSys{source, compress_spec}.compress();
}
} // namespace xwim

View file

@ -1,53 +0,0 @@
#pragma once
#include <archive.h>
#include <fmt/format.h>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <string_view>
#include "spec.hpp"
namespace xwim {
/** Class for interacting with archives
*
* Wraps a single filesystem path which is either an existing archive (for
* `check`/`extract`) or a file/directory to be archived (for `compress`).
*/
class Archive {
private:
/** Path this object operates on, stored exactly as passed to the constructor */
std::filesystem::path path;
public:
explicit Archive(std::filesystem::path path);
/** Generate an ArchiveSpec by analysing the archive at `path`
*
* @returns ArchiveSpec for the archive
*/
ArchiveSpec check();
/** Extract the archive at `path` according to given ExtractSpec */
void extract(ExtractSpec extract_spec);
/** Create an archive from the file or directory at `path` according to the
* given CompressSpec
*/
void compress(CompressSpec compress_spec);
};
/** Exception carrying a message plus, when available, the error text that
 * libarchive recorded on the given archive handle.
 */
class ArchiveException : public std::exception {
 private:
  std::string _what;

 public:
  /** @param what    description of the failed operation
   *  @param archive handle queried with `archive_error_string`
   */
  ArchiveException(std::string what, archive* archive) {
    const char* detail = archive_error_string(archive);
    if (detail == nullptr) {
      _what = fmt::format("{}", what);
    } else {
      _what = fmt::format("{}: {}", what, detail);
    }
  }

  const char* what() const noexcept override { return _what.c_str(); }
};
} // namespace xwim

View file

@ -1,297 +0,0 @@
#include <archive_entry.h>
#include <fcntl.h>
#include <spdlog/spdlog.h>
#include "archive.hpp"
#include "fileformats.hpp"
#include "spec.hpp"
namespace logger = spdlog;
#include <archive.h>
#include <filesystem>
#include <memory>
#include "archive_sys.hpp"
namespace fs = std::filesystem;
/** @returns true when the view does not refer to any archive entry */
bool xwim::ArchiveEntryView::is_empty() {
  return !this->ae;
}
/** @returns the entry's path name as reported by libarchive
 *  @throws ArchiveSysException if the view is empty
 */
std::string xwim::ArchiveEntryView::path_name() {
  if (this->ae == nullptr) {
    throw ArchiveSysException{"Access to invalid archive entry"};
  }
  const char* raw_name = archive_entry_pathname(this->ae);
  return raw_name;
}
/** @returns the entry's path name wrapped in a filesystem path
 *  @throws ArchiveSysException if the view is empty
 */
fs::path xwim::ArchiveEntryView::path() {
  if (this->ae == nullptr) {
    throw ArchiveSysException{"Access to invalid archive entry"};
  }
  return fs::path(this->path_name());
}
/** @returns the file type bits libarchive reports for the entry
 *  @throws ArchiveSysException if the view is empty
 */
mode_t xwim::ArchiveEntryView::file_type() {
  if (this->ae == nullptr) {
    throw ArchiveSysException{"Access to invalid archive entry"};
  }
  const mode_t type_bits = archive_entry_filetype(this->ae);
  return type_bits;
}
/** @returns true when the entry's file type denotes a directory
 *  @throws ArchiveSysException if the view is empty
 */
bool xwim::ArchiveEntryView::is_directory() {
  const mode_t type_bits = this->file_type();
  return S_ISDIR(type_bits);
}
/** Opens the archive at `path` for reading with all filters and formats
 * enabled.
 *
 * @param path location of the archive file
 * @throws ArchiveSysException if libarchive cannot open the file
 */
xwim::ArchiveReaderSys::ArchiveReaderSys(fs::path &path) {
  logger::trace("Setting up archive reader");
  // No entry is current until advance() succeeds; make cur() well defined.
  this->ae = nullptr;
  this->ar = archive_read_new();
  archive_read_support_filter_all(this->ar);
  archive_read_support_format_all(this->ar);

  logger::trace("Reading archive at {}", path.c_str());
  const int r = archive_read_open_filename(this->ar, path.c_str(), 10240);
  if (r != ARCHIVE_OK) {
    // The destructor does not run when a constructor throws, so the handle
    // must be released here. Build the exception first: it copies the error
    // text out of the handle.
    ArchiveSysException open_error{"Could not open archive file", this->ar};
    archive_read_free(this->ar);
    this->ar = nullptr;
    throw open_error;
  }
  logger::trace("Archive read succesfully");
}
/** Releases the libarchive read handle, if one is held. */
xwim::ArchiveReaderSys::~ArchiveReaderSys() {
  logger::trace("Destructing ArchiveReaderSys");
  if (this->ar == nullptr) {
    return;
  }
  archive_read_free(this->ar);
}
/** Moves the reader to the next entry header.
 *
 * @returns true if a new entry is current, false at the end of the archive
 *          (the current entry is then reset to null)
 * @throws ArchiveSysException for any other libarchive status
 */
bool xwim::ArchiveReaderSys::advance() {
  logger::trace("Advancing reader to next archive entry");

  const int status = archive_read_next_header(this->ar, &this->ae);
  if (status == ARCHIVE_EOF) {
    this->ae = nullptr;
    return false;
  }
  if (status == ARCHIVE_OK) {
    logger::trace("Got entry {}", archive_entry_pathname(ae));
    return true;
  }
  throw(ArchiveSysException{"Could not list archive", this->ar});
}
/** @returns a non-owning view of the entry the reader currently points to */
const xwim::ArchiveEntryView xwim::ArchiveReaderSys::cur() {
  ArchiveEntryView current(this->ae);
  return current;
}
/** Creates an extractor that writes below `root`.
 *
 * Side effects: creates `root` (including parents) and makes it the current
 * working directory of the process.
 */
xwim::ArchiveExtractorSys::ArchiveExtractorSys(fs::path &root) {
  logger::trace("Constructing ArchiveExtractorSys with path {}", root.string());

  fs::create_directories(root);
  fs::current_path(root);

  archive *disk_writer = archive_write_disk_new();
  archive_write_disk_set_standard_lookup(disk_writer);
  this->writer = disk_writer;

  logger::trace("Constructed ArchiveExtractorSys at {:p}",
                static_cast<void *>(this->writer));
}
/** Creates an extractor without a root; the working directory is untouched. */
xwim::ArchiveExtractorSys::ArchiveExtractorSys() {
  logger::trace("Construction ArchiveExtractorSys without root");

  archive *disk_writer = archive_write_disk_new();
  archive_write_disk_set_standard_lookup(disk_writer);
  this->writer = disk_writer;

  logger::trace("Constructed ArchiveExtractorSys at {:p}",
                static_cast<void *>(this->writer));
}
/** Extracts every entry `reader` still yields, one after the other. */
void xwim::ArchiveExtractorSys::extract_all(xwim::ArchiveReaderSys &reader) {
  for (bool has_entry = reader.advance(); has_entry;
       has_entry = reader.advance()) {
    this->extract_entry(reader);
  }
}
// forward declared
static int copy_data(struct archive *ar, struct archive *aw);
/** Writes the reader's current entry (header and data) to disk.
 *
 * @param reader reader positioned on the entry to extract
 * @throws ArchiveSysException if the header or the data cannot be written
 */
void xwim::ArchiveExtractorSys::extract_entry(xwim::ArchiveReaderSys &reader) {
  int r = archive_write_header(this->writer, reader.ae);
  if (r != ARCHIVE_OK) {
    // The header is written by the disk writer, so that handle holds the
    // error text; the reader's handle would yield no or a stale message.
    throw(ArchiveSysException("Could not extract entry", this->writer));
  }

  r = copy_data(reader.ar, this->writer);
  if (r != ARCHIVE_OK) {
    // copy_data can fail on either side; report whichever handle recorded
    // an error, preferring the reader.
    archive *failed =
        archive_error_string(reader.ar) != nullptr ? reader.ar : this->writer;
    throw(ArchiveSysException("Could not extract entry", failed));
  }
}
/** Closes and frees the disk writer, if one is held. */
xwim::ArchiveExtractorSys::~ArchiveExtractorSys() {
  logger::trace("Destructing ArchiveExtractorSys at {:p}",
                static_cast<void *>(this->writer));
  if (this->writer == nullptr) {
    return;
  }
  archive_write_close(this->writer);
  archive_write_free(this->writer);
}
/** Prepares a libarchive writer for `root`.
 *
 * Adds the filters of `compress_spec` in order and then sets its format.
 * Nothing is written until `compress()` is called.
 */
xwim::ArchiveCompressorSys::ArchiveCompressorSys(
    fs::path &root, xwim::CompressSpec compress_spec)
    : root{root}, compress_spec{compress_spec} {
  this->new_archive = archive_write_new();

  const auto &filters = this->compress_spec.filters;
  for (auto it = filters.begin(); it != filters.end(); ++it) {
    archive_write_add_filter(this->new_archive, *it);
  }

  archive_write_set_format(this->new_archive, this->compress_spec.format);
}
// forward declared
static fs::path archive_path_norm(const fs::path &root,
const xwim::CompressSpec &compress_spec);
void xwim::ArchiveCompressorSys::compress() {
fs::path archive_path = archive_path_norm(this->root, this->compress_spec);
logger::debug("Writing archive at: {}", archive_path.filename().c_str());
archive_write_open_filename(this->new_archive,
archive_path.filename().c_str());
archive *disk = archive_read_disk_new();
archive_read_disk_set_standard_lookup(disk);
int r;
r = archive_read_disk_open(disk, fs::relative(this->root).c_str());
if (r != ARCHIVE_OK) {
throw ArchiveSysException("Could not open path for archiving", disk);
}
archive_entry *entry;
char buff[16384];
for (;;) {
entry = archive_entry_new();
r = archive_read_next_header2(disk, entry);
if (r == ARCHIVE_EOF) break;
if (r != ARCHIVE_OK) {
throw ArchiveSysException("Could not read next archive entry", disk);
}
archive_read_disk_descend(disk);
const char* ae_path = archive_entry_pathname(entry);
fs::path ae_rel_path = fs::relative(fs::path(ae_path), this->root.parent_path());
archive_entry_set_pathname(entry, ae_rel_path.c_str());
logger::trace("Processing entry {}", archive_entry_pathname(entry));
r = archive_write_header(this->new_archive, entry);
if (r < ARCHIVE_OK) {
throw ArchiveSysException("Could not write header for archive entry",
this->new_archive);
}
if (r > ARCHIVE_FAILED) {
int fd = open(archive_entry_sourcepath(entry), O_RDONLY);
ssize_t len = read(fd, buff, sizeof(buff));
while (len > 0) {
archive_write_data(this->new_archive, buff, len);
len = read(fd, buff, sizeof(buff));
}
close(fd);
}
logger::trace("Entry written {}", archive_entry_pathname(entry));
archive_entry_free(entry);
}
}
/** Closes and frees the archive writer, if one is held. */
xwim::ArchiveCompressorSys::~ArchiveCompressorSys() {
  // The message previously named ArchiveExtractorSys (copy-paste), which made
  // traces of the two classes indistinguishable.
  logger::trace("Destructing ArchiveCompressorSys at {:p}",
                static_cast<void *>(this->new_archive));
  if (this->new_archive) {
    archive_write_close(this->new_archive);
    archive_write_free(this->new_archive);
  }
}
/** Derives the archive path from the path that is to be compressed.
 *
 * Note that currently only single arguments are allowed for `xwim` to
 * minimize ambiguity.
 *
 * Rules:
 *  - regular file: every extension is removed, then the extension from the
 *    spec is appended
 *  - directory: one trailing '/' is removed, then the extension from the
 *    spec is appended
 *
 * @param root          existing file or directory to compress
 * @param compress_spec supplies the extension to append
 * @returns the normalized archive path
 * @throws ArchiveSysException if `root` does not exist, is neither a regular
 *         file nor a directory, or carries no read permission bit at all
 */
static fs::path archive_path_norm(const fs::path &root,
                                  const xwim::CompressSpec &compress_spec) {
  fs::path archive_path{root};
  const fs::file_status path_status = fs::status(archive_path);
  const fs::file_type path_type = path_status.type();
  const fs::perms any_read =
      fs::perms::owner_read | fs::perms::group_read | fs::perms::others_read;

  if (!fs::exists(archive_path)) {
    logger::error("Non-existing path: {}", archive_path.string());
    throw xwim::ArchiveSysException{"Path does not exists"};
  }

  const bool is_regular = (path_type == fs::file_type::regular);
  const bool is_directory = (path_type == fs::file_type::directory);
  if (!is_regular && !is_directory) {
    logger::error("Unknown path type: {}", path_status.type());
    throw xwim::ArchiveSysException{"Unknown path type"};
  }

  if ((path_status.permissions() & any_read) == fs::perms::none) {
    logger::error("Cannot read path with permissions: {}",
                  path_status.permissions());
    throw xwim::ArchiveSysException{"Unreadable path"};
  }

  if (is_regular) {
    // Strip all extensions, not just the last one ("a.tar.gz" -> "a").
    while (archive_path.has_extension()) {
      archive_path.replace_extension();
    }
  } else if (archive_path.string().back() == '/') {
    logger::trace("Found trailing / in path");
    std::string without_slash = archive_path.string();
    without_slash.pop_back();
    archive_path = fs::path{without_slash};
    logger::trace("Normalized path to {}", archive_path.string());
  }

  archive_path.concat(compress_spec.extension);
  return archive_path;
}
/** Copies all data blocks of the current entry from `ar` to `aw`.
 *
 * @param ar archive being read
 * @param aw archive being written
 * @returns ARCHIVE_OK once the reader reports the end of the entry's data,
 *          otherwise the first non-OK status from either side
 */
static int copy_data(struct archive *ar, struct archive *aw) {
  const void *block = nullptr;
  size_t block_size = 0;
  int64_t block_offset = 0;

  while (true) {
    const int read_status =
        archive_read_data_block(ar, &block, &block_size, &block_offset);
    if (read_status == ARCHIVE_EOF) {
      return ARCHIVE_OK;
    }
    if (read_status != ARCHIVE_OK) {
      return read_status;
    }

    const int write_status =
        archive_write_data_block(aw, block, block_size, block_offset);
    if (write_status != ARCHIVE_OK) {
      return write_status;
    }
  }
}

View file

@ -1,119 +0,0 @@
#pragma once
#include <archive.h>
#include <filesystem>
#include <memory>
#include "spec.hpp"
#include <fmt/format.h>
namespace xwim {
/** A view into an archive entry
 *
 * The view is non-owning and the caller must guarantee
 * that the parent archive entry is valid when the view
 * is accessed.
 */
class ArchiveEntryView {
 private:
  // Null by default so that a default-constructed view is reliably empty
  // instead of holding an indeterminate pointer.
  archive_entry* ae = nullptr;

 public:
  ArchiveEntryView() = default;
  ArchiveEntryView(archive_entry* entry) : ae{entry} {}

  /** @returns true if the view refers to no entry */
  bool is_empty();
  /** @returns the entry's path name; throws if the view is empty */
  std::string path_name();
  /** @returns the entry's path name as a path; throws if the view is empty */
  std::filesystem::path path();
  /** @returns the entry's file type bits; throws if the view is empty */
  mode_t file_type();
  /** @returns true if the entry is a directory; throws if the view is empty */
  bool is_directory();
};
/** A reader for archive files
 *
 * Shim for `libarchive`. Iterates through
 * entries of an archive with `advance()`
 */
class ArchiveReaderSys {
 private:
  archive* ar = nullptr;
  archive_entry* ae = nullptr;
  friend class ArchiveExtractorSys;

 public:
  ArchiveReaderSys(std::filesystem::path& path);
  ~ArchiveReaderSys();

  // The destructor frees `ar`; a copy would free the same handle twice.
  ArchiveReaderSys(const ArchiveReaderSys&) = delete;
  ArchiveReaderSys& operator=(const ArchiveReaderSys&) = delete;

  /** Advances the internal entry pointer
   *
   * @return true if the pointer advanced to the next entry
   *         false if the end of the archive was reached
   */
  bool advance();

  /** Returns a non-owning view of the current entry
   *
   * ArchiveEntryView is a non-owning view of the currently
   * active entry in this reader. A retrieved archive entry
   * may not be used after another call to advance in the
   * same reader.
   *
   * @return a view to the archive entry this reader currently
   *         points to
   */
  const ArchiveEntryView cur();
};
/** An extractor for archive files
 *
 * Shim for `libarchive`.
 */
class ArchiveExtractorSys {
 private:
  archive* writer = nullptr;

 public:
  /** Creates `root`, makes it the working directory and extracts into it */
  ArchiveExtractorSys(std::filesystem::path& root);
  /** Extracts relative to the current working directory */
  ArchiveExtractorSys();
  ~ArchiveExtractorSys();

  // The destructor frees `writer`; a copy would free the same handle twice.
  ArchiveExtractorSys(const ArchiveExtractorSys&) = delete;
  ArchiveExtractorSys& operator=(const ArchiveExtractorSys&) = delete;

  /** Extracts all entries `reader` still yields */
  void extract_all(ArchiveReaderSys& reader);
  /** Extracts the entry `reader` currently points to */
  void extract_entry(ArchiveReaderSys& reader);
};
/** A compressor for archive files
 *
 * Shim for `libarchive`
 */
class ArchiveCompressorSys {
 private:
  archive* new_archive = nullptr;
  std::filesystem::path root;
  xwim::CompressSpec compress_spec;

 public:
  /** @param root          file or directory to archive
   *  @param compress_spec format, filters and extension of the new archive
   */
  ArchiveCompressorSys(std::filesystem::path& root, xwim::CompressSpec compress_spec);
  ~ArchiveCompressorSys();

  // The destructor frees `new_archive`; a copy would free it twice.
  ArchiveCompressorSys(const ArchiveCompressorSys&) = delete;
  ArchiveCompressorSys& operator=(const ArchiveCompressorSys&) = delete;

  /** Writes the archive for `root` */
  void compress();
};
/** Exception raised by the libarchive shims.
 *
 * The message is the caller's description, followed by libarchive's error
 * text when the given handle has one.
 */
class ArchiveSysException : public std::exception {
 private:
  std::string _what;

 public:
  /** @param what    description of the failed operation
   *  @param archive handle queried with `archive_error_string`
   */
  ArchiveSysException(std::string what, archive* archive) {
    const char* detail = archive_error_string(archive);
    if (detail == nullptr) {
      _what = fmt::format("{}", what);
    } else {
      _what = fmt::format("{}: {}", what, detail);
    }
  }

  /** @param what description of the failed operation */
  ArchiveSysException(std::string what) { _what = fmt::format("{}", what); }

  const char* what() const noexcept override { return _what.c_str(); }
};
} // namespace xwim

84
src/argparse.cpp Normal file
View file

@ -0,0 +1,84 @@
#include "argparse.hpp"
#include <fmt/core.h>
#include <tclap/ArgException.h>
#include <tclap/CmdLine.h>
#include <tclap/SwitchArg.h>
#include <tclap/UnlabeledMultiArg.h>
#include <tclap/ValueArg.h>
#include <filesystem>
#include <iostream>
#include <vector>
#include "archivinfo.hpp"
#include "fileformats.hpp"
using namespace TCLAP;
using namespace xwim;
namespace fs = std::filesystem;
/** Lets TCLAP treat `std::filesystem::path` as a value-like argument type. */
template <>
struct TCLAP::ArgTraits<fs::path> {
  using ValueCategory = ValueLike;
};
/** Declares the command line: `-c`/`-x` switches, `-o` output path and the
 * unlabeled list of input paths.
 *
 * NOTE(review): TCLAP documents `xorAdd` as making one of the two switches
 * mandatory; if so, the guessing branch in `parse_extract` is never reached.
 * Confirm against the TCLAP version in use.
 */
ArgParse::ArgParse()
    : cmd{"xwim - Do What I Mean Extractor", ' ', "0.3.0"},
      arg_compress{"c", "compress", "Compress <files>", false},
      arg_extract{"x", "extract", "Extract <file>", false},
      arg_outfile{"o",   "out",      "Out <file-or-path>",
                  false, fs::path{}, "A path on the filesystem"},
      arg_infiles{"Files", "Archive to extract or files to compress", true,
                  "A path on the filesystem"} {
  cmd.xorAdd(arg_compress, arg_extract);
  cmd.add(arg_outfile);
  cmd.add(arg_infiles);
}
void ArgParse::parse(int argc, char** argv) {
try {
cmd.parse(argc, argv);
} catch (ArgException& e) {
throw new xwim::ArgParseException(e.error());
}
this->extract = parse_extract();
this->outfile = arg_outfile.getValue();
this->infiles = arg_infiles.getValue();
}
/** Decides whether the invocation means "extract" (true) or "compress"
 * (false).
 *
 * Order of precedence:
 *  1. an explicit `-c` / `-x` switch
 *  2. the output path: a known archive extension means compress, anything
 *     else is taken as a folder to extract into
 *  3. exactly one input that is an archive means extract
 *  4. everything else, in particular multiple inputs, means compress
 */
bool ArgParse::parse_extract() {
  // extract/compress explicitly given; xor ensured in `cmd`
  if (this->arg_compress.getValue()) return false;
  if (this->arg_extract.getValue()) return true;

  // Not explicitly given, check if we can guess from input
  if (this->arg_outfile.isSet()) {
    // An outfile that looks like an archive is a compression target;
    // otherwise it is assumed to be meant as a folder for extraction.
    const bool out_is_archive =
        xwim::archivinfo::has_known_extension(this->arg_outfile.getValue());
    return !out_is_archive;
  }

  // one infile which is an archive, so intention is probably to extract this;
  // all other cases, in particular multiple infiles, assume compression
  const auto& inputs = this->arg_infiles.getValue();
  return inputs.size() == 1 && xwim::archivinfo::is_archive(inputs.at(0));
}
/** @returns true when the parsed command line asks for compression */
bool ArgParse::compressp() {
  return this->extract ? false : true;
}
/** @returns true when the parsed command line asks for extraction */
bool ArgParse::extractp() {
  const bool wants_extract = this->extract;
  return wants_extract;
}

45
src/argparse.hpp Normal file
View file

@ -0,0 +1,45 @@
#pragma once
#include <fmt/core.h>
#include <tclap/CmdLine.h>
#include <tclap/SwitchArg.h>
#include <tclap/UnlabeledMultiArg.h>
#include <tclap/ValueArg.h>
#include <filesystem>
#include <iostream>
namespace xwim {
/** Command line parser for xwim.
 *
 * Call `parse` once, then query the outcome with `compressp` / `extractp`.
 */
class ArgParse {
 private:
  // Initialised so that compressp()/extractp() are well defined even when
  // they are queried before parse() has run.
  bool extract = false;
  std::filesystem::path outfile;
  std::vector<std::filesystem::path> infiles;

  TCLAP::CmdLine cmd;
  TCLAP::SwitchArg arg_compress;
  TCLAP::SwitchArg arg_extract;
  TCLAP::ValueArg<std::filesystem::path> arg_outfile;
  TCLAP::UnlabeledMultiArg<std::filesystem::path> arg_infiles;

 protected:
  /** Derives the extract/compress decision from the parsed arguments */
  bool parse_extract();

 public:
  ArgParse();
  /** Parses `argv`; throws ArgParseException on invalid arguments */
  void parse(int argc, char** argv);
  /** @returns true if the invocation means "compress" */
  bool compressp();
  /** @returns true if the invocation means "extract" */
  bool extractp();
};
/** Exception signalling invalid command line arguments. */
class ArgParseException : public std::exception {
 private:
  std::string _what;

 public:
  /** @param what message returned verbatim by `what()` */
  ArgParseException(std::string what) : _what(what) {}

  /** @param fmt_string fmt-style format string
   *  @param args       values substituted into `fmt_string`
   */
  template <typename... Args>
  ArgParseException(std::string fmt_string, Args&&... args)
      : _what(fmt::format(fmt_string, args...)) {}

  const char* what() const noexcept override { return _what.c_str(); }
};
} // namespace xwim

View file

@ -1,149 +0,0 @@
/** @file fileformats.hpp
* @brief Handle archive extensions
*/
#pragma once
#include <spdlog/spdlog.h>
#include <optional>
namespace logger = spdlog;
#include <filesystem>
#include <set>
#include <string>
namespace xwim {
/** Common archive formats understood by xwim
*
* The underlying libarchive backend retrieves format information by a process
* called `bidding`. Hence, this information is mainly used to strip extensions.
*
* Stripping extensions via `std::filesystem::path` does not work reliably since
* it gets easily confused by dots in the regular file name.
*
* Each element is an extension including its leading dot; `stem` and `ext`
* compare `path::extension()` against this set.
*/
const std::set<std::string> fileformats{".7z", ".7zip", ".jar", ".tgz",
".bz2", ".bzip2", ".gz", ".gzip",
".rar", ".tar", ".xz", ".zip"};
/** Archive filters
*
* Archive filters are essentially either data compression algorithms or data
* encodings. Filters are used on archives after an archiving program created
* the archive out of files and folders. Multiple filters can be applied to an
* archive. The order is significant.
*
* The simplest way to understand the distinction between filters and formats is
* to visualize the traditional `tar.gz` format. Tar creates the archive (an
* archive format). Gzip compresses the archive (an archive filter). In theory
* one could create a `tar.gz.lz.uu` tarball. That is, a `tar` archive
* filter-compressed with `gzip`, filter-compressed with `lzip`, filter-encoded
* with `uuencode`.
*
* Note that while this abstraction works in many cases it is not perfect. For
* example `.zip` files are traditionally archives where every entry is
* compressed separately and the entries are then bundled together into an
* archive. In those cases the archive format is ZIP with no (external)
* filters.
*
* NOTE(review): the numeric values presumably mirror libarchive's
* ARCHIVE_FILTER_* codes — confirm against archive.h before changing any.
*/
enum archive_filter {
NONE = 0,
GZIP = 1,
BZIP2 = 2,
COMPRESS = 3,
PROGRAM = 4,
LZMA = 5,
XZ = 6,
UU = 7,
RPM = 8,
LZIP = 9,
LRZIP = 10,
LZOP = 11,
GRZIP = 12,
LZ4 = 13,
ZSTD = 14
};
/** Archive formats
*
* Archive formats are the specifications for bundling together multiple files
* and folders (including metadata) into a single file (the archive). See also
* `archive_filter` for more details on the difference between archive formats
* and archive filters.
*
* Each family occupies its own value in the bits selected by `BASE_MASK`;
* variants of a family are the family value OR-ed with a small index.
*
* NOTE(review): the numeric values presumably mirror libarchive's
* ARCHIVE_FORMAT_* codes — confirm against archive.h before changing any.
*/
enum archive_format {
BASE_MASK = 0xff0000,
CPIO = 0x10000,
CPIO_POSIX = (CPIO | 1),
CPIO_BIN_LE = (CPIO | 2),
CPIO_BIN_BE = (CPIO | 3),
CPIO_SVR4_NOCRC = (CPIO | 4),
CPIO_SVR4_CRC = (CPIO | 5),
CPIO_AFIO_LARGE = (CPIO | 6),
SHAR = 0x20000,
SHAR_BASE = (SHAR | 1),
SHAR_DUMP = (SHAR | 2),
TAR = 0x30000,
TAR_USTAR = (TAR | 1),
TAR_PAX_INTERCHANGE = (TAR | 2),
TAR_PAX_RESTRICTED = (TAR | 3),
TAR_GNUTAR = (TAR | 4),
ISO9660 = 0x40000,
ISO9660_ROCKRIDGE = (ISO9660 | 1),
ZIP = 0x50000,
EMPTY = 0x60000,
AR = 0x70000,
AR_GNU = (AR | 1),
AR_BSD = (AR | 2),
MTREE = 0x80000,
RAW = 0x90000,
XAR = 0xA0000,
LHA = 0xB0000,
CAB = 0xC0000,
RAR = 0xD0000,
SEVENZIP = 0xE0000,
WARC = 0xF0000,
RAR_V5 = 0x100000
};
/** Strip archive extensions from a path
 *
 * Only the file name component is considered. Extensions are removed from
 * the right for as long as the last one is listed in `fileformats`.
 *
 * @returns Base filename without archive extensions
 */
inline std::filesystem::path
stem(const std::filesystem::path& path) {
  logger::trace("Stemming {}", path.string());

  std::filesystem::path remaining = path.filename();
  while (fileformats.count(remaining.extension().string()) != 0) {
    remaining = remaining.stem();
    logger::trace("Stemmed to {}", remaining.string());
  }

  logger::trace("Finished stemming {}", remaining.string());
  return remaining;
}
/** Get the archive extension of a path.
 *
 * The archive extension may be a combination of supported fileformats in which
 * case all of them are returned.
 *
 * @returns Archive extension of the archive or path() if no (known) extension
 * exists.
 */
inline std::filesystem::path ext(const std::filesystem::path& path) {
  std::filesystem::path remaining{path};
  logger::trace("Extracting extension of {}", remaining.string());

  std::filesystem::path collected;
  for (std::filesystem::path last = remaining.extension();
       fileformats.count(last.string()) != 0;
       last = remaining.extension()) {
    // Prepend: extensions are peeled off right-to-left.
    collected = last.concat(collected.string());
    remaining.replace_extension();
  }
  return collected;
}
} // namespace xwim

View file

@ -1,77 +1,6 @@
#include <spdlog/common.h>
#include <cstdlib>
#include <iostream>
#include <ostream>
#include <string>
#include <list>
#include "util/log.hpp"
#include "util/argparse.hpp"
#include "archive.hpp"
#include "spec.hpp"
#include "fileformats.hpp"
namespace logger = spdlog;
using namespace xwim::argparse;
void extract(const XwimPath& xwim_path) {
try {
xwim::Archive archive{xwim_path.path()};
xwim::ArchiveSpec archive_spec = archive.check();
logger::info("{}", archive_spec);
xwim::ExtractSpec extract_spec{};
if (!archive_spec.has_single_root || !archive_spec.is_root_filename) {
extract_spec.make_dir = true;
std::filesystem::path stem = xwim::stem(xwim_path.path());
extract_spec.dirname = stem;
}
if (archive_spec.has_subarchive) {
extract_spec.extract_subarchive = true;
}
logger::info("{}", extract_spec);
archive.extract(extract_spec);
} catch (xwim::ArchiveException& ae) {
logger::error("{}", ae.what());
}
}
void compress(const XwimPath& xwim_path) {
try {
xwim::Archive archive{xwim_path.path()};
xwim::CompressSpec compress_spec{};
archive.compress(compress_spec);
} catch (xwim::ArchiveException& ae) {
logger::error("{}", ae.what());
}
}
/** Parses the command line into an XwimPath.
 *
 * On invalid arguments the error is logged, the usage text is printed to
 * standard output and the process exits with status 1.
 */
XwimPath parse_args(int argc, char** argv) {
  try {
    XwimPath parsed = parse(argc, argv);
    return parsed;
  } catch (ArgParseException& ex) {
    logger::error("{}\n", ex.what());
    std::cout << usage();
    std::exit(1);
  }
}
/** Entry point: extracts when the argument looks like an archive, otherwise
 * compresses it.
 */
int main(int argc, char** argv) {
  xwim::log::init();

  XwimPath xwim_path = parse_args(argc, argv);
  if (!xwim_path.is_archive()) {
    compress(xwim_path);
  } else {
    extract(xwim_path);
  }
}

View file

@ -5,6 +5,7 @@ xwim_src = ['main.cpp',
xwim_libs = [dependency('libarchive', required: true),
dependency('fmt', required: true),
dependency('spdlog', required: true)]
dependency('spdlog', required: true),
dependency('tclap', required: true)]
executable('xwim', xwim_src, dependencies: xwim_libs)

View file

@ -1,132 +0,0 @@
#pragma once
#include <archive.h>
#include <fmt/format.h>
#include <filesystem>
#include <memory>
#include "fileformats.hpp"
namespace xwim {
/** Properties of an archive
*
* These properties can be retrieved by analyzing the
* archive. There is no outside-knowledge. All information
* is in the archive.
*
* NOTE(review): the member docs state that `is_root_filename` and
* `is_root_dir` cannot be true without `has_single_root`; nothing in this
* struct enforces that — confirm that the code filling it does.
*/
struct ArchiveSpec {
/** There is only a single file xor a single folder at the archive's root */
bool has_single_root = false;
/** The name of the (single) root is the same as the stemmed archive file
* name. Cannot be true if `has_single_root` is false */
bool is_root_filename = false;
/** The (single) root is a folder. Cannot be true if `has_single_root` is
* false */
bool is_root_dir = false;
/** Whether the archive contains sub-archives */
bool has_subarchive = false;
};
/** Properties influencing the extraction process
*
* These properties can be set to influence the extraction
* process accordingly.
*/
struct ExtractSpec {
/** Create a new directory for extraction at `dirname` */
bool make_dir = false;
/** The path to a directory for extraction */
std::filesystem::path dirname{};
/** Recursively extract sub-archives */
bool extract_subarchive = false;
};
/** Compile time definitions for platform-dependent files and filters
 *
 * Unix-like platforms default to gzip-compressed ustar (".tar.gz"); Windows
 * and every other platform default to zip.
 *
 * The former Windows branch tested `_win32`, `__win32__` and `__windows__`,
 * none of which is a predefined macro, so Windows builds fell through to a
 * fallback that referred to the non-existent scope `xwim::fileformats::`.
 * Both non-unix cases want the same values, so a single fallback suffices.
 */
#if defined(unix) || defined(__unix__) || defined(__unix)
#define XWIM_COMPRESS_FORMAT xwim::archive_format::TAR_USTAR
#define XWIM_COMPRESS_FILTER { xwim::archive_filter::GZIP }
#define XWIM_COMPRESS_EXTENSION ".tar.gz"
#else
#define XWIM_COMPRESS_FORMAT xwim::archive_format::ZIP
#define XWIM_COMPRESS_FILTER {}
#define XWIM_COMPRESS_EXTENSION ".zip"
#endif
/** Properties influencing the compression process
*
* These properties can be set to influence the compression process
* accordingly.
*
* Per default, the compress spec is platform dependent to accommodate for the
* expected format on that platform. On Windows this is zip, on Unix this is
* tar.gz
*/
struct CompressSpec {
/** The archiving format, e.g. tar */
xwim::archive_format format =
XWIM_COMPRESS_FORMAT;
/** Filters applied to the archive, e.g. gzip */
std::vector<xwim::archive_filter> filters =
XWIM_COMPRESS_FILTER;
/** Archive extension, e.g. .tar.gz */
std::string extension =
XWIM_COMPRESS_EXTENSION;
};
} // namespace xwim
#if FMT_VERSION < 50300
typedef fmt::basic_parse_context<char> format_parse_context;
#endif
/** fmt support for logging an ArchiveSpec as `Archive[ .field=value, ... ]`. */
template <>
struct fmt::formatter<xwim::ArchiveSpec> {
  /** No format specifiers are supported; the spec is consumed as-is. */
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  // const-qualified: newer fmt releases require format() to be callable on a
  // const formatter. Fields are now separated consistently by commas.
  template <typename FormatContext>
  auto format(const xwim::ArchiveSpec& spec, FormatContext& ctx) const {
    return fmt::format_to(ctx.out(),
                          "Archive["
                          " .has_single_root={},"
                          " .is_root_filename={},"
                          " .is_root_dir={},"
                          " .has_subarchive={}"
                          " ]",
                          spec.has_single_root, spec.is_root_filename,
                          spec.is_root_dir, spec.has_subarchive);
  }
};
/** fmt support for logging an ExtractSpec as `Extract[ .field=value, ... ]`. */
template <>
struct fmt::formatter<xwim::ExtractSpec> {
  /** No format specifiers are supported; the spec is consumed as-is. */
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  // const-qualified: newer fmt releases require format() to be callable on a
  // const formatter. Fields are now separated consistently by commas.
  template <typename FormatContext>
  auto format(const xwim::ExtractSpec& spec, FormatContext& ctx) const {
    return fmt::format_to(ctx.out(),
                          "Extract["
                          " .make_dir={},"
                          " .dirname={},"
                          " .extract_subarchive={}"
                          " ]",
                          spec.make_dir, spec.dirname.string(),
                          spec.extract_subarchive);
  }
};
/** fmt support for logging a CompressSpec as `Compress[ .field=value, ... ]`.
 *
 * NOTE(review): formatting `spec.filters` (a vector) presumably relies on
 * fmt's range support being included by the user of this header — confirm.
 */
template <>
struct fmt::formatter<xwim::CompressSpec> {
  /** No format specifiers are supported; the spec is consumed as-is. */
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  // const-qualified: newer fmt releases require format() to be callable on a
  // const formatter.
  template <typename FormatContext>
  auto format(const xwim::CompressSpec& spec, FormatContext& ctx) const {
    return fmt::format_to(ctx.out(),
                          "Compress["
                          " .format={},"
                          " .filters={}"
                          " ]",
                          spec.format, spec.filters);
  }
};

View file

@ -1,26 +0,0 @@
#include "argparse.hpp"
namespace xwim {
namespace argparse {
/** Builds an XwimPath from the raw command line.
 *
 * @throws ArgParseException unless exactly one argument was given
 */
XwimPath parse(int argc, char** argv) {
  XwimPath parsed(argc, argv);
  return parsed;
}
// Delegates to XwimPath{} first so that the object counts as fully
// constructed and its destructor runs even if this constructor throws.
// http://www.vishalchovatiya.com/7-best-practices-for-exception-handling-in-cpp-with-example/
XwimPath::XwimPath(int argc, char** argv) : XwimPath{} {
  if (argc != 2) {
    if (argc < 2) {
      throw ArgParseException{"No argument provided"};
    }
    throw ArgParseException{"Too many arguments provided"};
  }
  this->_path = std::filesystem::path{argv[1]};
}
/** @returns true if the path ends in at least one known archive extension */
bool XwimPath::is_archive() {
  const std::filesystem::path archive_ext = xwim::ext(_path);
  return !archive_ext.empty();
}
/** @returns a copy of the stored path */
std::filesystem::path XwimPath::path() const {
  std::filesystem::path copy(_path);
  return copy;
}
}
}

View file

@ -1,79 +0,0 @@
#pragma once
#include <exception>
#include <filesystem>
#include <iomanip>
#include <ostream>
#include <sstream>
#include <string>
#include "../fileformats.hpp"
namespace xwim {
/**
* xwim allows for
* 1. an archive
* 2. a file or folder
*
* In case of (1) the archive will be extracted according to the xwim
* do-what-i-mean rules.
*
* In case of (2) the file or folder will be compressed into a "platform native"
* format, i.e. what appears to be the most widely used format on that platform.
* In case of unix this is tar.gz. In case of windows this is zip. The archive
* gets the same name as the file or folder and a proper extension.
*
* A list of files or folders is unsupported as it would be too ambiguous to
* choose a name for the archive. A list of archives is unsupported for
* consistency reasons. Any mixture is unsupported as it would be too ambiguous
* what the user wants. This is subject to change in the future.
*/
namespace argparse {
/** The single path argument xwim was invoked with. */
class XwimPath {
 private:
  std::filesystem::path _path;

 public:
  /** Creates an empty path */
  XwimPath() : _path() {}
  /** Takes the path from the command line */
  XwimPath(int argc, char** argv);

  /** Whether the path looks like an archive */
  bool is_archive();
  /** The stored path */
  std::filesystem::path path() const;
};
/** Exception signalling invalid command line arguments. */
class ArgParseException : public std::exception {
 private:
  std::string _what;

 public:
  /** @param what message returned verbatim by `what()` */
  ArgParseException(std::string what) : _what(what) {}

  const char* what() const noexcept override { return _what.c_str(); }
};
XwimPath parse(int argc, char** argv);
/** Builds the usage text shown when the command line is invalid.
 *
 * @returns the complete, newline-terminated help text
 */
inline std::string usage() {
  std::ostringstream text;
  text << "USAGE:"
       << "\t xwim <path>\n"
       << "\n"
       // The width and fill apply to the next item only: "\t path " is
       // left-aligned and padded with '.' to ten characters.
       << "PARAMS:" << std::left << std::setfill('.') << std::setw(10)
       << "\t path "
       << " Archive\n"
       << "\n"
       << "FORMATS:\n"
       << "\t .7z, .7zip .jar, .tgz, .bz2, .bzip2\n"
       << "\t .gz, .gzip, .rar, .tar, .xz, .zip\n"
       << "\n"
       << "EXAMPLES:\n"
       << "\t Extract archive archive.tar.gz:\n"
       << "\t xwim archive.tar.gz\n"
       // Plain newline: flushing an in-memory stream with std::endl is
       // pointless.
       << '\n';
  return text.str();
}
} // namespace argparse
} // namespace xwim

View file

@ -1,77 +0,0 @@
#pragma once
#include <spdlog/common.h>
#include <spdlog/spdlog.h>
#include <cstdlib>
#ifdef NDEBUG
#define XWIM_LOGLEVEL SPDLOG_LEVEL_ERROR
#else
#define XWIM_LOGLEVEL SPDLOG_LEVEL_DEBUG
#endif
namespace xwim::log {
/**
 * Get log level from XWIM_LOGLEVEL environment variable.
 * For valid values see SPDLOG_LEVEL_NAMES in spdlog/common.h
 *
 * Declared `inline` because it is defined in a header: without it every
 * translation unit including this file emits its own definition and linking
 * fails with a multiple-definition error.
 *
 * @returns spdlog::level::level_enum::off if no valid XWIM_LOGLEVEL defined
 */
inline spdlog::level::level_enum _init_from_env() {
  const char* env_lvl = std::getenv("XWIM_LOGLEVEL");
  if (!env_lvl) {
    return spdlog::level::level_enum::off;
  }

  spdlog::level::level_enum lvl = spdlog::level::from_str(env_lvl);
  //`::from_str` returns `off` if no match found
  if (spdlog::level::level_enum::off == lvl) {
    spdlog::debug("No environment definition for log level");  // uses default
                                                               // logger/level
  }
  return lvl;
}
/**
 * Get log level from compile time definition.
 *
 * The value comes from the XWIM_LOGLEVEL macro, which is selected by NDEBUG.
 * Declared `inline` because the definition lives in a header and would
 * otherwise be defined once per including translation unit.
 *
 * @return the level matching SPDLOG_LEVEL_ERROR for release builds (-DNDEBUG),
 *         the level matching SPDLOG_LEVEL_DEBUG for debug builds
 */
inline spdlog::level::level_enum _init_from_compile() {
  return static_cast<spdlog::level::level_enum>(XWIM_LOGLEVEL);
}
/**
 * Determine the log level from various sources at runtime.
 *
 * The log level is determined from sources in the following order (first
 * wins):
 * 1. The `level` argument
 * 2. The XWIM_LOGLEVEL environment variable
 * 3. The default for the build type (-DNDEBUG)
 *    -> ERROR for release builds
 *    -> DEBUG for debug builds
 *
 * A source counts as "not set" when it yields `off`. The determined level is
 * then set for the default logger via `spdlog::set_level`.
 *
 * Declared `inline` because the definition lives in a header and would
 * otherwise be defined once per including translation unit.
 *
 * @param level explicit level; `off` (the default) means "not specified"
 */
inline void init(
    spdlog::level::level_enum level = spdlog::level::level_enum::off) {
  // Fall back to the environment only when no explicit level was given.
  if (spdlog::level::level_enum::off == level) {
    level = _init_from_env();
  }

  // Fall back to the build type default when the environment gave nothing.
  if (spdlog::level::level_enum::off == level) {
    level = _init_from_compile();
  }

  spdlog::set_level(level);
}
} // namespace xwim::log

View file

@ -1,12 +0,0 @@
#include <gtest/gtest.h>
#include <archive.hpp>
#include <filesystem>
#include <spec.hpp>
// Opens the fixture archive test/archives/root.tar.gz and expects the spec
// returned by check() to have has_single_root set.
TEST(ArchiveTest, ArchiveSpecDetectsSingleRoot) {
xwim::Archive archive("test/archives/root.tar.gz");
xwim::ArchiveSpec spec = archive.check();
ASSERT_TRUE(spec.has_single_root);
}

View file

@ -1,64 +0,0 @@
#include <gtest/gtest.h>
#include <fileformats.hpp>
#include <string>
// stem() of a path ending in the single extension ".rar" is expected to be
// the bare file name "file" (directories and extension removed).
TEST(FileformatsTest, StemStripsSingleKnownExtension) {
std::filesystem::path archive_path {"/some/path/to/file.rar"};
ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file"});
}
// A chain of extensions (.tar.rar.gz.7z.rar) is expected to be stripped
// completely, leaving "file".
TEST(FileformatsTest, StemStripsMultipleKnownExtensions) {
std::filesystem::path archive_path{"/some/path/to/file.tar.rar.gz.7z.rar"};
ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file"});
}
// Only the trailing ".rar" is expected to be removed; the ".ukn" part in
// front of it must remain in the result.
TEST(FileformatsTest, StemStripsOnlyKnownExtension) {
std::filesystem::path archive_path{"/some/path/to/file.ukn.rar"};
ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file.ukn"});
}
// With ".ukn" as the only extension, stem() is expected to return the file
// name unchanged.
TEST(FileformatsTest, StemStripsNothingWithoutKnownExtension) {
std::filesystem::path archive_path{"/some/path/to/file.ukn"};
ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file.ukn"});
}
// A name that merely ends in the letters "rar" but has no dot must come back
// unchanged.
TEST(FileformatsTest, StemStripsNothingWithoutExtension) {
std::filesystem::path archive_path{"/some/path/to/filerar"};
ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"filerar"});
}
// ext() of a path ending in ".rar" is expected to be ".rar".
TEST(FileExtTest, ExtGetsKnownExtension) {
std::filesystem::path archive_path{"/some/path/to/file.rar"};
ASSERT_EQ(xwim::ext(archive_path), std::filesystem::path{".rar"});
}
// For "file.tar.gz" the whole combined extension ".tar.gz" is expected, not
// just the last component.
TEST(FileExtTest, CombinedExtensionGetsAll) {
std::filesystem::path archive_path{"/some/path/to/file.tar.gz"};
ASSERT_EQ(xwim::ext(archive_path), std::filesystem::path{".tar.gz"});
}
// With ".ukn" as the only extension, ext() is expected to return an empty
// path.
TEST(FileExtTest, ExtEmptyForUnknownExtension) {
std::filesystem::path archive_path{"/some/path/to/file.ukn"};
ASSERT_TRUE(xwim::ext(archive_path).empty());
}
// A leading ".ukn" component must not be part of the result: only ".tar.gz"
// is expected for "file.ukn.tar.gz".
TEST(FileExtTest, CombinedExtensionGetsKnown) {
std::filesystem::path archive_path{"/some/path/to/file.ukn.tar.gz"};
ASSERT_EQ(xwim::ext(archive_path), std::filesystem::path{".tar.gz"});
}
// When the last component is ".ukn", ext() is expected to be empty even
// though ".tar.gz" precedes it.
TEST(FileExtTest, CombinedExtensionLastUnknownEmpty) {
std::filesystem::path archive_path{"/some/path/to/file.tar.gz.ukn"};
ASSERT_TRUE(xwim::ext(archive_path).empty());
}

BIN
tests/ahaahm.tar.gz Normal file

Binary file not shown.

View file

@ -0,0 +1 @@
äahääm

View file

@ -0,0 +1 @@
äahääm

BIN
tests/root.tar.gz Normal file

Binary file not shown.

17
tests/runtests/run.py Executable file
View file

@ -0,0 +1,17 @@
#!/usr/bin/env python3
"""Run the xwim binary against every file found below ``../../``.

An optional first command line argument is an fnmatch pattern; files whose
name does not match it are skipped. For each remaining file the captured
stdout and stderr of the xwim run are echoed.

All paths are relative to the current working directory.
"""
import os
import subprocess
import sys
from fnmatch import fnmatch

for root, _dirs, files in os.walk('../../'):
    for f in files:
        # Optional filter: only run files matching the given pattern.
        if len(sys.argv) > 1 and not fnmatch(f, sys.argv[1]):
            continue

        print(f"Running {f}")
        print(f"{os.path.join(root,f)}")

        # errors='replace' keeps a single undecodable byte in the tool's
        # output from aborting the whole run with UnicodeDecodeError.
        r = subprocess.run(
            ["../../../target/src/xwim", os.path.join(root, f)],
            capture_output=True,
            encoding='utf-8',
            errors='replace',
        )
        print(f"{r.stdout}")
        print(f"{r.stderr}", file=sys.stderr)

BIN
tests/test-1.23.7z Normal file

Binary file not shown.

BIN
tests/test-1.23.arj Normal file

Binary file not shown.

BIN
tests/test-1.23.cpio Normal file

Binary file not shown.

BIN
tests/test-1.23.lzh Normal file

Binary file not shown.

BIN
tests/test-1.23.rar Normal file

Binary file not shown.

BIN
tests/test-1.23.tar Normal file

Binary file not shown.

BIN
tests/test-1.23.tar.bz2 Normal file

Binary file not shown.

BIN
tests/test-1.23.tar.gz Normal file

Binary file not shown.

BIN
tests/test-1.23.tar.lrz Normal file

Binary file not shown.

BIN
tests/test-1.23.tar.lzma Normal file

Binary file not shown.

BIN
tests/test-1.23.zip Normal file

Binary file not shown.

BIN
tests/test-1.23_all.deb Normal file

Binary file not shown.

BIN
tests/test-2_all.deb Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
tests/test-empty.tar.bz2 Normal file

Binary file not shown.

Binary file not shown.

BIN
tests/test-onedir.tar.bz2 Normal file

Binary file not shown.

BIN
tests/test-onedir.tar.gz Normal file

Binary file not shown.

BIN
tests/test-onefile.tar.gz Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
tests/test-text.bz2 Normal file

Binary file not shown.

BIN
tests/test-text.gz Normal file

Binary file not shown.

BIN
tests/test-text.lrz Normal file

Binary file not shown.

BIN
tests/test-text.lz Normal file

Binary file not shown.

BIN
tests/test-text.xz Normal file

Binary file not shown.

Binary file not shown.