Refactoring and extraction
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
Armin Friedl 2021-02-19 05:28:06 +01:00
parent f5cddbc0f3
commit 189be660f5
Signed by: armin
GPG key ID: 48C726EEE7FBCBC8
12 changed files with 613 additions and 135 deletions

View file

@ -8,4 +8,4 @@ add_global_arguments('-DVERSION='+meson.version(), language: 'cpp')
subdir('src') subdir('src')
subdir('doc') subdir('doc')
subdir('test') # subdir('test')

87
src/Archiver.cpp Normal file
View file

@ -0,0 +1,87 @@
#include "Archiver.hpp"
#include <spdlog/spdlog.h>
#include <map>
#include <memory>
#include "Common.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
/**
 * Extract the longest known archive extension from `path`.
 *
 * The extension is assembled right-to-left (".gz", then ".tar.gz", ...) and
 * every candidate is looked up in `extensions_format`. Scanning continues past
 * unknown intermediate candidates, because a compound extension such as
 * ".tar.gz" is known although its last component ".gz" alone is not.
 *
 * @param path path (or bare file name) of a potential archive
 * @return the longest suffix of dot-separated components found in
 *         `extensions_format`, or an empty path if none is known
 */
fs::path archive_extension(const fs::path& path) {
  fs::path longest_known;     // best match so far; stays empty if none
  fs::path candidate;         // extension accumulated so far
  fs::path remaining = path;  // what is left after peeling extensions off

  while (remaining.has_extension()) {
    fs::path grown = remaining.extension();
    grown += candidate;
    candidate = grown;
    remaining = remaining.stem();

    if (extensions_format.find(candidate.string()) != extensions_format.end()) {
      longest_known = candidate;
    }
  }

  return longest_known;
}
/**
 * Strip the longest known archive extension from `path`.
 *
 * Candidates are assembled right-to-left and looked up in
 * `extensions_format`. Unknown intermediate candidates do not stop the scan,
 * so "foo.tar.gz" is stripped to "foo" even though ".gz" alone is unknown.
 *
 * NOTE: the stripped result is produced with `fs::path::stem()`, which yields
 * the file name only. When an extension is stripped the directory part of
 * `path` is therefore dropped as well; when nothing is stripped `path` is
 * returned unchanged.
 *
 * @param path path (or bare file name) of a potential archive
 * @return `path` without its longest known archive extension
 */
fs::path strip_archive_extension(const fs::path& path) {
  fs::path stripped = path;   // result for the longest known extension so far
  fs::path candidate;         // extension accumulated so far
  fs::path remaining = path;  // what is left after peeling extensions off

  while (remaining.has_extension()) {
    fs::path grown = remaining.extension();
    grown += candidate;
    candidate = grown;
    remaining = remaining.stem();

    if (extensions_format.find(candidate.string()) != extensions_format.end()) {
      stripped = remaining;
    }
  }

  return stripped;
}
/**
 * Tell whether `path` carries an extension listed in `format_extensions`.
 *
 * The outcome is also reported on the debug log.
 *
 * @param path path of the potential archive
 * @return true if the archive extension of `path` is a known format
 */
bool can_extract(const fs::path& path) {
  fs::path ext = archive_extension(path);
  const bool known = format_extensions.count(ext.string()) > 0;

  if (!known) {
    spdlog::debug("Could not find {} in known formats", ext);
    return false;
  }

  spdlog::debug("Found {} in known formats", ext);
  return true;
}
/**
 * Map the archive extension of `path` to its `Format`.
 *
 * @param path path of the archive
 * @return the format registered for the extension in `extensions_format`
 * @throws XwimError if the extension is not registered
 */
Format parse_format(const fs::path& path) {
  fs::path ext = archive_extension(path);
  auto match = extensions_format.find(ext);

  if (match != extensions_format.end()) {
    return match->second;
  }

  throw XwimError{"No known archiver for {}", path};
}
/**
 * Create the archiver responsible for the format of `archive_name`.
 *
 * Both currently known formats are handled by `LibArchiver`.
 *
 * @param archive_name file name of the archive; its extension selects the
 *                     format via `parse_format`
 * @return a newly created archiver
 * @throws XwimError if the format is unknown or has no archiver assigned
 */
unique_ptr<Archiver> make_archiver(const string& archive_name) {
  const Format format = parse_format(archive_name);

  if (format == Format::TAR_GZ || format == Format::ZIP) {
    return make_unique<LibArchiver>();
  }

  // Only reachable if a Format was added without assigning an archiver
  throw XwimError{
      "Cannot construct archiver for {}. `extension_format` surjection "
      "invariant violated?",
      archive_name};
}
} // namespace xwim

48
src/Archiver.hpp Normal file
View file

@ -0,0 +1,48 @@
#pragma once
#include <fmt/core.h>
#include <filesystem>
#include <map>
#include <memory>
#include <set>
#include "Common.hpp"
namespace xwim {
// Invariant:
// `extensions_format` defines a surjection from `format_extensions`
// to `Format`: every extension listed in `format_extensions` has an entry in
// `extensions_format`, and every `Format` is reached by at least one
// extension.
//
// The tables are `inline` so that all translation units including this header
// share one instance instead of each constructing a private copy.

// All file extensions recognised as archives
inline const std::set<std::string> format_extensions{".tar.gz", ".zip"};

// Archive formats that can be handled
enum class Format { TAR_GZ, ZIP };

// Lookup table from file extension to archive format
inline const std::map<std::string, Format> extensions_format{
    {".tar.gz", Format::TAR_GZ}, {".zip", Format::ZIP}};
/**
 * Abstract interface of an archive backend.
 *
 * Implementations provide both directions: packing a set of inputs into an
 * archive and unpacking an archive into an output location.
 */
class Archiver {
 public:
  // Virtual so that implementations can be destroyed through an `Archiver*`
  virtual ~Archiver() = default;

  /**
   * Pack `ins` into the archive `archive_out`.
   *
   * @param ins         paths to put into the archive
   * @param archive_out path of the archive to create
   */
  virtual void compress(std::set<std::filesystem::path> ins,
                        std::filesystem::path archive_out) = 0;

  /**
   * Unpack the archive `archive_in` to `out`.
   *
   * @param archive_in path of the archive to read
   * @param out        path the content is extracted to
   */
  virtual void extract(std::filesystem::path archive_in,
                       std::filesystem::path out) = 0;
};
class LibArchiver : public Archiver {
public:
void compress(std::set<std::filesystem::path> ins,
std::filesystem::path archive_out);
void extract(std::filesystem::path archive_in, std::filesystem::path out);
};
// Returns the known archive extension of `path` (e.g. ".zip"), or an empty
// path if no known extension is found.
std::filesystem::path archive_extension(const std::filesystem::path& path);
// Returns `path` with its known archive extension removed.
std::filesystem::path strip_archive_extension(const std::filesystem::path& path);
// Returns the `Format` registered for the archive extension of `path`;
// throws `XwimError` if the extension is not known.
Format parse_format(const std::filesystem::path& path);
// Returns true if the archive extension of `path` is listed in
// `format_extensions`.
bool can_extract(const std::filesystem::path& path);
// Creates the archiver for the format of `archive_name`; throws `XwimError`
// if no archiver can be constructed.
std::unique_ptr<Archiver> make_archiver(const std::string& archive_name);
} // namespace xwim

30
src/Common.hpp Normal file
View file

@ -0,0 +1,30 @@
#pragma once
#include <fmt/core.h>
#include <filesystem>
#include <string>
#include <random>
/**
 * {fmt} support for `std::filesystem::path`, so paths can be passed directly
 * to `fmt::format` and `spdlog` calls.
 *
 * No format specification is supported: use a plain `{}`.
 */
template <>
struct fmt::formatter<std::filesystem::path> {
  // Nothing to parse; accept the (empty) specification as is
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  // Writes the path's string representation to the output.
  //
  // The path is passed as an *argument* to a fixed "{}" format string. Using
  // the path text itself as the format string would misinterpret any `{` or
  // `}` occurring in a file name.
  template <typename FormatContext>
  auto format(const std::filesystem::path& path, FormatContext& ctx) const {
    return fmt::format_to(ctx.out(), "{}", path.string());
  }
};
class XwimError : public std::runtime_error {
public:
template <typename... Args>
XwimError(const std::string& fmt, const Args... args)
: std::runtime_error(fmt::format(fmt, args...)){}
};
/**
 * Draw a uniformly distributed random integer from a closed interval.
 *
 * The bounds may be given in either order.
 *
 * @param from one bound of the interval (inclusive)
 * @param to   the other bound of the interval (inclusive)
 * @return a random value `r` with min(from, to) <= r <= max(from, to)
 */
inline int rand_int(int from, int to) {
  // Seed one engine per thread on first use instead of constructing and
  // seeding a new Mersenne Twister on every call.
  static thread_local std::mt19937 gen{std::random_device{}()};

  // `uniform_int_distribution` requires lower <= upper
  const int lower = from < to ? from : to;
  const int upper = from < to ? to : from;

  std::uniform_int_distribution<> distrib(lower, upper);
  return distrib(gen);
}

75
src/Log.hpp Normal file
View file

@ -0,0 +1,75 @@
#pragma once
#include <spdlog/common.h>
#include <spdlog/spdlog.h>
#include <cstdlib>
#ifdef NDEBUG
#define XWIM_LOGLEVEL SPDLOG_LEVEL_ERROR
#else
#define XWIM_LOGLEVEL SPDLOG_LEVEL_DEBUG
#endif
namespace xwim::log {
/**
 * Get log level from XWIM_LOGLEVEL environment variable.
 * For valid values see SPDLOG_LEVEL_NAMES in spdlog/common.h
 *
 * Declared `inline` because it is defined in a header: without it, including
 * this header from more than one source file yields duplicate definitions.
 *
 * @returns spdlog::level::level_enum::off if no valid XWIM_LOGLEVEL defined
 */
inline spdlog::level::level_enum _init_from_env() {
  const char* env_lvl = std::getenv("XWIM_LOGLEVEL");
  if (!env_lvl) {
    return spdlog::level::level_enum::off;
  }

  spdlog::level::level_enum lvl = spdlog::level::from_str(env_lvl);

  // `::from_str` returns `off` if no match found
  if (spdlog::level::level_enum::off == lvl) {
    // Logged with the default logger/level, nothing has been configured yet
    spdlog::debug("No environment definition for log level");
  }

  return lvl;
}
/**
 * Get log level from compile time definition.
 *
 * Declared `inline` because it is defined in a header: without it, including
 * this header from more than one source file yields duplicate definitions.
 *
 * @return spdlog::level::level_enum::error for release builds (-DNDEBUG)
 *         spdlog::level::level_enum::debug for debug builds
 */
inline spdlog::level::level_enum _init_from_compile() {
  return static_cast<spdlog::level::level_enum>(XWIM_LOGLEVEL);
}
/**
 * Determine the log level from various sources at runtime.
 *
 * The log level is determined from sources in the following order (first
 * wins):
 * 1. The `level` argument
 * 2. The XWIM_LOGLEVEL environment variable
 * 3. The default for the build type (-DNDEBUG)
 *    -> ERROR for release builds
 *    -> DEBUG for debug builds
 *
 * The determined level is then set for the default logger via
 * `spdlog::set_level`.
 *
 * Declared `inline` because it is defined in a header: without it, including
 * this header from more than one source file yields duplicate definitions.
 *
 * @param level explicit log level; `off` (the default) means "not given"
 */
inline void init(
    spdlog::level::level_enum level = spdlog::level::level_enum::off) {
  // 1. explicit argument
  if (spdlog::level::level_enum::off != level) {
    spdlog::set_level(level);
    return;
  }

  // 2. environment
  level = _init_from_env();
  if (spdlog::level::level_enum::off != level) {
    spdlog::set_level(level);
    return;
  }

  // 3. build type default
  spdlog::set_level(_init_from_compile());
}
} // namespace xwim::log

135
src/Xwim.cpp Normal file
View file

@ -0,0 +1,135 @@
#include "Xwim.hpp"
#include <spdlog/spdlog.h>
#include <cstdlib>
#include <filesystem>
#include <ios>
#include <iostream>
#include <random>
#include <string>
#include "Archiver.hpp"
#include "Common.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
// Extension of the archive created when the user does not name one:
// ".tar.gz" on unix-like systems, ".zip" everywhere else.
#if defined(unix) || defined(__unix__) || defined(__unix)
std::string default_extension = ".tar.gz";
#else
std::string default_extension = ".zip";
#endif
// A fresh instance does not know yet whether to compress or to extract
Xwim::Xwim() : action(Action::UNKNOWN) {}
void Xwim::try_infer() {
infer_action();
infer_output();
if (action == Action::COMPRESS) {
archiver = make_archiver(out.string());
} else if (action == Action::EXTRACT) {
// we can only handle one archive for extraction at a time.
// Checked in `infer_extraction_output`
archiver = make_archiver(ins.begin()->string());
}
}
void Xwim::dwim() {
switch (action) {
case Action::COMPRESS:
this->archiver->compress(ins, out);
break;
case Action::EXTRACT:
this->archiver->extract(*ins.begin(), out);
break;
default:
spdlog::error("Unknown action");
}
}
/**
 * Guess the action unless it was set explicitly.
 *
 * A single input that looks like a known archive means extraction, anything
 * else means compression.
 */
void Xwim::infer_action() {
  if (action == Action::UNKNOWN) {
    const bool single_archive = ins.size() == 1 && can_extract(*ins.begin());
    action = single_archive ? Action::EXTRACT : Action::COMPRESS;

    spdlog::debug("Inferred action: {}", action);
  }
}
/**
 * Guess the output path unless it was set explicitly.
 *
 * Delegates to the inference matching the current action.
 *
 * @throws XwimError if the action is still unknown
 */
void Xwim::infer_output() {
  if (!out.empty()) return;

  if (action == Action::COMPRESS) {
    infer_compression_output();
  } else if (action == Action::EXTRACT) {
    infer_extraction_output();
  } else {
    throw XwimError{"Cannot infer output, action is unknown"};
  }

  spdlog::debug("Inferred out: {}", out.string());
}
void Xwim::infer_compression_output() {
if (ins.size() == 1) {
// archive name is just the name of the input with default archive
// extension
fs::path p = *ins.begin();
while (p.has_extension()) p = p.stem();
p += default_extension;
out = p;
} else {
// We cannot guess the name of the output archive
// TODO use readline/lineoise/editline for path completion
cout << "Archive name: ";
cin >> out;
out = fs::path(out);
}
}
void Xwim::infer_extraction_output() {
if (ins.size() > 1) {
throw XwimError{"Cannot extract more than one archive at a time"};
}
// create a temporary path for extraction
fs::path archive_stem = xwim::strip_archive_extension(*ins.begin());
// note: we use here what is considered an `extensions` by `fs::path` so that
// we can strip it again easily later on
archive_stem += ".";
archive_stem += to_string(rand_int(999, 99999));
archive_stem += ".tmp";
this->out = archive_stem;
}
void Xwim::setCompress() {
this->action = Action::COMPRESS;
spdlog::debug("Set action to {}", this->action);
}
void Xwim::setExtract() {
this->action = Action::EXTRACT;
spdlog::debug("Set action to {}", this->action);
}
// Explicitly set the output path; a non-empty path disables output inference.
void Xwim::setOut(fs::path path) {
  out = path;
  spdlog::debug("Set out to {}", out);
}
/**
 * Add input paths.
 *
 * Inputs are kept as a set, so duplicates are dropped. A warning with the
 * number of dropped paths is logged if there were any.
 *
 * @param ins paths to add to the inputs
 */
void Xwim::setIns(vector<fs::path> ins) {
  // Count against the size before insertion so that the number stays correct
  // (and the unsigned subtraction cannot wrap around) when inputs were
  // already present.
  const auto size_before = this->ins.size();
  this->ins.insert(ins.begin(), ins.end());
  const auto added = this->ins.size() - size_before;

  if (added != ins.size()) {
    spdlog::warn("Duplicate input files found. Removed {} duplicate(s).",
                 (ins.size() - added));
  }
}
}

62
src/Xwim.hpp Normal file
View file

@ -0,0 +1,62 @@
#pragma once
#include <fmt/core.h>
#include <fmt/format.h>
#include <exception>
#include <memory>
#include <set>
#include <stdexcept>
#include "Common.hpp"
#include "Archiver.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
// What xwim is supposed to do with its inputs
enum class Action {
  UNKNOWN,  // not decided yet
  EXTRACT,  // unpack an archive
  COMPRESS  // pack inputs into an archive
};
class Xwim {
private:
Action action;
fs::path out;
set<fs::path> ins;
unique_ptr<Archiver> archiver;
void infer_action();
void infer_output();
void infer_compression_output();
void infer_extraction_output();
public:
Xwim();
void try_infer();
void dwim();
void setCompress();
void setExtract();
void setOut(fs::path);
void setIns(vector<fs::path> ins);
};
} // namespace xwim
/**
 * {fmt} support for `xwim::Action`, so actions can be passed directly to
 * `fmt::format` and `spdlog` calls.
 *
 * No format specification is supported: use a plain `{}`.
 */
template <>
struct fmt::formatter<xwim::Action> {
  // Nothing to parse; accept the (empty) specification as is
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  // Writes the name of the enumerator; nothing for values outside the enum.
  // `const` because formatting does not modify the formatter.
  template <typename FormatContext>
  auto format(const xwim::Action& action, FormatContext& ctx) const {
    const char* name = "";
    switch (action) {
      case xwim::Action::UNKNOWN:
        name = "UNKNOWN";
        break;
      case xwim::Action::EXTRACT:
        name = "EXTRACT";
        break;
      case xwim::Action::COMPRESS:
        name = "COMPRESS";
        break;
    }
    return fmt::format_to(ctx.out(), "{}", name);
  }
};

View file

@ -0,0 +1,103 @@
#include <archive.h>
#include <archive_entry.h>
#include <fmt/core.h>
#include <spdlog/spdlog.h>
#include <filesystem>
#include <iostream>
#include <memory>
#include "Archiver.hpp"
#include "Common.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
// Forward declaration; defined below `LibArchiver::extract`.
// Copies the data blocks of the current entry from `reader` to `writer` and
// returns ARCHIVE_OK on success or the failing libarchive status otherwise.
static int copy_data(shared_ptr<archive> reader, shared_ptr<archive> writer);
// Compression is not implemented: the call returns without doing anything.
void LibArchiver::compress(set<fs::path> ins, fs::path archive_out) {}
/**
 * Extract the archive `archive_in` into the directory `out`.
 *
 * `out` is created if necessary. Entries are written relative to `out` by
 * temporarily making it the current working directory; the previous working
 * directory is restored on every exit path, including exceptions.
 *
 * Entries that try to leave `out` (absolute paths, `..` components, writes
 * through symlinks) are refused by libarchive and make the extraction fail.
 *
 * @param archive_in path of the archive to extract
 * @param out        directory to extract to
 * @throws XwimError if the archive cannot be opened, read or written
 */
void LibArchiver::extract(fs::path archive_in, fs::path out) {
  spdlog::debug("Extracting archive {} to {}", archive_in, out);

  // Restores the working directory when the scope is left. While `active`,
  // the restore is best effort only since a destructor must not throw.
  struct CwdGuard {
    fs::path previous;
    bool active;
    ~CwdGuard() {
      if (!active) return;
      try {
        fs::current_path(previous);
      } catch (...) {
        // nothing sensible can be done while unwinding
      }
    }
  };

  int r;  // libarchive error handling

  // cannot use unique_ptr here since unique_ptr requires a
  // complete type. `archive` is forward declared only.
  shared_ptr<archive> reader(archive_read_new(), archive_read_free);
  archive_read_support_filter_all(reader.get());
  archive_read_support_format_all(reader.get());

  // Open before changing directory: `archive_in` may be a relative path
  r = archive_read_open_filename(reader.get(), archive_in.c_str(), 10240);
  if (r != ARCHIVE_OK) {
    throw XwimError{"Failed opening archive {}. {}", archive_in,
                    archive_error_string(reader.get())};
  }

  fs::create_directories(out);
  CwdGuard cwd_guard{fs::current_path(), true};
  fs::current_path(out);

  // Declared after the guard so that, on unwinding, the writer is released
  // while `out` is still the working directory.
  shared_ptr<archive> writer(archive_write_disk_new(), archive_write_free);
  // Archives are untrusted input: do not let entries escape from `out`
  archive_write_disk_set_options(writer.get(),
                                 ARCHIVE_EXTRACT_SECURE_NODOTDOT |
                                     ARCHIVE_EXTRACT_SECURE_SYMLINKS |
                                     ARCHIVE_EXTRACT_SECURE_NOABSOLUTEPATHS);
  archive_write_disk_set_standard_lookup(writer.get());

  archive_entry* entry = nullptr;
  for (;;) {
    r = archive_read_next_header(reader.get(), &entry);
    if (r == ARCHIVE_EOF) break;
    if (r != ARCHIVE_OK) {
      throw XwimError{"Failed extracting archive entry. {}",
                      archive_error_string(reader.get())};
    }

    r = archive_write_header(writer.get(), entry);
    if (r != ARCHIVE_OK) {
      throw XwimError{"Failed writing archive entry header. {}",
                      archive_error_string(writer.get())};
    }

    if (archive_entry_size(entry) > 0) {
      r = copy_data(reader, writer);
      if (r != ARCHIVE_OK) {
        throw XwimError{"Failed writing archive entry data. {}",
                        archive_error_string(writer.get())};
      }
    }

    r = archive_write_finish_entry(writer.get());
    if (r != ARCHIVE_OK) {
      throw XwimError{"Failed finishing archive entry data. {}",
                      archive_error_string(writer.get())};
    }
  }

  // Closing the disk writer applies attributes libarchive deferred during
  // extraction. Entry paths are relative, so this has to happen before the
  // working directory is switched back.
  r = archive_write_close(writer.get());
  if (r != ARCHIVE_OK) {
    throw XwimError{"Failed extracting archive {}. {}", archive_in,
                    archive_error_string(writer.get())};
  }

  // Regular exit: restore explicitly so that a failure is reported
  cwd_guard.active = false;
  fs::current_path(cwd_guard.previous);
}
/**
 * Copy the data of the current archive entry from `reader` to `writer`.
 *
 * Blocks are transferred one by one until the reader reports the end of the
 * entry.
 *
 * @param reader archive the data blocks are read from
 * @param writer archive the data blocks are written to
 * @return ARCHIVE_OK once all blocks are copied, otherwise the status of the
 *         first read or write that did not return ARCHIVE_OK
 */
static int copy_data(shared_ptr<archive> reader, shared_ptr<archive> writer) {
  const void* block;
  size_t length;
  int64_t position;

  int status =
      archive_read_data_block(reader.get(), &block, &length, &position);
  while (status == ARCHIVE_OK) {
    status = archive_write_data_block(writer.get(), block, length, position);
    if (status != ARCHIVE_OK) {
      return status;
    }
    status = archive_read_data_block(reader.get(), &block, &length, &position);
  }

  // End of entry is the regular way out of the loop
  return status == ARCHIVE_EOF ? ARCHIVE_OK : status;
}
} // namespace xwim

View file

@ -1,84 +0,0 @@
#include "argparse.hpp"
#include <fmt/core.h>
#include <tclap/ArgException.h>
#include <tclap/CmdLine.h>
#include <tclap/SwitchArg.h>
#include <tclap/UnlabeledMultiArg.h>
#include <tclap/ValueArg.h>
#include <filesystem>
#include <iostream>
#include <vector>
#include "archivinfo.hpp"
#include "fileformats.hpp"
using namespace TCLAP;
using namespace xwim;
namespace fs = std::filesystem;
// Tell TCLAP to treat `fs::path` as a value-like type (`ValueLike`
// category), so it can be used as the type of command line arguments.
template <>
struct TCLAP::ArgTraits<fs::path> {
  using ValueCategory = ValueLike;
};
/**
 * Set up the command line definition.
 *
 * `-c`/`-x` are mutually exclusive switches, `-o` optionally names the
 * output, and at least one unlabeled input file is required.
 */
ArgParse::ArgParse()
    : cmd{"xwim - Do What I Mean Extractor", ' ', "0.3.0"},
      arg_compress{"c", "compress", "Compress <files>", false},
      arg_extract{"x", "extract", "Extract <file>", false},
      arg_outfile{"o",   "out",      "Out <file-or-path>",
                  false, fs::path{}, "A path on the filesystem"},
      arg_infiles{"Files", "Archive to extract or files to compress", true,
                  "A path on the filesystem"} {
  // compress and extract exclude each other
  cmd.xorAdd(arg_compress, arg_extract);
  cmd.add(arg_outfile);
  cmd.add(arg_infiles);
}
void ArgParse::parse(int argc, char** argv) {
try {
cmd.parse(argc, argv);
} catch (ArgException& e) {
throw new xwim::ArgParseException(e.error());
}
this->extract = parse_extract();
this->outfile = arg_outfile.getValue();
this->infiles = arg_infiles.getValue();
}
bool ArgParse::parse_extract() {
// extract/compress explicitly given; xor ensured in `cmd`
if (this->arg_compress.getValue()) {
return false;
} else if (this->arg_extract.getValue()) {
return true;
}
// Not explicitly given, check if we can guess from input
// An outfile is given
if (this->arg_outfile.isSet()) {
// outfile looks like an archive
if (xwim::archivinfo::has_known_extension(this->arg_outfile.getValue())) {
return false;
}
// outfile is not a known archive, assume it meant as folder for extraction
else {
return true;
}
}
// one infile which is an archive, so intention is probably to extract this
if (this->arg_infiles.getValue().size() == 1 &&
xwim::archivinfo::is_archive(this->arg_infiles.getValue().at(0))) {
return true;
}
// all other cases, in particular multiple infiles, assume we want to compress
return false;
}
bool ArgParse::compressp() { return !this->extract; }
bool ArgParse::extractp() { return this->extract; }

View file

@ -1,45 +0,0 @@
#pragma once
#include <fmt/core.h>
#include <tclap/CmdLine.h>
#include <tclap/SwitchArg.h>
#include <tclap/UnlabeledMultiArg.h>
#include <tclap/ValueArg.h>
#include <filesystem>
#include <iostream>
namespace xwim {
/**
 * Command line parser of xwim, built on TCLAP.
 *
 * Call `parse` first, then query the result with `compressp`/`extractp`.
 */
class ArgParse {
 private:
  // Parse results. `extract` has a defined value even before `parse` ran, so
  // that `compressp`/`extractp` never read an uninitialized bool.
  bool extract{false};
  std::filesystem::path outfile;
  std::vector<std::filesystem::path> infiles;

  // TCLAP command line definition
  TCLAP::CmdLine cmd;
  TCLAP::SwitchArg arg_compress;
  TCLAP::SwitchArg arg_extract;
  TCLAP::ValueArg<std::filesystem::path> arg_outfile;
  TCLAP::UnlabeledMultiArg<std::filesystem::path> arg_infiles;

 protected:
  // Decide between extraction (true) and compression (false)
  bool parse_extract();

 public:
  ArgParse();

  // Parse the command line and store the results
  void parse(int argc, char** argv);

  // True if compression was requested or inferred
  bool compressp();
  // True if extraction was requested or inferred
  bool extractp();
};
/**
 * Exception describing a command line that could not be parsed.
 *
 * The message is either given verbatim or built from a {fmt} format string
 * and its arguments.
 */
class ArgParseException : public std::exception {
 public:
  // Use `what` verbatim as the message
  ArgParseException(std::string what) : _what{what} {}

  // Build the message from `fmt_string` and `args`
  template <typename... Args>
  ArgParseException(std::string fmt_string, Args&&... args)
      : _what{fmt::format(fmt_string, args...)} {}

  // Returns the stored message
  const char* what() const noexcept override { return _what.c_str(); }

 private:
  std::string _what;
};
} // namespace xwim

View file

@ -1,6 +1,75 @@
#include <spdlog/common.h> #include <spdlog/common.h>
#include <spdlog/logger.h>
#include <spdlog/spdlog.h>
#include <tclap/ArgException.h>
#include <tclap/CmdLine.h>
#include <tclap/StdOutput.h>
#include <tclap/SwitchArg.h>
#include <tclap/UnlabeledMultiArg.h>
#include <tclap/ValueArg.h>
#include <cstdlib> #include <cstdlib>
#include <filesystem>
#include "Common.hpp"
#include "Log.hpp"
#include "Xwim.hpp"
using namespace xwim;
using namespace std;
namespace fs = std::filesystem;
// Tell TCLAP to treat `std::filesystem::path` as a value-like type
// (`ValueLike` category), so it can be used as the type of command line
// arguments.
template <>
struct TCLAP::ArgTraits<std::filesystem::path> {
  using ValueCategory = ValueLike;
};
int main(int argc, char** argv) { int main(int argc, char** argv) {
log::init();
TCLAP::CmdLine cmd{"xwim - Do What I Mean Extractor", ' ', "0.3.0"};
TCLAP::SwitchArg arg_compress{"c", "compress", "Compress <files>", cmd,
false};
TCLAP::SwitchArg arg_extract{"x", "extract", "Extract <file>", cmd, false};
TCLAP::ValueArg<fs::path> arg_outfile{
"o", "out", "Out <file-or-path>",
false, fs::path{}, "A path on the filesystem",
cmd};
TCLAP::UnlabeledMultiArg<fs::path> arg_infiles{
"Files", "Archive to extract or files to compress", true,
"A path on the filesystem", cmd};
Xwim xwim;
cmd.parse(argc, argv);
if (arg_extract.isSet() && arg_compress.isSet()) {
// This is a bit ugly but `none-or-xor` only available in
// tclap-1.4 which is not well supported in current
// distributions
auto out = TCLAP::StdOutput{};
TCLAP::ArgException e{
"Cannot compress `-c` and extract `-x` simultaneously"};
try {
out.failure(cmd, e);
} catch (TCLAP::ExitException& e) {
exit(e.getExitStatus());
}
}
// `none-or-xor` ensured already
if (arg_extract.isSet()) xwim.setExtract();
if (arg_compress.isSet()) xwim.setCompress();
if (arg_outfile.isSet()) xwim.setOut(arg_outfile.getValue());
if (arg_infiles.isSet()) xwim.setIns(arg_infiles.getValue());
try {
xwim.try_infer();
xwim.dwim();
} catch (XwimError& e) {
spdlog::error(e.what());
}
} }

View file

@ -1,11 +1,9 @@
xwim_src = ['main.cpp', xwim_src = ['main.cpp', 'Xwim.cpp', 'Archiver.cpp']
'archive.cpp', xwim_archiver = ['archiver/LibArchiver.cpp']
'archive_sys.cpp',
'util/argparse.cpp']
xwim_libs = [dependency('libarchive', required: true), xwim_libs = [dependency('libarchive', required: true),
dependency('fmt', required: true), dependency('fmt', required: true),
dependency('spdlog', required: true), dependency('spdlog', required: true),
dependency('tclap', required: true)] dependency('tclap', required: true)]
executable('xwim', xwim_src, dependencies: xwim_libs) executable('xwim', xwim_src+xwim_archiver, dependencies: xwim_libs)