diff --git a/meson.build b/meson.build index 0b2df58..7c77ef2 100644 --- a/meson.build +++ b/meson.build @@ -8,4 +8,4 @@ add_global_arguments('-DVERSION='+meson.version(), language: 'cpp') subdir('src') subdir('doc') -subdir('test') +# subdir('test') diff --git a/src/Archiver.cpp b/src/Archiver.cpp new file mode 100644 index 0000000..b3e3a91 --- /dev/null +++ b/src/Archiver.cpp @@ -0,0 +1,87 @@ +#include "Archiver.hpp" + +#include + +#include +#include + +#include "Common.hpp" + +namespace xwim { +using namespace std; +namespace fs = std::filesystem; + +// Extract longest known extension from path +fs::path archive_extension(const fs::path& path) { + // TODO: creates lots of paths, refactor + fs::path ext; + fs::path tmp_path = path; + while (tmp_path.has_extension()) { + fs::path tmp_ext = tmp_path.extension() += ext; + auto search = extensions_format.find(tmp_ext); + + // (Combined) extension not known, return last known extension + if (search == extensions_format.end()) return ext; + + // Continue extending extension + ext = tmp_ext; + tmp_path = tmp_path.stem(); + } + + return ext; +} + +// Strip longest known extension from path +fs::path strip_archive_extension(const fs::path& path) { + // TODO: creates lots of paths, refactor + fs::path ext; + fs::path tmp_path = path; + while (tmp_path.has_extension()) { + fs::path tmp_ext = tmp_path.extension() += ext; + auto search = extensions_format.find(tmp_ext); + + // (Combined) extension not known, return stripped path + if (search == extensions_format.end()) return tmp_path; + + // Continue stripping path + ext = tmp_ext; + tmp_path = tmp_path.stem(); + } + + return tmp_path; +} + +bool can_extract(const fs::path& path) { + fs::path ext = archive_extension(path); + if (format_extensions.find(ext.string()) != format_extensions.end()) { + spdlog::debug("Found {} in known formats", ext); + return true; + } + + spdlog::debug("Could not find {} in known formats", ext); + return false; +} + +Format parse_format(const 
fs::path& path) { + fs::path ext = archive_extension(path); + auto search = extensions_format.find(ext); + if (search == extensions_format.end()) { + throw XwimError{"No known archiver for {}", path}; + } + + return search->second; +} + +unique_ptr make_archiver(const string& archive_name) { + switch (parse_format(archive_name)) { + case Format::TAR_GZ: + case Format::ZIP: + return make_unique(); + default: + throw XwimError{ + "Cannot construct archiver for {}. `extension_format` surjection " + "invariant violated?", + archive_name}; + }; +} +} // namespace xwim diff --git a/src/Archiver.hpp b/src/Archiver.hpp new file mode 100644 index 0000000..52af90e --- /dev/null +++ b/src/Archiver.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include + +#include +#include +#include +#include + +#include "Common.hpp" + +namespace xwim { + +// Invariant: +// `extensions_format` defines a surjection from `format_extensions` +// to `Formats` +const std::set format_extensions{".tar.gz", ".zip"}; +enum class Format { TAR_GZ, ZIP }; +const std::map extensions_format{ + {".tar.gz", Format::TAR_GZ}, {".zip", Format::ZIP}}; + +class Archiver { + public: + virtual void compress(std::set ins, + std::filesystem::path archive_out) = 0; + + virtual void extract(std::filesystem::path archive_in, + std::filesystem::path out) = 0; + + virtual ~Archiver() = default; +}; + +class LibArchiver : public Archiver { + public: + void compress(std::set ins, + std::filesystem::path archive_out); + + void extract(std::filesystem::path archive_in, std::filesystem::path out); +}; + +std::filesystem::path archive_extension(const std::filesystem::path& path); +std::filesystem::path strip_archive_extension(const std::filesystem::path& path); +Format parse_format(const std::filesystem::path& path); +bool can_extract(const std::filesystem::path& path); + +std::unique_ptr make_archiver(const std::string& archive_name); + +} // namespace xwim diff --git a/src/Common.hpp b/src/Common.hpp new file mode 100644 index 
0000000..8d7becb --- /dev/null +++ b/src/Common.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include +#include + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const std::filesystem::path& path, FormatContext& ctx) { + return format_to(ctx.out(), path.string()); + } +}; + +class XwimError : public std::runtime_error { + public: + template + XwimError(const std::string& fmt, const Args... args) + : std::runtime_error(fmt::format(fmt, args...)){} +}; + +inline int rand_int(int from, int to) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distrib(from, to); + return distrib(gen); +} diff --git a/src/Log.hpp b/src/Log.hpp new file mode 100644 index 0000000..fd9db5a --- /dev/null +++ b/src/Log.hpp @@ -0,0 +1,75 @@ +#pragma once +#include +#include +#include +#ifdef NDEBUG +#define XWIM_LOGLEVEL SPDLOG_LEVEL_ERROR +#else +#define XWIM_LOGLEVEL SPDLOG_LEVEL_DEBUG +#endif + +namespace xwim::log { + +/** + * Get log level from XWIM_LOGLEVEL environment variable. + * For valid values see SPDLOG_LEVEL_NAMES in spdlog/common.h + * + * @returns spdlog::level::level_enum::off if no valid XWIM_LOGLEVEL defined + */ +spdlog::level::level_enum _init_from_env() { + char* env_lvl = std::getenv("XWIM_LOGLEVEL"); + if (!env_lvl) { + return spdlog::level::level_enum::off; + } + + spdlog::level::level_enum lvl = spdlog::level::from_str(env_lvl); + + //`::from_str` returns `off` if no match found + if (spdlog::level::level_enum::off == lvl) { + spdlog::debug("No environment definition for log level"); // uses default + // logger/level + } + + return lvl; +} + +/** + * Get log level from compile time definition. 
+ * + * @return spdlog::level::level_enum::error for release builds (-DNDEBUG) + * spdlog::level::level_enum::debug for debug builds + */ +spdlog::level::level_enum _init_from_compile() { + return static_cast(XWIM_LOGLEVEL); +} + +/** + * Determine the log level from various sources at runtime. + * + * The log level is determined from sources in the following order (first + * wins): + * 1. The `level` argument + * 2. The XWIM_LOGLEVEL environment variable + * 3. The default for the build type (-DNDEBUG) + * -> ERROR for release builds + * -> DEBUG for debug builds + * + * The determined level is then set for the default logger via + * `spdlog::set_level`. + */ +void init(spdlog::level::level_enum level = spdlog::level::level_enum::off) { + if (spdlog::level::level_enum::off != level) { + spdlog::set_level(level); + return; + } + + level = _init_from_env(); + if (spdlog::level::level_enum::off != level) { + spdlog::set_level(level); + return; + } + + spdlog::set_level(_init_from_compile()); +} + +} // namespace xwim::log diff --git a/src/Xwim.cpp b/src/Xwim.cpp new file mode 100644 index 0000000..052a9cc --- /dev/null +++ b/src/Xwim.cpp @@ -0,0 +1,135 @@ +#include "Xwim.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include "Archiver.hpp" +#include "Common.hpp" + +namespace xwim { +using namespace std; +namespace fs = std::filesystem; + +#if defined(unix) || defined(__unix__) || defined(__unix) +std::string default_extension = ".tar.gz"; +#elif defined(_win32) || defined(__win32__) || defined(__windows__) +std::string default_extension = ".zip"; +#else +std::string default_extension = ".zip"; +#endif + +Xwim::Xwim() : action{Action::UNKNOWN} {} + +void Xwim::try_infer() { + infer_action(); + infer_output(); + + if (action == Action::COMPRESS) { + archiver = make_archiver(out.string()); + } else if (action == Action::EXTRACT) { + // we can only handle one archive for extraction at a time. 
+ // Checked in `infer_extraction_output` + archiver = make_archiver(ins.begin()->string()); + } +} +void Xwim::dwim() { + switch (action) { + case Action::COMPRESS: + this->archiver->compress(ins, out); + break; + case Action::EXTRACT: + this->archiver->extract(*ins.begin(), out); + break; + default: + spdlog::error("Unknown action"); + } +} + +void Xwim::infer_action() { + if (action != Action::UNKNOWN) return; + + if (ins.size() == 1 && can_extract(*ins.begin())) { + action = Action::EXTRACT; + } else { + action = Action::COMPRESS; + } + spdlog::debug("Inferred action: {}", action); +} + +void Xwim::infer_output() { + if (!out.empty()) return; + + switch (action) { + case Action::COMPRESS: + infer_compression_output(); + break; + case Action::EXTRACT: + infer_extraction_output(); + break; + default: + throw XwimError{"Cannot infer output, action is unknown"}; + } + + spdlog::debug("Inferred out: {}", out.string()); +} + +void Xwim::infer_compression_output() { + if (ins.size() == 1) { + // archive name is just the name of the input with default archive + // extension + + fs::path p = *ins.begin(); + while (p.has_extension()) p = p.stem(); + p += default_extension; + out = p; + } else { + // We cannot guess the name of the output archive + + // TODO use readline/lineoise/editline for path completion + cout << "Archive name: "; + cin >> out; + out = fs::path(out); + } +} +void Xwim::infer_extraction_output() { + if (ins.size() > 1) { + throw XwimError{"Cannot extract more than one archive at a time"}; + } + + // create a temporary path for extraction + fs::path archive_stem = xwim::strip_archive_extension(*ins.begin()); + + // note: we use here what is considered an `extensions` by `fs::path` so that + // we can strip it again easily later on + archive_stem += "."; + archive_stem += to_string(rand_int(999, 99999)); + archive_stem += ".tmp"; + this->out = archive_stem; +} + +void Xwim::setCompress() { + this->action = Action::COMPRESS; + spdlog::debug("Set action 
to {}", this->action); +} +void Xwim::setExtract() { + this->action = Action::EXTRACT; + spdlog::debug("Set action to {}", this->action); +} +void Xwim::setOut(fs::path path) { + this->out = path; + spdlog::debug("Set out to {}", this->out); +} +void Xwim::setIns(vector ins) { + this->ins.insert(ins.begin(), ins.end()); + if (this->ins.size() != ins.size()) { + spdlog::warn("Duplicate input files found. Removed {} duplicate(s).", + (ins.size() - this->ins.size())); + } +} +} diff --git a/src/Xwim.hpp b/src/Xwim.hpp new file mode 100644 index 0000000..b91f669 --- /dev/null +++ b/src/Xwim.hpp @@ -0,0 +1,62 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "Common.hpp" +#include "Archiver.hpp" + +namespace xwim { +using namespace std; +namespace fs = std::filesystem; + +enum class Action { UNKNOWN, EXTRACT, COMPRESS }; + +class Xwim { + private: + Action action; + fs::path out; + set ins; + unique_ptr archiver; + + void infer_action(); + void infer_output(); + void infer_compression_output(); + void infer_extraction_output(); + + public: + Xwim(); + + void try_infer(); + void dwim(); + + void setCompress(); + void setExtract(); + void setOut(fs::path); + void setIns(vector ins); +}; + +} // namespace xwim + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const xwim::Action& action, FormatContext& ctx) { + switch (action) { + case xwim::Action::UNKNOWN: + return format_to(ctx.out(), "UNKNOWN"); + case xwim::Action::EXTRACT: + return format_to(ctx.out(), "EXTRACT"); + case xwim::Action::COMPRESS: + return format_to(ctx.out(), "COMPRESS"); + }; + return format_to(ctx.out(), ""); + } +}; diff --git a/src/archiver/LibArchiver.cpp b/src/archiver/LibArchiver.cpp new file mode 100644 index 0000000..1198965 --- /dev/null +++ b/src/archiver/LibArchiver.cpp @@ -0,0 +1,103 @@ +#include +#include +#include +#include + +#include +#include +#include + 
+#include "Archiver.hpp" +#include "Common.hpp" + +namespace xwim { +using namespace std; +namespace fs = std::filesystem; + +static int copy_data(shared_ptr reader, shared_ptr writer); + +void LibArchiver::compress(set ins, fs::path archive_out) { return; } + +void LibArchiver::extract(fs::path archive_in, fs::path out) { + spdlog::debug("Extracting archive {} to {}", archive_in, out); + int r; // libarchive error handling + + // cannot use unique_ptr here since unique_ptr requires a + // complete type. `archive` is forward declared only. + shared_ptr reader; + reader = shared_ptr(archive_read_new(), archive_read_free); + archive_read_support_filter_all(reader.get()); + archive_read_support_format_all(reader.get()); + r = archive_read_open_filename(reader.get(), archive_in.c_str(), 10240); + if (r != ARCHIVE_OK) { + throw XwimError{"Failed opening archive {}. {}", archive_in, + archive_error_string(reader.get())}; + } + + shared_ptr writer; + writer = shared_ptr(archive_write_disk_new(), archive_write_free); + archive_write_disk_set_standard_lookup(writer.get()); + + fs::create_directories(out); + fs::path cur_path = fs::current_path(); + fs::current_path(out); + + archive_entry* entry; + for (;;) { + r = archive_read_next_header(reader.get(), &entry); + if (r == ARCHIVE_EOF) break; + + if (r != ARCHIVE_OK) { + throw XwimError{"Failed extracting archive entry. {}", archive_error_string(reader.get())}; + } + + r = archive_write_header(writer.get(), entry); + if (r != ARCHIVE_OK) { + throw XwimError{"Failed writing archive entry header. {}", archive_error_string(writer.get())}; + } + + if (archive_entry_size(entry) > 0) { + r = copy_data(reader, writer); + if (r != ARCHIVE_OK) { + throw XwimError{"Failed writing archive entry data. {}", + archive_error_string(writer.get())}; + } + } + + r = archive_write_finish_entry(writer.get()); + if (r != ARCHIVE_OK) { + throw XwimError{"Failed finishing archive entry data. 
{}", + archive_error_string(writer.get())}; + } + } + + if (r != ARCHIVE_OK && r != ARCHIVE_EOF) { + throw XwimError{"Failed extracting archive {}. {}", archive_in, + archive_error_string(reader.get())}; + } + + fs::current_path(cur_path); +} + +static int copy_data(shared_ptr reader, shared_ptr writer) { + int r; + const void *buff; + size_t size; + int64_t offset; + + for (;;) { + r = archive_read_data_block(reader.get(), &buff, &size, &offset); + if (r == ARCHIVE_EOF) { + return (ARCHIVE_OK); + } + if (r != ARCHIVE_OK) { + return (r); + } + r = archive_write_data_block(writer.get(), buff, size, offset); + if (r != ARCHIVE_OK) { + return (r); + } + } +} + +} // namespace xwim diff --git a/src/argparse.cpp b/src/argparse.cpp deleted file mode 100644 index 3f78e08..0000000 --- a/src/argparse.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "argparse.hpp" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "archivinfo.hpp" -#include "fileformats.hpp" - -using namespace TCLAP; -using namespace xwim; -namespace fs = std::filesystem; - -template <> -struct TCLAP::ArgTraits { - typedef ValueLike ValueCategory; -}; - -ArgParse::ArgParse() - : cmd{"xwim - Do What I Mean Extractor", ' ', "0.3.0"}, - arg_compress{"c", "compress", "Compress ", false}, - arg_extract{"x", "extract", "Extract ", false}, - arg_outfile{"o", "out", "Out ", - false, fs::path{}, "A path on the filesystem"}, - arg_infiles{"Files", "Archive to extract or files to compress", true, - "A path on the filesystem"} { - cmd.xorAdd(arg_compress, arg_extract); - cmd.add(arg_outfile); - cmd.add(arg_infiles); -}; - -void ArgParse::parse(int argc, char** argv) { - try { - cmd.parse(argc, argv); - } catch (ArgException& e) { - throw new xwim::ArgParseException(e.error()); - } - - this->extract = parse_extract(); - this->outfile = arg_outfile.getValue(); - this->infiles = arg_infiles.getValue(); -} - -bool ArgParse::parse_extract() { - // extract/compress explicitly 
given; xor ensured in `cmd` - if (this->arg_compress.getValue()) { - return false; - } else if (this->arg_extract.getValue()) { - return true; - } - - // Not explicitly given, check if we can guess from input - - // An outfile is given - if (this->arg_outfile.isSet()) { - // outfile looks like an archive - if (xwim::archivinfo::has_known_extension(this->arg_outfile.getValue())) { - return false; - } - - // outfile is not a known archive, assume it meant as folder for extraction - else { - return true; - } - } - - // one infile which is an archive, so intention is probably to extract this - if (this->arg_infiles.getValue().size() == 1 && - xwim::archivinfo::is_archive(this->arg_infiles.getValue().at(0))) { - return true; - } - - // all other cases, in particular multiple infiles, assume we want to compress - return false; -} -bool ArgParse::compressp() { return !this->extract; } -bool ArgParse::extractp() { return this->extract; } diff --git a/src/argparse.hpp b/src/argparse.hpp deleted file mode 100644 index dd0a372..0000000 --- a/src/argparse.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -namespace xwim { -class ArgParse { - private: - bool extract; - std::filesystem::path outfile; - std::vector infiles; - - TCLAP::CmdLine cmd; - TCLAP::SwitchArg arg_compress; - TCLAP::SwitchArg arg_extract; - TCLAP::ValueArg arg_outfile; - TCLAP::UnlabeledMultiArg arg_infiles; - - protected: - bool parse_extract(); - - public: - ArgParse(); - void parse(int argc, char** argv); - bool compressp(); - bool extractp(); -}; - -class ArgParseException : public std::exception { - private: - std::string _what; - - public: - ArgParseException(std::string what) : _what{what} {}; - template - ArgParseException(std::string fmt_string, Args&&... 
args) : _what{fmt::format(fmt_string, args...)} {}; - virtual const char* what() const noexcept { return this->_what.c_str(); } -}; -} // namespace xwim diff --git a/src/main.cpp b/src/main.cpp index 5c970cf..1a6ca8a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,75 @@ #include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include + +#include "Common.hpp" +#include "Log.hpp" +#include "Xwim.hpp" + +using namespace xwim; +using namespace std; +namespace fs = std::filesystem; + +template <> +struct TCLAP::ArgTraits { + typedef ValueLike ValueCategory; +}; int main(int argc, char** argv) { + log::init(); + TCLAP::CmdLine cmd{"xwim - Do What I Mean Extractor", ' ', "0.3.0"}; + + TCLAP::SwitchArg arg_compress{"c", "compress", "Compress ", cmd, + false}; + TCLAP::SwitchArg arg_extract{"x", "extract", "Extract ", cmd, false}; + + TCLAP::ValueArg arg_outfile{ + "o", "out", "Out ", + false, fs::path{}, "A path on the filesystem", + cmd}; + TCLAP::UnlabeledMultiArg arg_infiles{ + "Files", "Archive to extract or files to compress", true, + "A path on the filesystem", cmd}; + + Xwim xwim; + + cmd.parse(argc, argv); + + if (arg_extract.isSet() && arg_compress.isSet()) { + // This is a bit ugly but `none-or-xor` only available in + // tclap-1.4 which is not well supported in current + // distributions + auto out = TCLAP::StdOutput{}; + TCLAP::ArgException e{ + "Cannot compress `-c` and extract `-x` simultaneously"}; + try { + out.failure(cmd, e); + } catch (TCLAP::ExitException& e) { + exit(e.getExitStatus()); + } + } + + // `none-or-xor` ensured already + if (arg_extract.isSet()) xwim.setExtract(); + if (arg_compress.isSet()) xwim.setCompress(); + + if (arg_outfile.isSet()) xwim.setOut(arg_outfile.getValue()); + if (arg_infiles.isSet()) xwim.setIns(arg_infiles.getValue()); + + try { + xwim.try_infer(); + xwim.dwim(); + } catch (XwimError& e) { + spdlog::error(e.what()); + } } diff --git a/src/meson.build b/src/meson.build 
# src/meson.build

# Core sources of the xwim executable.
xwim_src = [
  'main.cpp',
  'Xwim.cpp',
  'Archiver.cpp',
]

# Archiver backends, kept in their own subdirectory.
xwim_archiver = [
  'archiver/LibArchiver.cpp',
]

# Every dependency is mandatory.
xwim_libs = [
  dependency('libarchive', required: true),
  dependency('fmt', required: true),
  dependency('spdlog', required: true),
  dependency('tclap', required: true),
]

executable('xwim', xwim_src + xwim_archiver, dependencies: xwim_libs)