From e59968444ed79df4b4bffa3421f688967b79465e Mon Sep 17 00:00:00 2001 From: Armin Friedl Date: Sun, 23 Feb 2020 15:20:47 +0100 Subject: [PATCH] [feat] Add known fileformats Reliably stripping (multiple) archive extensions (e.g. .tar.gz) is not supported by std::filesystem::path. Dots in the regular root folder name can cause issues. fileformats.hpp adds well-known format extensions which are used for stripping the extension. Note that this is not used by libarchive or xwim to determine the filters/formats for extraction. This is done by libarchive's `bidding`. --- meson.build | 2 +- src/archive.cpp | 5 ++--- src/archive.hpp | 5 +---- src/fileformats.hpp | 38 ++++++++++++++++++++++++++++++++++++++ src/main.cpp | 6 ++---- src/meson.build | 4 +++- src/spec.hpp | 5 +---- 7 files changed, 48 insertions(+), 17 deletions(-) create mode 100644 src/fileformats.hpp diff --git a/meson.build b/meson.build index 5ec2ae2..aa1a0d1 100644 --- a/meson.build +++ b/meson.build @@ -1,5 +1,5 @@ project('xwim', 'cpp', - version: '0.1', + version: '0.2', default_options: ['cpp_std=c++17']) subdir('src') diff --git a/src/archive.cpp b/src/archive.cpp index 50026da..7ff9bc6 100644 --- a/src/archive.cpp +++ b/src/archive.cpp @@ -14,6 +14,7 @@ namespace logger = spdlog; #include "archive_sys.hpp" #include "archive.hpp" #include "spec.hpp" +#include "fileformats.hpp" namespace xwim { @@ -22,9 +23,7 @@ static void _spec_is_root_filename(ArchiveSpec* spec, std::filesystem::path* filepath) { auto entry_path = entry.path(); auto norm_stem = filepath->filename(); - - while (norm_stem.has_extension()) - norm_stem = norm_stem.stem(); + norm_stem = xwim::stem(norm_stem); if (*entry_path.begin() != norm_stem) { logger::debug("Archive root does not match archive name"); diff --git a/src/archive.hpp b/src/archive.hpp index bbf36b9..0e3b494 100644 --- a/src/archive.hpp +++ b/src/archive.hpp @@ -1,5 +1,4 @@ -#ifndef ARCHIVE_H -#define ARCHIVE_H +#pragma once #include #include @@ -42,5 +41,3 @@ class 
ArchiveException : public std::exception { }; } // namespace xwim - -#endif diff --git a/src/fileformats.hpp b/src/fileformats.hpp new file mode 100644 index 0000000..4dc3562 --- /dev/null +++ b/src/fileformats.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +namespace logger = spdlog; + +/** Common archive formats understood by xwim + * + * The underlying libarchive backend retrieves format information by a process + * called `bidding`. Hence, this information is mainly used to strip extensions. + * + * Stripping extensions via `std::filesystem::path` does not work reliably since + * it gets easily confused by dots in the regular file name. + */ + +#include +#include +#include + +namespace xwim { + + const std::set fileformats{ + ".7z", ".7zip", ".jar", ".tgz", ".bz2", ".bzip2", ".gz", + ".gzip", ".rar", ".tar", ".tar.gz", ".tar.bz2", ".tar.xz", ".xz", ".zip"}; /* stem() compares against path::extension(), which yields only the last suffix, so every single suffix (".xz" as well as ".gz"/".bz2") must be listed on its own; the multi-part entries alone never match */ +/** Strips every trailing known archive extension from `path`, e.g. "a.tar.gz" -> "a". Returns a copy; `path` is not modified. A path whose last extension is not in `fileformats` is returned unchanged; once at least one extension has been stripped the result is a bare file name, because path::stem() drops the parent directory. */ +inline std::filesystem::path stem(const std::filesystem::path& path) { + std::filesystem::path p_stem {path}; + logger::trace("Stemming {}", p_stem.string()); + + while( fileformats.find(p_stem.extension().string()) != fileformats.end() ) { + p_stem = p_stem.stem(); + logger::trace("Stemmed to {}", p_stem.string()); + } + + logger::trace("Finished stemming {}", p_stem.string()); + return p_stem; +} + +} // namespace xwim diff --git a/src/main.cpp b/src/main.cpp index c34037a..a5e64c9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,6 +9,7 @@ namespace logger = spdlog; #include "archive.hpp" #include "spec.hpp" +#include "fileformats.hpp" int main(int argc, char** argv) { logger::set_level(logger::level::trace); @@ -25,10 +26,7 @@ int main(int argc, char** argv) { if (!archive_spec.has_single_root || !archive_spec.is_root_filename) { extract_spec.make_dir = true; - std::filesystem::path stem = filepath.stem(); - - while (stem.has_extension()) - stem = stem.stem(); + std::filesystem::path stem = xwim::stem(filepath); extract_spec.dirname = stem; } diff --git a/src/meson.build b/src/meson.build index caba555..bfebd28
100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,9 +1,11 @@ src = ['main.cpp', 'archive.cpp', 'archive_sys.cpp'] + inc = ['archive.hpp', 'spec.hpp', - 'archive_sys.hpp'] + 'archive_sys.hpp', + 'fileformats.hpp'] libs = [dependency('libarchive', required: true), dependency('fmt', required: true), diff --git a/src/spec.hpp b/src/spec.hpp index c0bf840..e9ab8d2 100644 --- a/src/spec.hpp +++ b/src/spec.hpp @@ -1,5 +1,4 @@ -#ifndef SPEC_H -#define SPEC_H +#pragma once #include #include @@ -55,5 +54,3 @@ struct fmt::formatter { spec.make_dir, spec.dirname.string(), spec.extract_subarchive); } }; - -#endif